admin管理员组文章数量:1194605
I'm trying to get the title tag of a url with cheerio. But, I'm getting empty string values. This is my code:
// GET /scrape handler: requests `url`, parses the returned HTML with cheerio,
// collects title/release/rating into an object, writes that object to
// output.json, and replies to the browser with a fixed message.
app.get('/scrape', function(req, res){
// NOTE(review): `url` is assigned without var/let/const, so it is an implicit
// global. '/' is not an absolute URL; it looks like a placeholder -- confirm
// the real target page.
url = '/';
request(url, function(error, response, html){
// Parse only when the request itself did not fail. There is no else branch,
// so a failed request is never reported.
if(!error){
var $ = cheerio.load(html);
var title, release, rating;
// NOTE(review): `json` is declared inside this if-block but is read by the
// fs.writeFile call below. Because `var` is function-scoped it is still
// visible there, but it is undefined whenever `error` is set.
var json = { title : "", release : "", rating : ""};
// filter() is used here only to run the callback on each matched <title>;
// its return value is discarded.
$('title').filter(function(){
// Wrap the current matched element so cheerio traversal methods can be used.
var data = $(this);
// NOTE(review): children() returns child *elements* only. A <title> element
// normally contains just text, so these lookups probably match nothing and
// text() returns '' -- likely the cause of the empty values. TODO confirm
// against the fetched page.
title = data.children().first().text();
release = data.children().last().children().text();
json.title = title;
json.release = release;
})
// Copies the text of the element(s) with class "star-box-giga-star" into
// json.rating; when nothing matches, the callback never runs and rating stays "".
$('.star-box-giga-star').filter(function(){
var data = $(this);
rating = data.text();
json.rating = rating;
})
}
// Serialise the collected fields with 4-space indentation and write them out.
// The callback ignores `err`, so the success message is logged even if the
// write failed.
fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err){
console.log('File successfully written! - Check your project directory for the output.json file');
})
// Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
// This runs as soon as the write has been started, not after it completes.
res.send('Check your console!')
})
});
I'm trying to get the title tag of a url with cheerio. But, I'm getting empty string values. This is my code:
// GET /scrape handler: requests `url`, parses the returned HTML with cheerio,
// collects title/release/rating into an object, writes that object to
// output.json, and replies to the browser with a fixed message.
app.get('/scrape', function(req, res){
// NOTE(review): `url` is assigned without var/let/const, so it is an implicit
// global.
url = 'http://nrabinowitz.github.io/pjscrape/';
request(url, function(error, response, html){
// Parse only when the request itself did not fail. There is no else branch,
// so a failed request is never reported.
if(!error){
var $ = cheerio.load(html);
var title, release, rating;
// NOTE(review): `json` is declared inside this if-block but is read by the
// fs.writeFile call below. Because `var` is function-scoped it is still
// visible there, but it is undefined whenever `error` is set.
var json = { title : "", release : "", rating : ""};
// filter() is used here only to run the callback on each matched <title>;
// its return value is discarded.
$('title').filter(function(){
// Wrap the current matched element so cheerio traversal methods can be used.
var data = $(this);
// NOTE(review): children() returns child *elements* only. A <title> element
// normally contains just text, so these lookups probably match nothing and
// text() returns '' -- likely the cause of the empty values. TODO confirm
// against the fetched page.
title = data.children().first().text();
release = data.children().last().children().text();
json.title = title;
json.release = release;
})
// Copies the text of the element(s) with class "star-box-giga-star" into
// json.rating; when nothing matches, the callback never runs and rating stays "".
$('.star-box-giga-star').filter(function(){
var data = $(this);
rating = data.text();
json.rating = rating;
})
}
// Serialise the collected fields with 4-space indentation and write them out.
// The callback ignores `err`, so the success message is logged even if the
// write failed.
fs.writeFile('output.json', JSON.stringify(json, null, 4), function(err){
console.log('File successfully written! - Check your project directory for the output.json file');
})
// Finally, we'll just send out a message to the browser reminding you that this app does not have a UI.
// This runs as soon as the write has been started, not after it completes.
res.send('Check your console!')
})
});
Share
Improve this question
asked Apr 27, 2014 at 17:25
Filipe Ferminiano
8,791 · 27 gold badges · 113 silver badges · 180 bronze badges
1
- You are not handling the `if (error)` case. Make sure you add it, and check whether an error is what is actually happening. – Risto Novik Commented Apr 27, 2014 at 19:19
2 Answers
Reset to default 24request(url, function (error, response, body)
{
if (!error && response.statusCode == 200)
{
var $ = cheerio.load(body);
var title = $("title").text();
}
})
Using JavaScript, we extract the text contained within the "title" tags.
If Robert Ryan's solution still doesn't work, I'd be suspicious of the formatting of the original page, which may be malformed somehow.
In my case, I was accepting gzip and other compressed encodings but never decoding them, so Cheerio was trying to parse compressed binary data. When I logged the original body to the console, I could see binary garbage instead of plain-text HTML.
本文标签: javascript - Get title of a page with cheerio - Stack Overflow
版权声明:本文标题:javascript - Get title of a page with cheerio - Stack Overflow 内容由网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://www.betaflare.com/web/1738488143a2089543.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论