admin管理员组

文章数量:1323714

I am trying to wrap my head around Node.js and some async operations. In the following code, I fetch some RSS feeds and store the articles found if I haven't stored them before. The code works and stores new articles. However, I'm not sure how to alter this so that I know when all the articles are done being parsed. For example, the callback on each async.eachLimit gets called each time the limit is met (after 10 articles or 5 feeds). So how do I know when they're done?

var FeedParser = require('feedparser');
var request = require('request');
var mysql = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
        host :  'localhost',
        user : 'someuser',
        password : 'somepass',
        database : 'somedb'
});

connection.connect();

connection.query('SELECT * FROM rssfeed', function(err, rows, fields) {
        if(err == null){
                async.eachLimit(rows, 5, parseFeed, function(err) {
                        if(! err) {
                                //horray
                        } else {
                                console.log(err);
                        }
                });
        }
});

function parseFeed(feed, callback) {
        var articles = [];
        request(feed.link)
        .pipe(new FeedParser())
        .on('error', function(error) {
                callback(error);
        })
        .on('meta', function(meta) {
        })
        .on('readable', function() {
                var stream = this, item;
                item = stream.read();
                if(item != null) {
                        item.rssfeed_id = feed.id;
                        articles.push(item);
                }
        })
        .on('finish', function() {
                async.eachLimit(articles, 10, parseArticle, function(err) {
                        if(! err) {
                                console.log('article each callback');
                        } else {
                                callback(error);
                        }
                });
        });
        callback();
}
function parseArticle(item, callback) {
        if(item.hasOwnProperty('rssfeed_id') && item.hasOwnProperty('guid') && item.hasOwnProperty('link') && item.hasOwnProperty('title')){
                connection.query('SELECT * FROM rssarticle WHERE rssfeed_id = ? AND guid = ?', [item.rssfeed_id, item.guid], function(err, rows, fields) {
                        if(rows.length == 0){
                                connection.query('INSERT INTO rssarticle SET ?', {
                                        rssfeed_id: item.rssfeed_id,
                                        link: item.link,
                                        title: item.title,
                                        description: item.description,
                                        publish_date: item.pubDate,
                                        guid: item.guid
                                }, function(err, result){
                                        if(err != null){
                                                console.log(err);
                                        }
                                });
                        }
                });
        }
        callback();
}

I am trying to wrap my head around Node.js and some async operations. In the following code, I fetch some RSS feeds and store the articles found if I haven't stored them before. The code works and stores new articles. However, I'm not sure how to alter this so that I know when all the articles are done being parsed. For example, the callback on each async.eachLimit gets called each time the limit is met (after 10 articles or 5 feeds). So how do I know when they're done?

var FeedParser = require('feedparser');
var request = require('request');
var mysql = require('mysql');
var async = require('async');

var connection = mysql.createConnection({
        host :  'localhost',
        user : 'someuser',
        password : 'somepass',
        database : 'somedb'
});

connection.connect();

connection.query('SELECT * FROM rssfeed', function(err, rows, fields) {
        if(err == null){
                async.eachLimit(rows, 5, parseFeed, function(err) {
                        if(! err) {
                                //horray
                        } else {
                                console.log(err);
                        }
                });
        }
});

function parseFeed(feed, callback) {
        var articles = [];
        request(feed.link)
        .pipe(new FeedParser())
        .on('error', function(error) {
                callback(error);
        })
        .on('meta', function(meta) {
        })
        .on('readable', function() {
                var stream = this, item;
                item = stream.read();
                if(item != null) {
                        item.rssfeed_id = feed.id;
                        articles.push(item);
                }
        })
        .on('finish', function() {
                async.eachLimit(articles, 10, parseArticle, function(err) {
                        if(! err) {
                                console.log('article each callback');
                        } else {
                                callback(error);
                        }
                });
        });
        callback();
}
function parseArticle(item, callback) {
        if(item.hasOwnProperty('rssfeed_id') && item.hasOwnProperty('guid') && item.hasOwnProperty('link') && item.hasOwnProperty('title')){
                connection.query('SELECT * FROM rssarticle WHERE rssfeed_id = ? AND guid = ?', [item.rssfeed_id, item.guid], function(err, rows, fields) {
                        if(rows.length == 0){
                                connection.query('INSERT INTO rssarticle SET ?', {
                                        rssfeed_id: item.rssfeed_id,
                                        link: item.link,
                                        title: item.title,
                                        description: item.description,
                                        publish_date: item.pubDate,
                                        guid: item.guid
                                }, function(err, result){
                                        if(err != null){
                                                console.log(err);
                                        }
                                });
                        }
                });
        }
        callback();
}
Share Improve this question asked Dec 10, 2013 at 22:59 nwalkenwalke 3,2096 gold badges37 silver badges61 bronze badges 1
  • Uhm, the callback is called after all the async operations in the iteration has pleted, are you looking for something else? – adeneo Commented Dec 10, 2013 at 23:03
Add a ment  | 

1 Answer 1

Reset to default 6

For one, you're preemptively calling your callbacks way too early.

function parseFeed(feed, callback) {
  request
    .streamStuff()
    .streamStuff()
    .streamStuff();

  callback();
}

You shouldn't be calling callback before you're done. Otherwise your "finished" method will be called but your async code will actually still be running.

So instead of doing:

.on('finish', function() {
    async.eachLimit(articles, 10, parseArticle, function(err) {
        if(! err) {
            console.log('article each callback');
        } else {
            callback(error);
        }
    });
});
callback();

Just do

.on('finish', function() {
    async.eachLimit(articles, 10, parseArticle, function(err) {
        if(! err) {
            // assuming this is a stub and really ends up doing `callback();`
            console.log('article each callback');
        } else {
            callback(error);
        }
    });
});

本文标签: javascriptnodejs asynceach callbackhow do I know when it39s doneStack Overflow