用 Node.js 爬取腾讯微博

create@2014-10-25

平时看书的时候喜欢把笔记记在腾讯微博上,想把它们爬下来存个档。脚本是用 Node.js 写的,很简单,顺便感受一下 Node.js 的生产力。


直接上代码

var request = require('request')
  , cheerio = require('cheerio');
var Future = require('fibers/future');

// Account whose timeline is crawled. Defaults to the original hard-coded
// account; pass another id as the first command-line argument to override it.
var user_id = process.argv[2] || 'songtianyi';
// Base address of that account's timeline; pager hrefs are appended to it.
var url_prefix = 'http://t.qq.com/' + user_id;

/**
 * Fetch one timeline page, print the text of every `.msgCnt` element on it,
 * then follow the pager link and repeat. The crawl ends (process exit code 0)
 * when the pager link points back at the page we just came from, or when the
 * page has no pager link at all. A request error or a non-200 response ends
 * the process with exit code 1.
 *
 * @param {string} url  Address of the page to fetch.
 * @param {string} last Address of the previously fetched page ('' on the
 *                      first call); used to detect that the pager has turned
 *                      around.
 */
function Parse(url, last){
    request(url, function(err, resp, body){
        if(err){
            console.error(err);
            process.exit(1);
        }
        // Do not try to parse an error page as if it were a timeline.
        if(!resp || resp.statusCode !== 200){
            console.error('unexpected HTTP status ' + (resp && resp.statusCode) + ' for ' + url);
            process.exit(1);
        }

        // Declared locally so nothing leaks into the global scope.
        var $ = cheerio.load(body);
        $('.msgCnt').each(function(i, tweet){
            console.info($(tweet).text());
        });

        var pageBtn = $('.pageBtn');
        // With more than one pager button the second one is followed;
        // otherwise the only button is used (presumably "previous"/"next"
        // ordering on the site -- verify against the live markup).
        var next = pageBtn.length > 1
            ? $(pageBtn[1]).attr('href')
            : pageBtn.attr('href');

        // No pager link: there is nowhere else to go, so stop instead of
        // requesting "<prefix>undefined".
        if(!next){
            process.exit(0);
        }

        var next_url = url_prefix + next;
        // The pager now leads back to where we came from: last page reached.
        if(next_url === last){
            process.exit(0);
        }
        Parse(next_url, url);
    });
}
// Start crawling at the user's base URL; '' is passed as `last` because no page precedes the first one.
Parse(url_prefix,'');