平时看书的时候，我喜欢把笔记记在腾讯微博上，想把它们爬下来存个档。爬虫是用 Node.js 写的，很简单，顺便感受一下 Node.js 的生产力。
直接上代码：
var request = require('request')
, cheerio = require('cheerio');
var Future = require('fibers/future');
// Account name whose page is crawled.
var user_id = 'songtianyi';
// Base address of that account's page; Parse() appends pagination hrefs to it.
var url_prefix = 'http://t.qq.com/'.concat(user_id);
/**
 * Fetch one page, print the text of every `.msgCnt` element on it, then
 * follow the page's pagination link and repeat.
 *
 * The link to follow is the second `.pageBtn` element when the page has more
 * than one, otherwise the only one. Presumably the first button on a middle
 * page is "previous" and the second is "next" -- not verifiable from here.
 * Crawling stops (process exits with code 0) when the link to follow points
 * back at `last`, or when the page has no usable pagination link at all.
 * A request error is logged and the process exits with code 1.
 *
 * @param {string} url  Address of the page to fetch.
 * @param {string} last Address of the page fetched just before this one
 *                      ('' for the very first call).
 * @returns {undefined} Results are reported through console output only.
 */
function Parse(url, last) {
  'use strict';
  // NOTE: the original created a fibers Future and resolved it synchronously,
  // before the request had finished; nothing ever waited on it, so it was
  // dropped here.
  request(url, function (err, resp, body) {
    if (err) {
      console.error(err);
      process.exit(1);
      return;
    }

    // Keep the cheerio handle local instead of leaking it as a global.
    var $ = cheerio.load(body);

    $('.msgCnt').each(function (i, tweet) {
      console.info($(tweet).text());
    });

    var pageBtns = $('.pageBtn');
    var linkEl = pageBtns.length > 1 ? pageBtns[1] : pageBtns[0];
    var next = linkEl ? $(linkEl).attr('href') : undefined;

    // No pagination link: there is nowhere left to go. Without this guard
    // the crawler would go on to request "<prefix>undefined".
    if (!next) {
      process.exit(0);
      return;
    }

    var nextUrl = url_prefix + next;
    // The link leads back to the page we just came from: we are done.
    if (nextUrl === last) {
      process.exit(0);
      return;
    }

    Parse(nextUrl, url);
  });
}
// Kick off the crawl at the account's base page; there is no previous page yet.
Parse(url_prefix, "");