平时看书的时候喜欢把笔记记在腾讯微博上,想把它们爬下来存个档。爬虫是用 Node.js 写的,很简单,也顺便感受一下 Node.js 的生产力。
直接上代码:
var request = require('request');
var cheerio = require('cheerio');
// Retained so the file's dependency list is unchanged. The crawler itself is
// driven purely by request callbacks and does not need a Future.
var Future = require('fibers/future');

// Account whose timeline is crawled, and the base URL every page link is
// appended to.
var user_id = 'songtianyi';
var url_prefix = 'http://t.qq.com/' + user_id;

/**
 * Fetch one timeline page, print the text of every `.msgCnt` element, then
 * follow the pager link to the next page.
 *
 * The process exits with status 1 on a request error, and with status 0 when
 * there is no further page to follow.
 *
 * @param {string} url  Page to fetch.
 * @param {string} last URL of the page fetched just before this one; used to
 *                      detect that the pager is pointing backwards.
 */
function Parse(url, last) {
  request(url, function (err, resp, body) {
    if (err) {
      console.error(err);
      process.exit(1);
    }

    // Declared locally: the parsed document must not leak into global scope.
    var $ = cheerio.load(body);

    $('.msgCnt').each(function (i, tweet) {
      console.info($(tweet).text());
    });

    // With more than one pager button the second one is followed; otherwise
    // the only one is. Presumably the buttons are "previous"/"next" and a
    // lone button on the final page points backwards -- verify against the
    // page markup.
    var pageBtn = $('.pageBtn');
    var next = pageBtn.length > 1
      ? $(pageBtn[1]).attr('href')
      : pageBtn.attr('href');

    // Stop when there is no link at all (avoids requesting
    // url_prefix + 'undefined') or when the link leads back to the page we
    // just came from.
    if (!next || url_prefix + next === last) {
      process.exit(0);
    }

    Parse(url_prefix + next, url);
  });
}

Parse(url_prefix, '');