Nemo

Nemo 关注TA

路漫漫其修远兮,吾将上下而求索。

Nemo

Nemo

关注TA

路漫漫其修远兮,吾将上下而求索。

  •  普罗旺斯
  • 负责帅就完事了
  • 写了1,495,102字

该文章投稿至Nemo社区   Js、Css、Html  板块 复制链接


[Node.js] 第一个爬虫

发布于 2016/07/05 13:57 2,787浏览 2回复 3,203

var http = require('http');
var cheerio = require('cheerio');
// Address of the page to fetch; its HTML is handed to filterChapters below.
var url = 'http://www.link-nemo.com/Cynthia/index.do';

/**
 * Extracts the id and title of every article found in a page's HTML.
 *
 * Each element matching `.article` is inspected: the id is the part of its
 * link's `href` that follows `&articleid=`, and the title is the text of the
 * `.mytitle` element inside that link. Entries lacking either value are
 * skipped.
 *
 * @param {string} html - Markup of the page to scan.
 * @returns {Array<{id: string, title: string}>} One entry per article that
 *          has both a non-empty id and a non-empty title, in document order.
 */
function filterChapters(html){
        var $ = cheerio.load(html);
        var articleData = [];

        $('.article').each(function (index, element){
                var link = $(element).find('a');

                // attr() yields undefined when the article has no link or the
                // link has no href; fall back to '' so split() cannot throw.
                var href = link.attr('href') || '';
                var articleId = href.split('&articleid=')[1];
                var articleTitle = link.find('.mytitle').text();

                // Both values are strings (or undefined), so a truthiness
                // check rejects exactly the missing and empty cases.
                if(articleId && articleTitle){
                        articleData.push({
                                id:articleId,
                                title:articleTitle
                        });
                }
        });

        return articleData;
}


// Fetch the page at `url`, collect the response body, then print the
// article list extracted from it by filterChapters.
http.get(url,function (res){
        // A non-200 response does not carry the page we want to parse;
        // report it instead of parsing the body.
        if(res.statusCode !== 200){
                console.error('Error. Unexpected status code: ' + res.statusCode);
                // Consume the body so the response is not left un-read.
                res.resume();
                return;
        }

        // Decode at the stream level: without this each chunk is a Buffer that
        // gets stringified on its own, which corrupts any multi-byte character
        // that happens to be split across two chunks.
        res.setEncoding('utf8');

        var html = '';
        res.on('data',function (chunk){
                html += chunk;
        });

        res.on('end',function (){
                var articles = filterChapters(html);
                console.log(articles);
        });

}).on('error',function (err){
        // Include the failure reason rather than a bare marker.
        console.error('Error.', err.message);
});

点赞(0)

上一个文章:[NodeJs]HTTP

下一个文章:Nginx 负载均衡配置

点了个评