<tfoot draggable='sEl'></tfoot>

node-crawler

node-crawler 介绍

node-crawler 是一个比较好用的 Node.js 爬虫框架，我们可以使用熟悉的 jQuery 语法来解析响应返回的页面。

node-crawler安装

npm install crawler

node-crawler使用

var Crawler = require("crawler");

// Crawler instance shared by every c.queue() call below.
var c = new Crawler({
    maxConnections: 10,

    // Default handler: runs for each crawled page unless a queued task
    // supplies its own callback.
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
            done();
            return;
        }

        // res.$ is Cheerio by default: a lean implementation of core jQuery
        // designed specifically for the server.
        const $ = res.$;
        console.log($("title").text());

        // Tell the crawler this task is finished.
        done();
    }
});

// Queue just one URL, handled by the default callback.
// (Hostnames restored: the scraped text had replaced "www." and ".com"
// with watermark strings, leaving unresolvable URLs.)
c.queue('http://www.amazon.com');

// Queue a list of URLs, each handled by the default callback.
c.queue(['http://www.google.com/', 'http://www.yahoo.com']);

// Queue URLs with custom callbacks & parameters
c.queue([{
    uri: 'http://parishackers.org/',

    // NOTE(review): presumably disables Cheerio injection so res.$ is not
    // populated for this task — confirm against the node-crawler docs.
    jQuery: false,

    // The global callback won't be called for this task; this one is used instead.
    // The signature must be (error, res, done): with only two parameters the
    // response object would be bound to `done`, so `res` would be undefined
    // and calling done() would throw.
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
        } else {
            console.log('Grabbed', res.body.length, 'bytes');
        }

        // Always signal completion so the crawler can release the connection slot.
        done();
    }
}]);

// Hand the crawler a raw HTML string instead of a URL to fetch
// (mostly useful for tests).
c.queue([
    { html: '<p>This is a <strong>test</strong></p>' }
]);

网站地址 : http://nodecrawler.org

GitHub: https://github.com/bda-research/node-crawler

网站描述: 一款最好的node.js爬虫工具

node-crawler官方网站

官方网站： http://nodecrawler.org

如果觉得网站内容还不错，欢迎将网站推荐给程序员好友。

查看更多关于node-crawler的详细内容...

声明：本文来自网络，不代表【好得很程序员自学网】立场，转载请注明出处：http://haodehen.cn/did177347

更新时间：2023-04-20 阅读：79次

第1节：consolidate.js 第2节：node-crawler 第3节：nodemon 第4节：Agenda 第5节：axios 第6节：rxdb 第7节：node-mongodb-native 第8节：Koa 第9节：Nodal.js 第10节：synaptic 第11节：Lass.js 第12节：node-postgres【pg】第13节：nlp.js 第14节：bluebird 第15节：Feathers.js 第16节：Cheerio.js 第17节：Roarr 第18节：Kue 第19节：Mean.js 第20节：SuperAgent 第21节：Nest.js 第22节：pandora 第23节：node-opencv 第24节：nodemailer 第25节：jsdom 第26节：foy 第27节：node-cron 第28节：loopback 第29节：mongoose 第30节：Connect 第31节：node-orm2 第32节：DropIt 第33节：JSON-Server 第34节：Derby.js 第35节：yargs 第36节：node 第37节：node-formidable 第38节：Deno 第39节：sharp 第40节：nock 第41节：SocketCluster 第42节：nvm 第43节：node-clinic 第44节：node-xml2js 第45节：Express 第46节：vm2 第47节：node-red 第48节：pm2 第49节：Notif.me 第50节：Kraken.js 第51节：x-ray 第52节：GraphQL Server 第53节：Primus 第54节：lowdb 第55节：Restify.js 第56节：ora 第57节：socketstream 第58节：carlo 第59节：Mean.IO 第60节：knex.js 第61节：Horizon 第62节：avn 第63节：node-schedule 第64节：Mojito 第65节：ncc 第66节：lyo 第67节：mongous 第68节：ViralJS 第69节：Q.js 第70节：svgo 第71节：gnvm 第72节：Spine.js 第73节：Hapi.js 第74节：async 第75节：Fastify 第76节：rebridge 第77节：debug 第78节：Sails.js 第79节：node_redis 第80节：moleculer 第81节：chalk 第82节：colors.js 第83节：NeDB 第84节：RobotJs 第85节：Inquirer.js 第86节：commander.js 第87节：svg-captcha 第88节：awesome-nodejs 第89节：Node-SpliderApi 第90节：opencv4nodejs 第91节：GitBook 第92节：actionHero.js 第93节：Electrode 第94节：Egg.js 第95节：FlexSearch.js 第96节：passport.js 第97节：nodebestpractices 第98节：nodejieba 第99节：css-colorguard 第100节：istanbul 第101节：Sequelize 第102节：flatiron.js