// 初始url var url = "http://detail.zol.com.cn/convenienttravel/hangzhou/#list_merchant_loc" // 由于每个a标签下是相对路径,故需要一个根地址来拼接,如下 var urlRoot = "http://detail.zol.com.cn" // 存放所有url,之所以用set,是为了防止有相同的而重复爬去 var urls = newSet() // 存储所有数据 var data = []
var cheerio = require('cheerio'); var fetch = require('node-fetch'); var url = "http://detail.zol.com.cn/convenienttravel/hangzhou/#list_merchant_loc" var urlRoot = "http://detail.zol.com.cn" // var url = "http://localhost:3222/app1" var urls = newSet() var data = [] asyncfunctionad(arg){ let url2 = arg || url; var app = awaitfetch(url2).then(res=>res.text()) var $ = cheerio.load(app) var ele = $("#J_PicMode a.pic") var old_urls = [] var urlapp = [] for (let i = 0; i < ele.length; i++) { old_urls.push(fetch(urlRoot+$(ele[i]).attr('href')).then(res=>res.text())) } urlapp = awaitPromise.all(old_urls) for (let i = 0; i < urlapp.length; i++) { let $2 = cheerio.load(urlapp[i],{decodeEntities: false}) data.push({ name:$2(".product-model__name").text(), price:$2(".price-type").text() }) } var nextURL = $(".next").attr('href') if (nextURL){ let next = awaitfetch(urlRoot+nextURL).then(res=>res.text()) ad(urlRoot+nextURL) } return data } ad()