We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
/**
 * Crawler script: reads the Zhihu columns followed by USER_NAME, stores each
 * column through ColumnModel, then stores the first three articles returned
 * for every column through ContentModel.
 */
const request = require('request-promise-native');
const cheerio = require('cheerio');
const config = require('../config');
const zhihuRoot = config.zhihu.root;
const pageSize = config.page.pageSize;
const ColumnModel = require('../model/column');
const ContentModel = require('../model/content');
const USER_NAME = 'anran-0423';

// db start
const {host, database, port} = config.db;
const mongoose = require('mongoose');
mongoose.Promise = global.Promise;
mongoose.connect(host, database, port);

/**
 * Fetch one page of the columns followed by USER_NAME and upsert each one.
 *
 * @param {number} offset - index of the first column to fetch
 * @param {number} limit - number of columns to fetch
 * @returns {Promise<Array>} the stored column documents
 */
const exploreColumns = async (offset, limit) => {
  const paramObj = [`offset=${offset}`, `limit=${limit}`].join('&');
  const options = {
    method: 'GET',
    uri: `https://www.zhihu.com/api/v4/members/${USER_NAME}/following-columns?${paramObj}`,
    json: true,
  };
  const rsData = await request(options);

  // Store the column data. `new: true` makes findOneAndUpdate resolve with the
  // updated document instead of the previous one; see
  // https://segmentfault.com/a/1190000009706886 and
  // https://stackoverflow.com/questions/32811510/mongoose-findoneandupdate-doesnt-return-updated-document
  const promiseArr = rsData.data.map((column) => {
    return ColumnModel
      .findOneAndUpdate({id: column.id}, column, {upsert: true, new: true})
      .exec();
  });
  return Promise.all(promiseArr);
};

/**
 * Fetch the articles of a column and keep the first three returned.
 *
 * @param {Object} column - column document; only `column.id` is read
 * @returns {Promise<Array>} up to three articles, each tagged with `columnId`
 */
const getArticledData = async (column) => {
  const options = {
    uri: `https://zhuanlan.zhihu.com/api2/columns/${column.id}/articles`,
    json: true,
  };
  // Awaiting the request directly (instead of wrapping it in `new Promise`)
  // lets a failed request reject rather than leaving the caller pending.
  const res = await request(options);
  return res.data.slice(0, 3).map((article) => {
    article.columnId = column.id;
    return article;
  });
};

/**
 * Read the number of columns USER_NAME follows from the profile page's
 * embedded `#js-initialData` JSON.
 *
 * @returns {Promise<number>} the followingColumnsCount value
 */
const getPageSize = async () => {
  const html = await request(`${zhihuRoot}/people/${USER_NAME}/following/columns`);
  const $ = cheerio.load(html);
  const jsonData = JSON.parse($('#js-initialData').html());
  const users = jsonData.initialState.entities.users;
  return users[USER_NAME].followingColumnsCount;
};

/**
 * Upsert the given articles, linking each one to the stored column.
 *
 * @param {Array} articleArr - articles to store
 * @param {Object} column - stored column document; its `_id` becomes `columnId`
 * @returns {Promise<Array>} resolves once every write has completed
 */
const saveArticles = (articleArr, column) => {
  const promiseArr = articleArr.map((article) => {
    article.columnId = column._id;
    // Return the query promise so Promise.all really waits for the write
    // and a failed write rejects instead of being silently dropped.
    return ContentModel
      .update({id: article.id}, article, {upsert: true})
      .exec();
  });
  return Promise.all(promiseArr);
};

/**
 * Entry point: crawl every page of followed columns, then the articles of
 * each column, and log the outcome.
 *
 * @returns {Promise<void>}
 */
const init = async () => {
  try {
    const allNum = await getPageSize();
    const pageCount = Math.ceil(allNum / pageSize);
    const pageTasks = Array.from(new Array(pageCount), async (val, pageIndex) => {
      // `limit` is a page length, not an end index, so it stays at pageSize.
      const offset = pageIndex * pageSize;
      const columns = await exploreColumns(offset, pageSize);
      const articleTasks = columns.map(async (column) => {
        const articleArr = await getArticledData(column);
        return saveArticles(articleArr, column);
      });
      return Promise.all(articleTasks);
    });
    await Promise.all(pageTasks);
    console.log('抓取数据成功!');
  } catch (err) {
    console.log(err);
  }
};

init();
主要遇到的问题: Using async/await with a forEach loop https://stackoverflow.com/questions/37576685/using-async-await-with-a-foreach-loop
The text was updated successfully, but these errors were encountered:
No branches or pull requests
主要遇到的问题:
Using async/await with a forEach loop
https://stackoverflow.com/questions/37576685/using-async-await-with-a-foreach-loop
The text was updated successfully, but these errors were encountered: