From 7a164d1da0171de878a6eca7eb0df937d58d07cf Mon Sep 17 00:00:00 2001 From: Spades-S Date: Sat, 23 Feb 2019 22:11:23 +0800 Subject: [PATCH] [fix] fix errors caused by the new API of leetcode.com --- .gitignore | 4 +- lib/runtimeConf.js | 2 +- lib/spider.js | 442 ++++++++++++++++++++++++--------------------- package.json | 1 + 4 files changed, 243 insertions(+), 206 deletions(-) diff --git a/.gitignore b/.gitignore index 639fd3e..2f7399a 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,6 @@ private.* log/* .DS_Store result.json -npm-debug.log \ No newline at end of file +npm-debug.log +.vscode/* +yarn.lock \ No newline at end of file diff --git a/lib/runtimeConf.js b/lib/runtimeConf.js index 08085fe..7bb50c4 100644 --- a/lib/runtimeConf.js +++ b/lib/runtimeConf.js @@ -1,4 +1,4 @@ 'use strict' module.exports = { - concurrency: 30 + concurrency: 1 } diff --git a/lib/spider.js b/lib/spider.js index 635f5d0..9611936 100644 --- a/lib/spider.js +++ b/lib/spider.js @@ -1,61 +1,58 @@ -'use strict' -const debug = require('debug')('lc-spider') -const assert = require('assert') - -const path = require('path') -const logger = require('log4js').getLogger('layout-pattern') -const baseUrl = 'https://leetcode.com/' -const requestLib = require('request') -const jar = requestLib.jar() +'use strict'; +const debug = require('debug')('lc-spider'); +const assert = require('assert'); + +const path = require('path'); +const logger = require('log4js').getLogger('layout-pattern'); +const baseUrl = 'https://leetcode.com'; +const requestLib = require('request'); +const jar = requestLib.jar(); // set default http request settings -let request = requestLib - .defaults({ - jar: jar, - baseUrl: baseUrl, - headers: { - 'Host': 'leetcode.com', - 'Cache-Control': 'max-age=0', - 'Upgrade-Insecure-Requests': '1', - 'Referer': 'https://leetcode.com/accounts/login/', - 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6', - 'Content-Type': 'application/x-www-form-urlencoded' - } - }) -let thenifyAll = require('thenify-all') +let request = requestLib.defaults({ + jar: jar, + baseUrl: baseUrl, + headers: { + Origin: baseUrl, + 'Cache-Control': 'max-age=0', + 'Upgrade-Insecure-Requests': '1', + Referer: `${baseUrl}/accounts/login/`, + 'User-Agent': + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36', + Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', + 'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6', + }, +}); +let thenifyAll = require('thenify-all'); // promisify the callback-based request API -request = thenifyAll(request, {}, ['get', 'post']) -let cheerio = require('cheerio') - -let fileUtils = require('./file.js') -let resultUtils = require('./result.js') -let generateMDUtils = require('./generateMD') - -let co = require('co') -let parallel = require('co-parallel') -let executionConfig = require('./runtimeConf') - -let languageLeetcodeNameMap = require('./language').leetcodeName - -let config -let solutionsDirPath -let resultJsonPath -let leetcodeNumObj = {} - -exports.fetch = co.wrap(function * (conf, numObj) { - config = conf - solutionsDirPath = path.resolve(process.cwd(), config.outputDir) - resultJsonPath = path.resolve(solutionsDirPath, 'result.json') +request = thenifyAll(request, {}, ['get', 'post']); +let fileUtils = require('./file.js'); +let resultUtils = require('./result.js'); +let generateMDUtils = require('./generateMD'); + +let co = require('co'); +let parallel = require('co-parallel'); +let executionConfig = require('./runtimeConf'); + +let languageLeetcodeNameMap = require('./language').leetcodeName; + +let config; +let solutionsDirPath; +let resultJsonPath; +let leetcodeNumObj = {}; + +exports.fetch = co.wrap(function*(conf, numObj) { + config = conf; + solutionsDirPath = path.resolve(process.cwd(), config.outputDir); + resultJsonPath = path.resolve(solutionsDirPath, 'result.json'); /** * fetch solved problems list first * */ - let acList + let acList; try { - yield login(conf) - acList = yield fetchACLists() + yield login(conf); + acList = yield fetchACLists(); } catch (e) { - throw e + throw e; } /** @@ -63,35 +60,34 @@ exports.fetch = co.wrap(function * (conf, numObj) { * if we fetched before, then only fetch the newly submitted solutions * or if the user specified problem number, fetch the solutions of that * */ - let fetchList + let fetchList; try { if (undefined === numObj) { - fetchList = yield fetchSolutionsNotEverFetched(acList) + fetchList = yield fetchSolutionsNotEverFetched(acList); } else { - fetchList = yield fetchWithGivenNumber(acList, numObj) + fetchList = yield fetchWithGivenNumber(acList, numObj); } } catch (e) { - logger.error('error happened when forming the fetch mission') - throw e + logger.error('error happened when forming the fetch mission'); + throw e; } /** * fetch all the solutions in parallel * and every time one solution fetched, write it into file immediately * */ - let languageCodeMapArr = yield parallelFetch(fetchList) - + let languageCodeMapArr = yield parallelFetch(fetchList); /** * write the fetch result into result.json * */ - let resultObj = yield resultUtils.writeResult(languageCodeMapArr, leetcodeNumObj, resultJsonPath) - leetcodeNumObj.language = config.language + let resultObj = yield resultUtils.writeResult(languageCodeMapArr, leetcodeNumObj, resultJsonPath); + leetcodeNumObj.language = config.language; /** * generate READE.md * */ - yield generateMDUtils.generateMarkdown(resultObj, leetcodeNumObj, config.outputDir, config.template) -}) + yield generateMDUtils.generateMarkdown(resultObj, leetcodeNumObj, config.outputDir, config.template); +}); /** * Use the config json file to login @@ -101,31 +97,37 @@ exports.fetch = co.wrap(function * (conf, numObj) { * @return {Promise} * @api public */ -let login = co.wrap(function * (conf) { - config = conf - let responseAndBody = yield request.get('/accounts/login/') - let $ = cheerio.load(responseAndBody[1]) - let token = $('input[name=csrfmiddlewaretoken]').val() - - logger.info('token get') - debug('token:' + token) - - let cookie = jar.getCookies('https://leetcode.com/') - let cookieOfToken = cookie.find(element => element.key === 'csrftoken') - assert.notEqual(cookieOfToken, undefined, 'network error: cannot get csrftoken') - - yield request({method: 'POST', +let login = co.wrap(function*(conf) { + config = conf; + yield request({ + method: 'GET', url: '/accounts/login/', + }); + let cookie = jar.getCookies(baseUrl); + const csrfToken = cookie.find(element => { + return element.key === 'csrftoken'; + }); + assert.notEqual(csrfToken, undefined, 'network error: cannot get csrftoken'); + + yield request({ + method: 'POST', + url: '/accounts/login/', + cookie: `csrftoken=${csrfToken.value}`, form: { - 'csrfmiddlewaretoken': token, - 'login': conf['username'], - 'password': conf['password'] + csrfmiddlewaretoken: csrfToken.value, + login: conf['username'], + password: conf['password'], } - }) - cookie = jar.getCookies('https://leetcode.com/') - assert.notEqual(cookie.find(element => element.key === 'LEETCODE_SESSION'), undefined, 'incorrect username or password') - logger.info('login successfully') -}) + }); + + cookie = jar.getCookies(baseUrl); + assert.notEqual( + cookie.find(element => element.key === 'LEETCODE_SESSION'), + undefined, + 'incorrect username or password', + ); + logger.info('login successfully'); +}); /** * Fetch the accepted solutions' list. @@ -134,48 +136,51 @@ let login = co.wrap(function * (conf) { * @return {Array} list of solutions needed to fetch * @api public */ -let fetchACLists = co.wrap(function * () { - debug('fetch the accepted solutions\' list') +let fetchACLists = co.wrap(function*() { + debug("fetch the accepted solutions' list"); - let [, body] = yield request.get('/api/problems/algorithms/') + let [, body] = yield request.get('/api/problems/algorithms/'); try { - body = JSON.parse(body) + body = JSON.parse(body); } catch (e) { - debug(body) - throw (new Error('network error:JSON data error')) + debug(body); + throw new Error('network error:JSON data error'); } - leetcodeNumObj.total = body['num_total'] - leetcodeNumObj.solved = body['num_solved'] - leetcodeNumObj.locked = 0 - return body['stat_status_pairs'] -}) - -let fetchWithGivenNumber = co.wrap(function * (acLists, numObj) { - yield resultUtils.getResult() - return acLists.filter(element => { - if (element['paid_only']) { - leetcodeNumObj.locked++ - } - //! (element.stat['question_id'] in result) - // if we fetched the problem before, we will not do it again - return element.status === 'ac' && (element.stat['question_id'] in numObj) - }).reverse() -}) + leetcodeNumObj.total = body['num_total']; + leetcodeNumObj.solved = body['num_solved']; + leetcodeNumObj.locked = 0; + return body['stat_status_pairs']; +}); + +let fetchWithGivenNumber = co.wrap(function*(acLists, numObj) { + yield resultUtils.getResult(); + return acLists + .filter(element => { + if (element['paid_only']) { + leetcodeNumObj.locked++; + } + //! (element.stat['question_id'] in result) + // if we fetched the problem before, we will not do it again + return element.status === 'ac' && element.stat['question_id'] in numObj; + }) + .reverse(); +}); -let fetchSolutionsNotEverFetched = co.wrap(function * (acLists) { +let fetchSolutionsNotEverFetched = co.wrap(function*(acLists) { // load the last spider result from result.json // if result.json doesn't exist that means never fetched solutions before - let result = yield resultUtils.getResult(resultJsonPath) - - return acLists.filter(element => { - if (element['paid_only']) { - leetcodeNumObj.locked++ - } - //! (element.stat['question_id'] in result) - // if we fetched once we will not fetch this problem any more - return element.status === 'ac' && !(element.stat['question_id'] in result) - }).reverse() -}) + let result = yield resultUtils.getResult(resultJsonPath); + return acLists + .filter(element => { + if (element['paid_only']) { + leetcodeNumObj.locked++; + } + //! (element.stat['question_id'] in result) + // if we fetched once we will not fetch this problem any more + return element.status === 'ac' && !(element.stat['question_id'] in result); + }) + .reverse(); +}); /** * Fetch the accepted solutions' code and question. @@ -184,41 +189,42 @@ let fetchSolutionsNotEverFetched = co.wrap(function * (acLists) { * @return {Promise} * @api public */ -let parallelFetch = co.wrap(function * (acLists) { - debug('fetch solutions ') +let parallelFetch = co.wrap(function*(acLists) { + debug('fetch solutions '); if (acLists && acLists.length > 0) { // form the promises array // every promise in it can be parallel executed let acListPromises = acLists.map(acProblem => { - let languageClone = [] + let languageClone = []; // use an object to store different language's solutions - let languageCodeMap = {} + let languageCodeMap = {}; // store the problem's info in languageCodeMap - languageCodeMap._title = acProblem.stat['question__title_slug'] - languageCodeMap._id = acProblem.stat['question_id'] - languageCodeMap._level = acProblem['difficulty']['level'] - languageCodeMap._paid_only = acProblem['paid_only'] - languageCodeMap._acceptance = (acProblem['stat']['total_acs'] / acProblem['stat']['total_submitted'] * 100).toFixed(2) + '%' + languageCodeMap._title = acProblem.stat['question__title_slug']; + languageCodeMap._id = acProblem.stat['question_id']; + languageCodeMap._level = acProblem['difficulty']['level']; + languageCodeMap._paid_only = acProblem['paid_only']; + languageCodeMap._acceptance = + ((acProblem['stat']['total_acs'] / acProblem['stat']['total_submitted']) * 100).toFixed(2) + '%'; // copy the language from config file to languageClone // use it to fetch every problem's solution config.language.forEach(language => { - languageClone.push(language) - }) - logger.info('fetch ' + languageCodeMap._id + ' . ' + languageCodeMap._title) - return fetchAndWrite(acProblem, languageClone, languageCodeMap) - }) + languageClone.push(language); + }); + logger.info('fetch ' + languageCodeMap._id + ' . ' + languageCodeMap._title); + return fetchAndWrite(acProblem, languageClone, languageCodeMap); + }); // use parallel to control the number of concurrence - let languageCodeMapArr = yield parallel(acListPromises, executionConfig.concurrency) + let languageCodeMapArr = yield parallel(acListPromises, executionConfig.concurrency); // debug(languageCodeMapArr); - return languageCodeMapArr + return languageCodeMapArr; } else { - logger.info('no new solution need to be fetched') - return [] + logger.info('no new solution need to be fetched'); + return []; } -}) +}); /** * Fetch a leetcode problem's solutions and question @@ -230,12 +236,16 @@ let parallelFetch = co.wrap(function * (acLists) { * @return {Object} languageCodeMap * @api public */ -let fetchAndWrite = function * (problemInfo, languageToFetch, languageCodeMap) { - yield fetchACSolutionOfProblem(problemInfo, languageToFetch, 0, languageCodeMap) - yield fetchQuestion(problemInfo, languageCodeMap).catch(err => { throw err }) - yield fileUtils.writeToFile(languageCodeMap, solutionsDirPath).catch(err => { throw err }) - return languageCodeMap -} +let fetchAndWrite = function*(problemInfo, languageToFetch, languageCodeMap) { + yield fetchACSolutionOfProblem(problemInfo, languageToFetch, 0, languageCodeMap); + yield fetchQuestion(problemInfo, languageCodeMap).catch(err => { + throw err; + }); + yield fileUtils.writeToFile(languageCodeMap, solutionsDirPath).catch(err => { + throw err; + }); + return languageCodeMap; +}; /** * Fetch a leetcode problem's solutions and question @@ -249,68 +259,78 @@ let fetchAndWrite = function * (problemInfo, languageToFetch, languageCodeMap) { * @return {Object} languageCodeMap * @api public */ -let fetchACSolutionOfProblem = co.wrap(function * (problemInfo, languageToFetch, page, languageCodeMap) { - debug('fetch ' + problemInfo.stat['question__title_slug']) +let fetchACSolutionOfProblem = co.wrap(function*(problemInfo, languageToFetch, page, languageCodeMap) { + debug('fetch ' + problemInfo.stat['question__title_slug']); if (languageToFetch.length < 1) { - return languageCodeMap + return languageCodeMap; } - let responseAndBody, submissionsJson + let responseAndBody, submissionsJson; try { responseAndBody = yield request.get({ - url: '/api/submissions/' + problemInfo.stat['question__title_slug'] + '/?offset=' + (page * 50) + '&limit=50', + url: '/api/submissions/' + problemInfo.stat['question__title_slug'] + '/?offset=' + page * 50 + '&limit=50', headers: { - 'Accept': '*/*' - } - }) - submissionsJson = JSON.parse(responseAndBody[1])['submissions_dump'] + Accept: '*/*', + }, + }); + submissionsJson = JSON.parse(responseAndBody[1])['submissions_dump'] || []; } catch (err) { - logger.error('Fetching submissions of ' + problemInfo.stat['question__title_slug'] + ' failed') - logger.error(err.stack) - return + logger.error('Fetching submissions of ' + problemInfo.stat['question__title_slug'] + ' failed'); + logger.error(err.stack); + return; } // form the promises array - let acSolutionPromise = [] + let acSolutionPromise = []; // check the submissions list submissionsJson.forEach(e => { if (e['status_display'] !== 'Accepted') { - return + return; } - let language = languageLeetcodeNameMap[e['lang']] + let language = languageLeetcodeNameMap[e['lang']]; if (~languageToFetch.indexOf(language)) { - languageToFetch.splice(languageToFetch.indexOf(language), 1) - let codeUrl = e['url'] - debug(problemInfo.stat['question__title_slug'], codeUrl, language) - acSolutionPromise.push(fetchSolutionsOfUrl(codeUrl, 0).then(codeObj => { - languageCodeMap[language] = codeObj.code - }).catch(err => { - logger.error('Fetching the ' + language + ' code of ' + problemInfo.stat['question__title_slug'] + ' failed') - logger.error(err.stack) - })) + languageToFetch.splice(languageToFetch.indexOf(language), 1); + let codeUrl = e['url']; + debug(problemInfo.stat['question__title_slug'], codeUrl, language); + acSolutionPromise.push( + fetchSolutionsOfUrl(codeUrl, 0) + .then(codeObj => { + languageCodeMap[language] = codeObj.code; + }) + .catch(err => { + logger.error( + 'Fetching the ' + + language + + ' code of ' + + problemInfo.stat['question__title_slug'] + + ' failed', + ); + logger.error(err.stack); + }), + ); } - }) + }); // if no solution can be fetched if (acSolutionPromise.length < 1) { if (submissionsJson.length === 50 && languageToFetch.length > 0) { // then fetch the next page of submissions - return yield fetchACSolutionOfProblem(problemInfo, languageToFetch, page + 1, languageCodeMap) + return yield fetchACSolutionOfProblem(problemInfo, languageToFetch, page + 1, languageCodeMap); } else { - return languageCodeMap + return languageCodeMap; } } else { - yield acSolutionPromise + yield acSolutionPromise; if (submissionsJson.length === 50 && languageToFetch.length > 0) { - return yield fetchACSolutionOfProblem(problemInfo, languageToFetch, page + 1, languageCodeMap) + return yield fetchACSolutionOfProblem(problemInfo, languageToFetch, page + 1, languageCodeMap); } else { // fetching finished , let's return it - return languageCodeMap + return languageCodeMap; } } -}) +}); /** * Fetch a leetcode problem's question @@ -321,20 +341,34 @@ let fetchACSolutionOfProblem = co.wrap(function * (problemInfo, languageToFetch, * @return {Promise} * @api public */ -let fetchQuestion = co.wrap(function * (problemInfo, languageCodeMap) { - let responseAndBody +let fetchQuestion = co.wrap(function*(problemInfo, languageCodeMap) { + let responseAndBody; + let cookie = jar.getCookies(baseUrl); + const csrfToken = cookie.find(element => { + return element.key === 'csrftoken'; + }); + try { - responseAndBody = yield request.get('/problems/' + problemInfo.stat['question__title_slug'] + '/') + responseAndBody = yield request({ + url: '/graphql', + method: 'POST', + origin: baseUrl, + referer: `${baseUrl}/problems/two-sum/`, + 'x-newrelic-id': 'UAQDVFVRGwEAXVlbBAg=', + 'x-csrftoken': csrfToken, + json: { + operationName: 'questionData', + variables: { titleSlug: problemInfo.stat['question__title_slug'] }, + query: + 'query questionData($titleSlug: String!) {\n question(titleSlug: $titleSlug) {\n questionId\n questionFrontendId\n boundTopicId\n title\n titleSlug\n content\n translatedTitle\n translatedContent\n isPaidOnly\n difficulty\n likes\n dislikes\n isLiked\n similarQuestions\n contributors {\n username\n profileUrl\n avatarUrl\n __typename\n }\n langToValidPlayground\n topicTags {\n name\n slug\n translatedName\n __typename\n }\n companyTagStats\n codeSnippets {\n lang\n langSlug\n code\n __typename\n }\n stats\n hints\n solution {\n id\n canSeeDetail\n __typename\n }\n status\n sampleTestCase\n metaData\n judgerAvailable\n judgeType\n mysqlSchemas\n enableRunCode\n enableTestMode\n envInfo\n __typename\n }\n}\n', + }, + }); } catch (err) { - debug('network error:error happened when fetching problem \'' + problemInfo.stat['question__title_slug'] + '\'') - throw err - }; - - let $ = cheerio.load(responseAndBody[1]) - // debug('problem :'); - // debug($('meta[name=description]').attr('content')); - languageCodeMap['_question'] = $('meta[name=description]').attr('content') -}) + debug("network error:error happened when fetching problem '" + problemInfo.stat['question__title_slug'] + "'"); + throw err; + } + languageCodeMap['_question'] = responseAndBody[1].data.question.content; +}); /** * Fetch a leetcode problem's solution of given url @@ -345,36 +379,36 @@ let fetchQuestion = co.wrap(function * (problemInfo, languageCodeMap) { * @return {Promise} * @api public */ -let fetchSolutionsOfUrl = co.wrap(function * (url, times) { - let responseAndBody +let fetchSolutionsOfUrl = co.wrap(function*(url, times) { + let responseAndBody; try { - responseAndBody = yield request.get(url) + responseAndBody = yield request.get(url); } catch (e) { - debug(e.stack) + debug(e.stack); if (times < 5) { // fixme! error often occurs when fetching the solution page // so i repeat at most 5 times - return yield co(fetchSolutionsOfUrl, url, ++times) + return yield co(fetchSolutionsOfUrl, url, ++times); } - throw new Error('network error:cannot get the page of url' + url) - }; - let body = responseAndBody[1] + throw new Error('network error:cannot get the page of url' + url); + } + let body = responseAndBody[1]; - let matchResult = body.match(/submissionCode:\s*'([\s\S]*)',\s*editCodeUrl/) + let matchResult = body.match(/submissionCode:\s*'([\s\S]*)',\s*editCodeUrl/); if (matchResult === null) { if (times < 5) { // fixme! error often occurs when fetching the solution page // so i repeat at most 5 times - return yield fetchSolutionsOfUrl(url, ++times) + return yield fetchSolutionsOfUrl(url, ++times); } - debug('can not get full page of' + url) - throw new Error('network error:the page of' + url + 'is not complete') + debug('can not get full page of' + url); + throw new Error('network error:the page of' + url + 'is not complete'); } - let codeInUnicode = matchResult[1] + let codeInUnicode = matchResult[1]; /* eslint-disable no-eval */ - let code = eval("'" + codeInUnicode + "'") - debug(url + 'code get!') - return {code} -}) + let code = eval("'" + codeInUnicode + "'"); + debug(url + 'code get!'); + return { code }; +}); diff --git a/package.json b/package.json index 953a6a6..e901ab5 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "co": "4.6.0", "co-parallel": "^1.0.0", "debug": "^2.3.3", + "h2-request": "^1.1.1", "log4js": "^1.0.1", "mustache": "^2.3.0", "request": "2.79.0",