From 334f3eac21bf377e8ca07d220500842509d139ba Mon Sep 17 00:00:00 2001 From: Gabriel Date: Mon, 29 Apr 2019 21:19:01 +0200 Subject: [PATCH] first implementation of link crawling --- index.js | 111 +++++++++++++++++++++++++++++++++------------- package-lock.json | 68 ++++++++++++++++++++++++++++ package.json | 3 ++ 3 files changed, 150 insertions(+), 32 deletions(-) diff --git a/index.js b/index.js index cd04771..9efe6a6 100644 --- a/index.js +++ b/index.js @@ -2,56 +2,103 @@ const ora = require('ora'); const chalk = require('chalk'); const fs = require('fs'); const path = require('path'); +const url = require('url'); const mkdirp = require('mkdirp'); const cosmiconfig = require('cosmiconfig'); const Prerenderer = require('@prerenderer/prerenderer'); const Puppeteer = require('@prerenderer/renderer-puppeteer'); const htmlnano = require('htmlnano'); const prettyMs = require('pretty-ms'); +const isRelativeUrl = require('is-relative-url'); +const difference = require('lodash.difference'); +const cheerio = require('cheerio'); + +module.exports = async bundler => { + const { outDir, publicURL } = bundler.options; + + let routes = ['/']; // the default route + let rendererConfig = {}; + let crawl = false; + const found = await cosmiconfig('prerender').search(); + if (found) { + const { config } = found; + if (Array.isArray(config)) { + routes = config; + } else { + if (config.rendererConfig) ({ rendererConfig } = config); + if (config.routes) ({ routes } = config); + if (config.crawl) ({ crawl } = config); + } + } + + const writeHtml = async route => { + try { + const outputDir = path.join(outDir, route.route); + const file = path.normalize(`${outputDir}/index.html`); + mkdirp.sync(outputDir); + const { html } = await htmlnano.process(route.html.trim()); + fs.writeFileSync(file, html); + const end = Date.now(); + } catch (err) { + console.error(err); + } + }; + + const prerenderer = new Prerenderer({ + staticDir: outDir, + renderer: new Puppeteer(rendererConfig), + }); + + const prerenderRoutes = async routesToPrerender => { + const results = await prerenderer.renderRoutes(routesToPrerender); + // write html files + await Promise.all(results.map(writeHtml)); + + if (crawl) { + const moreRoutes = results + .reduce((acc, { html, originalRoute }) => { + $ = cheerio.load(html); + + return [ + ...acc, + ...$('a') + .map((_, el) => { + const href = $(el).attr('href'); + const pathname = isRelativeUrl(href) + ? url.resolve(originalRoute, href) + : href; + + return pathname; + }) + .get(), + ]; + }, []) + .filter(route => route.startsWith(publicURL)); + + const newRoutes = difference(moreRoutes, routes); + routes = [...routes, ...newRoutes]; + if (newRoutes.length) { + await prerenderRoutes(newRoutes); + } + } + }; -module.exports = bundler => { bundler.on('buildEnd', async () => { if (process.env.NODE_ENV !== 'production') return; console.log(''); const spinner = ora(chalk.grey('Prerendering')).start(); - let routes = ['/']; // the default route - let rendererConfig = {}; - const found = await cosmiconfig('prerender').search(); - if (found) { - const { config } = found; - if (Array.isArray(config)) { - routes = config; - } else { - if (config.rendererConfig) ({ rendererConfig } = config); - if (config.routes) ({ routes } = config); - } - } - const { outDir } = bundler.options; - const prerenderer = new Prerenderer({ - staticDir: outDir, - renderer: new Puppeteer(rendererConfig), - }); + try { await prerenderer.initialize(); const start = Date.now(); - const renderedRoutes = await prerenderer.renderRoutes(routes); + await prerenderRoutes(routes); const end = Date.now(); - await Promise.all(renderedRoutes.map(async route => { - try { - const outputDir = path.join(outDir, route.route); - const file = path.normalize(`${outputDir}/index.html`); - mkdirp.sync(outputDir); - const {html} = await htmlnano.process(route.html.trim()); - fs.writeFileSync(file, html); - const end = Date.now(); - } catch (err) { - console.error(err); - } - })); + spinner.stopAndPersist({ symbol: '✨ ', - text: chalk.green(`Prerendered in ${prettyMs(end - start)}.`) + text: chalk.green(`Prerendered in ${prettyMs(end - start)}.`), }); + prerenderer.destroy(); } catch (err) { prerenderer.destroy(); diff --git a/package-lock.json b/package-lock.json index 03a3188..818b961 100644 --- a/package-lock.json +++ b/package-lock.json @@ -23,6 +23,11 @@ "puppeteer": "^1.7.0" } }, + "@types/node": { + "version": "11.13.8", + "resolved": "https://registry.npmjs.org/@types/node/-/node-11.13.8.tgz", + "integrity": "sha512-szA3x/3miL90ZJxUCzx9haNbK5/zmPieGraZEe4WI+3srN0eGLiT22NXeMHmyhNEopn+IrxqMc7wdVwvPl8meg==" + }, "@types/q": { "version": "1.5.2", "resolved": "https://registry.npmjs.org/@types/q/-/q-1.5.2.tgz", @@ -405,6 +410,49 @@ "supports-color": "^5.3.0" } }, + "cheerio": { + "version": "1.0.0-rc.3", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.3.tgz", + "integrity": "sha512-0td5ijfUPuubwLUu0OBoe98gZj8C/AA+RW3v67GPlGOrvxWjZmBXiBCRU+I8VEiNyJzjth40POfHiz2RB3gImA==", + "requires": { + "css-select": "~1.2.0", + "dom-serializer": "~0.1.1", + "entities": "~1.1.1", + "htmlparser2": "^3.9.1", + "lodash": "^4.15.0", + "parse5": "^3.0.1" + }, + "dependencies": { + "css-select": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", + "requires": { + "boolbase": "~1.0.0", + "css-what": "2.1", + "domutils": "1.5.1", + "nth-check": "~1.0.1" + } + }, + "domutils": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", + "integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", + "requires": { + "dom-serializer": "0", + "domelementtype": "1" + } + }, + "parse5": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz", + "integrity": "sha512-rgO9Zg5LLLkfJF9E6CCmXlSE4UVceloys8JrFqCcHloC3usd/kJCyPDwH2SOlzix2j3xaP9sUX3e8+kvkuleAA==", + "requires": { + "@types/node": "*" + } + } + } + }, "class-utils": { "version": "0.3.6", "resolved": "https://registry.npmjs.org/class-utils/-/class-utils-0.3.6.tgz", @@ -1753,6 +1801,21 @@ "has": "^1.0.1" } }, + "is-relative-url": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-relative-url/-/is-relative-url-3.0.0.tgz", + "integrity": "sha512-U1iSYRlY2GIMGuZx7gezlB5dp1Kheaym7zKzO1PV06mOihiWTXejLwm4poEJysPyXF+HtK/BEd0DVlcCh30pEA==", + "requires": { + "is-absolute-url": "^3.0.0" + }, + "dependencies": { + "is-absolute-url": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/is-absolute-url/-/is-absolute-url-3.0.0.tgz", + "integrity": "sha512-3OkP8XrM2Xq4/IxsJnClfMp3OaM3TAatLPLKPeWcxLBTrpe6hihwtX+XZfJTcXg/FTRi4qjy0y/C5qiyNxY24g==" + } + } + }, "is-resolvable": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/is-resolvable/-/is-resolvable-1.1.0.tgz", @@ -1901,6 +1964,11 @@ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" }, + "lodash.difference": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.difference/-/lodash.difference-4.5.0.tgz", + "integrity": "sha1-nMtOUF1Ia5FlE0V3KIWi3yf9AXw=" + }, "lodash.memoize": { "version": "4.1.2", "resolved": "https://registry.npmjs.org/lodash.memoize/-/lodash.memoize-4.1.2.tgz", diff --git a/package.json b/package.json index dfafb3d..e6ef684 100644 --- a/package.json +++ b/package.json @@ -20,8 +20,11 @@ "@prerenderer/prerenderer": "^0.7.2", "@prerenderer/renderer-puppeteer": "^0.2.0", "chalk": "^2.4.2", + "cheerio": "^1.0.0-rc.3", "cosmiconfig": "^5.2.0", "htmlnano": "^0.2.3", + "is-relative-url": "^3.0.0", + "lodash.difference": "^4.5.0", "mkdirp": "^0.5.1", "ora": "^3.4.0", "pretty-ms": "^5.0.0"