From 72a123040d60b4c4c820fdbd165cd9fc81ee5563 Mon Sep 17 00:00:00 2001 From: Martijn van de Rijdt Date: Tue, 25 Jul 2023 14:15:18 -0400 Subject: [PATCH 1/3] changed: Optimizes pdf generation performance by limiting browser opening/closing and adding caching, --- app/lib/headless-browser.js | 39 +++++++++++++++++++++++++++++++++++++ app/lib/pdf.js | 6 +++--- 2 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 app/lib/headless-browser.js diff --git a/app/lib/headless-browser.js b/app/lib/headless-browser.js new file mode 100644 index 00000000..d149374c --- /dev/null +++ b/app/lib/headless-browser.js @@ -0,0 +1,39 @@ +const puppeteer = require('puppeteer'); + +const args = ['--no-startup-window']; +const userDataDir = './chromium-cache'; + +/** + * This class approach makes it easy to open multiple browser instances with + * different arguments in case that is ever required. + */ +class BrowserHandler { + constructor() { + const launchBrowser = async () => { + this.browser = false; + this.browser = await puppeteer.launch({ + headless: true, + devtools: false, + args, + userDataDir, + }); + this.browser.on('disconnected', launchBrowser); + }; + + (async () => { + await launchBrowser(); + })(); + } +} + +const getBrowser = (handler) => + new Promise((resolve) => { + const browserCheck = setInterval(() => { + if (handler.browser !== false) { + clearInterval(browserCheck); + resolve(handler.browser); + } + }, 100); + }); + +module.exports = { BrowserHandler, getBrowser }; diff --git a/app/lib/pdf.js b/app/lib/pdf.js index f25a482f..03c9e8ab 100644 --- a/app/lib/pdf.js +++ b/app/lib/pdf.js @@ -2,9 +2,10 @@ * @module pdf */ const config = require('../models/config-model').server; +const { BrowserHandler, getBrowser } = require('./headless-browser'); +const browserHandler = new BrowserHandler(); const { timeout } = config.headless; -const puppeteer = require('puppeteer'); const { URL } = require('url'); /** @@ -51,7 +52,7 @@ async function get(url, options = {}) { urlObj.searchParams.append('landscape', options.landscape); urlObj.searchParams.append('scale', options.scale); - const browser = await puppeteer.launch({ headless: true }); + const browser = await getBrowser(browserHandler); const page = await browser.newPage(); let pdf; @@ -112,7 +113,6 @@ async function get(url, options = {}) { } await page.close(); - await browser.close(); return pdf; } From 8ca51a5faa0ebceeba9e67d2755d92c3b08b9cc4 Mon Sep 17 00:00:00 2001 From: Martijn van de Rijdt Date: Tue, 25 Jul 2023 14:15:56 -0400 Subject: [PATCH 2/3] changed: refactored code --- app/lib/pdf.js | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/app/lib/pdf.js b/app/lib/pdf.js index 03c9e8ab..b77eb854 100644 --- a/app/lib/pdf.js +++ b/app/lib/pdf.js @@ -1,12 +1,12 @@ /** * @module pdf */ +const { URL } = require('url'); const config = require('../models/config-model').server; const { BrowserHandler, getBrowser } = require('./headless-browser'); const browserHandler = new BrowserHandler(); const { timeout } = config.headless; -const { URL } = require('url'); /** * @typedef PdfGetOptions @@ -36,21 +36,24 @@ const DEFAULTS = { * @param {PdfGetOptions} [options] - PDF options * @return { Promise } a promise that returns the PDF */ -async function get(url, options = {}) { +async function get( + url, + { + format = DEFAULTS.FORMAT, + margin = DEFAULTS.MARGIN, + landscape = DEFAULTS.LANDSCAPE, + scale = DEFAULTS.SCALE, + } = {} +) { if (!url) { throw new Error('No url provided'); } - options.format = options.format || DEFAULTS.FORMAT; - options.margin = options.margin || DEFAULTS.MARGIN; - options.landscape = options.landscape || DEFAULTS.LANDSCAPE; - options.scale = options.scale || DEFAULTS.SCALE; - const urlObj = new URL(url); - urlObj.searchParams.append('format', options.format); - urlObj.searchParams.append('margin', options.margin); - urlObj.searchParams.append('landscape', options.landscape); - urlObj.searchParams.append('scale', options.scale); + urlObj.searchParams.append('format', format); + urlObj.searchParams.append('margin', margin); + urlObj.searchParams.append('landscape', landscape); + urlObj.searchParams.append('scale', scale); const browser = await getBrowser(browserHandler); const page = await browser.newPage(); @@ -94,15 +97,15 @@ async function get(url, options = {}) { }); pdf = await page.pdf({ - landscape: options.landscape, - format: options.format, + landscape, + format, margin: { - top: options.margin, - left: options.margin, - right: options.margin, - bottom: options.margin, + top: margin, + left: margin, + right: margin, + bottom: margin, }, - scale: options.scale, + scale, printBackground: true, timeout, }); From 181346df279ecb4c9956ea19596140582ebbcfae Mon Sep 17 00:00:00 2001 From: Martijn van de Rijdt Date: Tue, 25 Jul 2023 14:18:15 -0400 Subject: [PATCH 3/3] fixed: if a PDF endpoint encounters an authentication error during PDF generation it does not provide an error response, https://github.com/OpenClinica/enketo-express-oc/issues/688 --- app/lib/pdf.js | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/app/lib/pdf.js b/app/lib/pdf.js index b77eb854..e5ab7603 100644 --- a/app/lib/pdf.js +++ b/app/lib/pdf.js @@ -61,13 +61,27 @@ async function get( let pdf; try { - await page + // To use an eventhandler here and catch a specific error, + // we have to return a Promise (in this case one that never resolves). + const detect401 = new Promise((resolve, reject) => { + page.on('requestfinished', (request) => { + if (request.response().status() === 401) { + const e = new Error('Authentication required'); + e.status = 401; + reject(e); + } + }); + }); + const goToPage = page .goto(urlObj.href, { waitUntil: 'networkidle0', timeout }) .catch((e) => { e.status = /timeout/i.test(e.message) ? 408 : 400; throw e; }); + // Either a 401 error is thrown or goto succeeds (or encounters a real loading error) + await Promise.race([detect401, goToPage]); + /* * This works around an issue with puppeteer not printing canvas * images that were loaded from a file.