Skip to content
This repository has been archived by the owner on Nov 29, 2023. It is now read-only.

Fix/various pdf fixes #577

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions app/lib/headless-browser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
const puppeteer = require('puppeteer');

const args = ['--no-startup-window'];
const userDataDir = './chromium-cache';

/**
* This class approach makes it easy to open multiple browser instances with
* different arguments in case that is ever required.
Copy link
Member Author

@MartijnR MartijnR Jul 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI, OpenClinica's fork actually does this: it has custom 'headless' API endpoints that import records, run validation on them, and add comments to questions with errors. Those endpoints use a different headless browser config to optimize performance for that purpose (without stylesheets, etc.).

*/
class BrowserHandler {
    /**
     * Starts launching a headless browser as soon as the handler is created.
     *
     * `this.browser` is `false` while no browser is available and holds the
     * value resolved by `puppeteer.launch` once the launch has completed.
     */
    constructor() {
        const startBrowser = async () => {
            // `false` marks "no browser available (yet)".
            this.browser = false;

            const launchOptions = {
                headless: true,
                devtools: false,
                args,
                userDataDir,
            };
            const instance = await puppeteer.launch(launchOptions);
            this.browser = instance;

            // Relaunch if the browser is shut down for any reason.
            instance.on('disconnected', startBrowser);
        };

        // Not awaited: a constructor cannot be async, so the launch
        // continues in the background after construction.
        startBrowser();
    }
}

/**
 * Resolves with the handler's browser as soon as one is available.
 *
 * A `handler.browser` value of `false` means that no browser is available
 * (yet); any other value is considered ready and is passed on as-is.
 *
 * @param {{ browser: * }} handler - object exposing a `browser` property
 * @param {number} [pollInterval] - milliseconds between availability checks
 * @return { Promise } a promise that resolves with `handler.browser`
 */
const getBrowser = (handler, pollInterval = 100) =>
    new Promise((resolve) => {
        // Check right away so callers do not wait a full polling tick when
        // the browser is already running.
        if (handler.browser !== false) {
            resolve(handler.browser);

            return;
        }

        const browserCheck = setInterval(() => {
            if (handler.browser !== false) {
                clearInterval(browserCheck);
                resolve(handler.browser);
            }
        }, pollInterval);
    });

module.exports = { BrowserHandler, getBrowser };
61 changes: 39 additions & 22 deletions app/lib/pdf.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
/**
* @module pdf
*/
const { URL } = require('url');
const config = require('../models/config-model').server;
const { BrowserHandler, getBrowser } = require('./headless-browser');

const browserHandler = new BrowserHandler();
const { timeout } = config.headless;
const puppeteer = require('puppeteer');
const { URL } = require('url');

/**
* @typedef PdfGetOptions
Expand Down Expand Up @@ -35,35 +36,52 @@ const DEFAULTS = {
* @param {PdfGetOptions} [options] - PDF options
* @return { Promise } a promise that returns the PDF
*/
async function get(url, options = {}) {
async function get(
url,
{
format = DEFAULTS.FORMAT,
margin = DEFAULTS.MARGIN,
landscape = DEFAULTS.LANDSCAPE,
scale = DEFAULTS.SCALE,
} = {}
) {
if (!url) {
throw new Error('No url provided');
}

options.format = options.format || DEFAULTS.FORMAT;
options.margin = options.margin || DEFAULTS.MARGIN;
options.landscape = options.landscape || DEFAULTS.LANDSCAPE;
options.scale = options.scale || DEFAULTS.SCALE;

const urlObj = new URL(url);
urlObj.searchParams.append('format', options.format);
urlObj.searchParams.append('margin', options.margin);
urlObj.searchParams.append('landscape', options.landscape);
urlObj.searchParams.append('scale', options.scale);
urlObj.searchParams.append('format', format);
urlObj.searchParams.append('margin', margin);
urlObj.searchParams.append('landscape', landscape);
urlObj.searchParams.append('scale', scale);

const browser = await puppeteer.launch({ headless: true });
const browser = await getBrowser(browserHandler);
const page = await browser.newPage();

let pdf;

try {
await page
// To use an event handler here and catch a specific error,
// we have to return a Promise (in this case one that never resolves).
const detect401 = new Promise((resolve, reject) => {
page.on('requestfinished', (request) => {
if (request.response().status() === 401) {
const e = new Error('Authentication required');
e.status = 401;
reject(e);
}
});
});
const goToPage = page
.goto(urlObj.href, { waitUntil: 'networkidle0', timeout })
.catch((e) => {
e.status = /timeout/i.test(e.message) ? 408 : 400;
throw e;
});

// Either a 401 error is thrown or goto succeeds (or encounters a real loading error)
await Promise.race([detect401, goToPage]);

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As you can see, it was a challenge to properly catch a 401 response... I would certainly be impressed if y'all can point to a better solution :).

/*
* This works around an issue with puppeteer not printing canvas
* images that were loaded from a file.
Expand Down Expand Up @@ -93,15 +111,15 @@ async function get(url, options = {}) {
});

pdf = await page.pdf({
landscape: options.landscape,
format: options.format,
landscape,
format,
margin: {
top: options.margin,
left: options.margin,
right: options.margin,
bottom: options.margin,
top: margin,
left: margin,
right: margin,
bottom: margin,
},
scale: options.scale,
scale,
printBackground: true,
timeout,
});
Expand All @@ -112,7 +130,6 @@ async function get(url, options = {}) {
}

await page.close();
await browser.close();

return pdf;
}
Expand Down
Loading