|
1 | 1 | 'use strict' |
2 | 2 |
|
3 | 3 | const { memoizeOne, composeRule } = require('@metascraper/helpers') |
4 | | -const asyncMemoizeOne = require('async-memoize-one') |
5 | 4 | const { Readability } = require('@mozilla/readability') |
| 5 | +const asyncMemoizeOne = require('async-memoize-one') |
| 6 | +const { Browser } = require('happy-dom') |
6 | 7 |
|
7 | | -const errorCapture = |
8 | | - process.env.NODE_ENV === 'test' ? 'tryAndCatch' : 'processLevel' |
9 | | - |
/**
 * Run a reader and normalise its result so callers can always read
 * properties off the return value.
 *
 * @param {{ parse: () => (object|null|undefined) }} reader - Object exposing
 *   a `parse()` method (a `Readability` instance in this file).
 * @returns {object} Whatever `parse()` returned, or an empty object when it
 *   returned a falsy value.
 */
const parseReader = reader => {
  const parsed = reader.parse()
  return parsed || {}
}
14 | 12 |
|
15 | | - |
/**
 * Create a happy-dom document populated with `html`.
 *
 * A dedicated `Browser` is created per call with computed-style rendering,
 * CSS/JavaScript file loading, JavaScript evaluation and iframe page loading
 * all disabled through its settings.
 *
 * @param {object} input
 * @param {string} input.url - URL assigned to the page's main frame.
 * @param {string} input.html - Markup written into the document.
 * @returns {{ document: object, teardown: () => Promise<void> }} The document
 *   plus a `teardown` function that closes the browser; the caller is
 *   responsible for invoking it once the document is no longer needed.
 * @throws Rethrows any error raised while creating the page or writing the
 *   markup, after scheduling the browser to be closed.
 */
const getDocument = ({ url, html }) => {
  const browser = new Browser({
    settings: {
      disableComputedStyleRendering: true,
      disableCSSFileLoading: true,
      disableIframePageLoading: true,
      disableJavaScriptEvaluation: true,
      disableJavaScriptFileLoading: true
    }
  })

  const teardown = () => browser.close()

  try {
    const page = browser.newPage()
    // Set the URL without navigating to avoid fetch requests
    page.mainFrame.url = url
    const document = page.mainFrame.document
    document.write(html)

    return { document, teardown }
  } catch (error) {
    // The caller never receives `teardown` on this path, so the browser must
    // be released here or it would leak. Cleanup is best-effort: a failure
    // while closing must not mask the original error, which is rethrown.
    Promise.resolve()
      .then(teardown)
      .catch(() => {})
    throw error
  }
}
26 | 34 |
|
/**
 * Parse a page with Readability and return the extracted article.
 *
 * The function is wrapped with `asyncMemoizeOne` using
 * `memoizeOne.EqualityFirstArgument` as the equality check.
 * NOTE(review): presumably this reuses the last result when `url` is
 * unchanged, regardless of `html` — confirm against @metascraper/helpers.
 *
 * @param {string} url - Page URL, forwarded to `getDocument`.
 * @param {string} html - Page markup, forwarded to `getDocument`.
 * @param {object} [readabilityOpts] - Options passed to the `Readability`
 *   constructor.
 * @returns {Promise<object>} Result of `parseReader` for the page.
 */
const readability = asyncMemoizeOne(async (url, html, readabilityOpts) => {
  const { document, teardown } = getDocument({ url, html })
  try {
    const reader = new Readability(document, readabilityOpts)
    return parseReader(reader)
  } finally {
    // Always run the teardown returned by `getDocument`, even when
    // constructing the reader or parsing throws.
    await teardown()
  }
}, memoizeOne.EqualityFirstArgument)
32 | 45 |
|
33 | 46 | module.exports = ({ readabilityOpts } = {}) => { |
|
0 commit comments