Skip to content

Commit 177c238

Browse files
committed
refactor(readability): avoid worker_threads
1 parent a2535e0 commit 177c238

File tree

1 file changed

+29
-16
lines changed
  • packages/metascraper-readability/src

1 file changed

+29
-16
lines changed

packages/metascraper-readability/src/index.js

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,46 @@
11
'use strict'
22

33
const { memoizeOne, composeRule } = require('@metascraper/helpers')
4-
const asyncMemoizeOne = require('async-memoize-one')
54
const { Readability } = require('@mozilla/readability')
5+
const asyncMemoizeOne = require('async-memoize-one')
6+
const { Browser } = require('happy-dom')
67

7-
const errorCapture =
8-
process.env.NODE_ENV === 'test' ? 'tryAndCatch' : 'processLevel'
9-
10-
const parseReader = (reader) => {
8+
/**
 * Run a reader and normalise its output.
 *
 * @param {{ parse: Function }} reader - object exposing a `parse()` method
 *   (a `Readability` instance in this module).
 * @returns {object} the value produced by `reader.parse()`, or an empty
 *   object when that value is falsy (e.g. `null`).
 */
const parseReader = reader => reader.parse() || {}
1412

15-
1613
/**
 * Build a happy-dom document populated with the given HTML.
 *
 * The browser is created with computed-style rendering, CSS file loading,
 * iframe page loading and JavaScript evaluation/file loading all disabled.
 *
 * @param {object} input
 * @param {string} input.url - assigned to the main frame's `url`.
 * @param {string} input.html - markup written into the frame's document.
 * @returns {{ document: object, teardown: Function }} the populated document
 *   plus a `teardown` function that calls `browser.close()` and returns its
 *   result; the caller is responsible for invoking it.
 * @throws rethrows any error raised while preparing the page; the browser is
 *   closed first so it does not leak.
 */
const getDocument = ({ url, html }) => {
  const browser = new Browser({
    settings: {
      disableComputedStyleRendering: true,
      disableCSSFileLoading: true,
      disableIframePageLoading: true,
      disableJavaScriptEvaluation: true,
      disableJavaScriptFileLoading: true
    }
  })

  const teardown = () => browser.close()

  try {
    const page = browser.newPage()
    // Set the URL without navigating to avoid fetch requests
    page.mainFrame.url = url
    const document = page.mainFrame.document
    document.write(html)

    return { document, teardown }
  } catch (error) {
    // The caller never receives `teardown` on this path, so release the
    // browser here. A failure while closing is deliberately ignored so it
    // cannot mask the original error, which is rethrown below.
    Promise.resolve()
      .then(teardown)
      .catch(() => {})
    throw error
  }
}
2634

27-
const readability = asyncMemoizeOne((url, html, readabilityOpts) => {
28-
const document = getDocument({ url, html })
29-
const reader = new Readability(document, readabilityOpts)
30-
return parseReader(reader)
35+
/**
 * Extract the readable article from an HTML page.
 *
 * Builds a document via `getDocument`, runs `Readability` over it and always
 * awaits the document's `teardown` afterwards, whether parsing succeeded or
 * threw.
 *
 * Wrapped in `asyncMemoizeOne` with `memoizeOne.EqualityFirstArgument` as the
 * comparator — presumably a repeated call with the same `url` reuses the
 * previous result; confirm against `@metascraper/helpers`.
 *
 * @param {string} url - page URL handed to `getDocument`.
 * @param {string} html - page markup handed to `getDocument`.
 * @param {object} [readabilityOpts] - options forwarded to `Readability`.
 * @returns {Promise<object>} the `parseReader` result for the page.
 */
const readability = asyncMemoizeOne(async (url, html, readabilityOpts) => {
  const dom = getDocument({ url, html })

  try {
    return parseReader(new Readability(dom.document, readabilityOpts))
  } finally {
    // Release the happy-dom browser even when parsing fails.
    await dom.teardown()
  }
}, memoizeOne.EqualityFirstArgument)
3245

3346
module.exports = ({ readabilityOpts } = {}) => {

0 commit comments

Comments
 (0)