Skip to content

Commit a2535e0

Browse files
committed
WIP
1 parent f57d8fc commit a2535e0

File tree

2 files changed

+23
-46
lines changed

2 files changed

+23
-46
lines changed

packages/metascraper-readability/src/index.js

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,21 +2,32 @@
2 2

3 3
const { memoizeOne, composeRule } = require('@metascraper/helpers')
4 4
const asyncMemoizeOne = require('async-memoize-one')
5-
const { Worker } = require('worker_threads')
6-
const path = require('path')
5+
const { Readability } = require('@mozilla/readability')
7 6

8-
const SCRIPT_PATH = path.resolve(__dirname, 'worker.js')
7+
const errorCapture =
8+
process.env.NODE_ENV === 'test' ? 'tryAndCatch' : 'processLevel'
9 9

10-
const readability = asyncMemoizeOne((url, html, readabilityOpts) => {
11-
const worker = new Worker(SCRIPT_PATH, {
12-
workerData: { url, html, readabilityOpts },
13-
stdout: true,
14-
stderr: true
10+
const parseReader = (reader) => {
11+
const parsed = reader.parse()
12+
return parsed || {}
13+
}
14+
15+
16+
const getDocument = ({ url, html }) => {
17+
const { Window } = require('happy-dom')
18+
const window = new Window({
19+
url,
20+
settings: { errorCapture }
15 21
})
16-
const { promise, resolve, reject } = Promise.withResolvers()
17-
worker.on('message', message => resolve(JSON.parse(message)))
18-
worker.on('error', reject)
19-
return promise
22+
const document = window.document
23+
document.write(html)
24+
return document
25+
}
26+
27+
const readability = asyncMemoizeOne((url, html, readabilityOpts) => {
28+
const document = getDocument({ url, html })
29+
const reader = new Readability(document, readabilityOpts)
30+
return parseReader(reader)
20 31
}, memoizeOne.EqualityFirstArgument)
21 32

22 33
module.exports = ({ readabilityOpts } = {}) => {

packages/metascraper-readability/src/worker.js

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)