|
2 | 2 |
|
3 | 3 | const { memoizeOne, composeRule } = require('@metascraper/helpers') |
4 | 4 | const asyncMemoizeOne = require('async-memoize-one') |
5 | | -const { Worker } = require('worker_threads') |
6 | | -const path = require('path') |
| 5 | +const { Readability } = require('@mozilla/readability') |
7 | 6 |
|
8 | | -const SCRIPT_PATH = path.resolve(__dirname, 'worker.js') |
| 7 | +const errorCapture = |
| 8 | + process.env.NODE_ENV === 'test' ? 'tryAndCatch' : 'processLevel' |
9 | 9 |
|
10 | | -const readability = asyncMemoizeOne((url, html, readabilityOpts) => { |
11 | | - const worker = new Worker(SCRIPT_PATH, { |
12 | | - workerData: { url, html, readabilityOpts }, |
13 | | - stdout: true, |
14 | | - stderr: true |
| 10 | +const parseReader = (reader) => { |
| 11 | + const parsed = reader.parse() |
| 12 | + return parsed || {} |
| 13 | +} |
| 14 | + |
| 15 | + |
| 16 | +const getDocument = ({ url, html }) => { |
| 17 | + const { Window } = require('happy-dom') |
| 18 | + const window = new Window({ |
| 19 | + url, |
| 20 | + settings: { errorCapture } |
15 | 21 | }) |
16 | | - const { promise, resolve, reject } = Promise.withResolvers() |
17 | | - worker.on('message', message => resolve(JSON.parse(message))) |
18 | | - worker.on('error', reject) |
19 | | - return promise |
| 22 | + const document = window.document |
| 23 | + document.write(html) |
| 24 | + return document |
| 25 | +} |
| 26 | + |
| 27 | +const readability = asyncMemoizeOne((url, html, readabilityOpts) => { |
| 28 | + const document = getDocument({ url, html }) |
| 29 | + const reader = new Readability(document, readabilityOpts) |
| 30 | + return parseReader(reader) |
20 | 31 | }, memoizeOne.EqualityFirstArgument) |
21 | 32 |
|
22 | 33 | module.exports = ({ readabilityOpts } = {}) => { |
|
0 commit comments