Skip to content

Commit 3b5b2ef

Browse files
authored
Merge pull request #756 from microlinkhq/happy-dom
chore(helpers): run `loadIframe` in worker_threads
2 parents 4138964 + de6e0c4 commit 3b5b2ef

File tree

18 files changed

+761
-51
lines changed

18 files changed

+761
-51
lines changed

package.json

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,8 +174,7 @@
174174
"postcss": "latest",
175175
"postcss-focus": "latest",
176176
"simple-git-hooks": "latest",
177-
"standard": "latest",
178-
"standard-markdown": "latest"
177+
"standard": "latest"
179178
},
180179
"engines": {
181180
"node": ">= 16"
@@ -189,7 +188,7 @@
189188
"coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
190189
"dev": "concurrently \"gulp\" \"npm run dev:server\"",
191190
"dev:server": "browser-sync start --server --files \"index.html, README.md, static/**/*.(css|js)\"",
192-
"lint": "standard-markdown README.md && standard",
191+
"lint": "standard",
193192
"pretest": "npm run lint",
194193
"release": "lerna publish --yes --sort --conventional-commits -m \"chore(release): %s\" --create-release github",
195194
"test": "c8 pnpm --recursive test",
@@ -213,9 +212,6 @@
213212
"prettier-standard",
214213
"standard --fix"
215214
],
216-
"*.md": [
217-
"standard-markdown"
218-
],
219215
"package.json": [
220216
"finepack"
221217
]

packages/metascraper-helpers/package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"description": "Collection of helper functions used by metascraper",
44
"homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-helpers",
55
"version": "5.46.11",
6-
"main": "index.js",
6+
"main": "src/index.js",
77
"author": {
88
"email": "hello@microlink.io",
99
"name": "microlink.io",

packages/metascraper-helpers/index.js renamed to packages/metascraper-helpers/src/index.js

Lines changed: 1 addition & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ const debug = require('debug-logfmt')('metascraper:find-rule')
55
const condenseWhitespace = require('condense-whitespace')
66
const { getExtension: mimeExtension } = require('mime')
77
const capitalize = require('microsoft-capitalize')
8-
const { JSDOM, VirtualConsole } = require('jsdom')
98
const isRelativeUrl = require('is-relative-url')
109
const fileExtension = require('file-extension')
1110
const _normalizeUrl = require('normalize-url')
@@ -466,40 +465,6 @@ const composeRule =
466465
const has = value =>
467466
value !== undefined && !Number.isNaN(value) && hasValues(value)
468467

469-
const loadIframe = (url, $, { timeout = 5000 } = {}) =>
470-
new Promise(resolve => {
471-
const dom = new JSDOM($.html(), {
472-
url,
473-
virtualConsole: new VirtualConsole(),
474-
runScripts: 'dangerously',
475-
resources: 'usable'
476-
})
477-
478-
const done = (html = '') => resolve($.load(html))
479-
480-
const listen = (element, method, fn) =>
481-
element[`${method}EventListener`]('load', fn, {
482-
capture: true,
483-
once: true,
484-
passive: true
485-
})
486-
487-
const iframe = dom.window.document.querySelector('iframe')
488-
if (!iframe) return done()
489-
490-
const timer = setTimeout(() => {
491-
listen(iframe, 'remove', load)
492-
done()
493-
}, timeout)
494-
495-
function load () {
496-
clearTimeout(timer)
497-
done(iframe.contentDocument.documentElement.outerHTML)
498-
}
499-
500-
listen(iframe, 'add', load)
501-
})
502-
503468
const getUrls = input => String(input).match(urlRegexForMatch) ?? []
504469

505470
module.exports = {
@@ -536,7 +501,7 @@ module.exports = {
536501
iso6393,
537502
jsonld,
538503
lang,
539-
loadIframe,
504+
loadIframe: require('./load-iframe'),
540505
logo,
541506
memoizeOne,
542507
mimeExtension,
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
'use strict'
2+
3+
const { Worker } = require('worker_threads')
4+
const path = require('path')
5+
6+
const SCRIPT_PATH = path.resolve(__dirname, 'worker.js')
7+
8+
module.exports = (url, $, { timeout = 5000 } = {}) => {
9+
const worker = new Worker(SCRIPT_PATH, {
10+
workerData: { url, html: $.html(), timeout },
11+
stdout: true,
12+
stderr: true
13+
})
14+
const { promise, resolve, reject } = Promise.withResolvers()
15+
worker.on('message', html => resolve($.load(html || '')))
16+
worker.on('error', reject)
17+
return promise
18+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
'use strict'
2+
3+
const { workerData, parentPort } = require('node:worker_threads')
4+
const { JSDOM, VirtualConsole } = require('jsdom')
5+
6+
/**
 * Render `html` with jsdom (scripts and subresources enabled) and wait for
 * the first `<iframe>` of the document to emit `load`.
 *
 * @param {object} data - Payload received through `workerData`.
 * @param {string} data.url - URL used as the jsdom document URL.
 * @param {string} data.html - Markup to render.
 * @param {number} data.timeout - Milliseconds to wait for the `load` event.
 * @returns {Promise<string|undefined>} The iframe document serialized as HTML,
 *   or `undefined` when there is no iframe, its document is not available,
 *   or the timeout is reached first.
 */
async function main ({ url, html, timeout }) {
  const dom = new JSDOM(html, {
    url,
    virtualConsole: new VirtualConsole(),
    runScripts: 'dangerously',
    resources: 'usable'
  })

  try {
    const iframe = dom.window.document.querySelector('iframe')
    if (!iframe) return

    return await new Promise(resolve => {
      const timer = setTimeout(resolve, timeout)

      iframe.addEventListener(
        'load',
        () => {
          clearTimeout(timer)
          // Optional chaining matters here: the timer is already cleared, so
          // a throw inside the listener would leave the promise pending.
          resolve(iframe.contentDocument?.documentElement?.outerHTML)
        },
        { once: true }
      )
    })
  } finally {
    // Release the timers and listeners held by the window.
    dom.window.close()
  }
}

main(workerData).then(html => parentPort.postMessage(html))

packages/metascraper-helpers/test/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ const {
2929
parseUrl,
3030
url,
3131
video
32-
} = require('..')
32+
} = require('../src')
3333

3434
const measure = fn => {
3535
const time = process.hrtime()

packages/metascraper-helpers/test/load-iframe.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,14 @@ test('wait `load` event', async t => {
2222
const $iframe = await loadIframe(url, $)
2323
t.true($iframe.html().includes('twitter:player'))
2424
})
25+
26+
// Snapshot the markup resolved for a page whose only content is a remote iframe.
test('markup is correct', async t => {
  const src = 'https://share.transistor.fm/e/e83b42d0'
  const url =
    'https://saas.transistor.fm/episodes/paul-jarvis-gaining-freedom-by-building-an-indie-business'
  const $page = cheerio.load(`<iframe src="${src}"></iframe>`)
  const $iframe = await loadIframe(url, $page)
  t.snapshot($iframe.html())
})

0 commit comments

Comments
 (0)