Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,7 @@
"postcss": "latest",
"postcss-focus": "latest",
"simple-git-hooks": "latest",
"standard": "latest",
"standard-markdown": "latest"
"standard": "latest"
},
"engines": {
"node": ">= 16"
Expand All @@ -189,7 +188,7 @@
"coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
"dev": "concurrently \"gulp\" \"npm run dev:server\"",
"dev:server": "browser-sync start --server --files \"index.html, README.md, static/**/*.(css|js)\"",
"lint": "standard-markdown README.md && standard",
"lint": "standard",
"pretest": "npm run lint",
"release": "lerna publish --yes --sort --conventional-commits -m \"chore(release): %s\" --create-release github",
"test": "c8 pnpm --recursive test",
Expand All @@ -213,9 +212,6 @@
"prettier-standard",
"standard --fix"
],
"*.md": [
"standard-markdown"
],
"package.json": [
"finepack"
]
Expand Down
2 changes: 1 addition & 1 deletion packages/metascraper-helpers/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"description": "Collection of helper functions used by metascraper",
"homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-helpers",
"version": "5.46.11",
"main": "index.js",
"main": "src/index.js",
"author": {
"email": "[email protected]",
"name": "microlink.io",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ const debug = require('debug-logfmt')('metascraper:find-rule')
const condenseWhitespace = require('condense-whitespace')
const { getExtension: mimeExtension } = require('mime')
const capitalize = require('microsoft-capitalize')
const { JSDOM, VirtualConsole } = require('jsdom')
const isRelativeUrl = require('is-relative-url')
const fileExtension = require('file-extension')
const _normalizeUrl = require('normalize-url')
Expand Down Expand Up @@ -466,40 +465,6 @@ const composeRule =
/**
 * Tell whether `value` is usable: it must be defined, must not be `NaN`,
 * and must be accepted by `hasValues`.
 */
const has = value => {
  if (value === undefined) return false
  if (Number.isNaN(value)) return false
  return hasValues(value)
}

/**
 * Render the markup held by `$` in jsdom and resolve with the first iframe's
 * document loaded back through `$.load`.
 *
 * Resolves with empty markup when the page has no iframe or when the iframe
 * has not fired `load` within `timeout` milliseconds.
 *
 * @param {string} url - URL handed to jsdom as the document URL.
 * @param {object} $ - Object exposing `html()` and `load()`.
 * @param {object} [opts]
 * @param {number} [opts.timeout=5000] - Milliseconds to wait for the `load` event.
 * @returns {Promise<object>} Whatever `$.load` returns for the resulting markup.
 */
const loadIframe = (url, $, { timeout = 5000 } = {}) =>
  new Promise(resolve => {
    const { window } = new JSDOM($.html(), {
      url,
      virtualConsole: new VirtualConsole(),
      runScripts: 'dangerously',
      resources: 'usable'
    })

    const finish = (markup = '') => resolve($.load(markup))

    const frame = window.document.querySelector('iframe')
    if (!frame) return finish()

    const listenerOptions = { capture: true, once: true, passive: true }

    // Invoked once the iframe has loaded: cancel the timeout and hand back its markup.
    const onLoad = () => {
      clearTimeout(timeoutId)
      finish(frame.contentDocument.documentElement.outerHTML)
    }

    // Give up after `timeout` ms: detach the listener and resolve with empty markup.
    const timeoutId = setTimeout(() => {
      frame.removeEventListener('load', onLoad, listenerOptions)
      finish()
    }, timeout)

    frame.addEventListener('load', onLoad, listenerOptions)
  })

/**
 * Collect every match of `urlRegexForMatch` in the string form of `input`.
 * Returns an empty array when nothing matches.
 */
const getUrls = input => {
  const matches = String(input).match(urlRegexForMatch)
  return matches ?? []
}

module.exports = {
Expand Down Expand Up @@ -536,7 +501,7 @@ module.exports = {
iso6393,
jsonld,
lang,
loadIframe,
loadIframe: require('./load-iframe'),
logo,
memoizeOne,
mimeExtension,
Expand Down
18 changes: 18 additions & 0 deletions packages/metascraper-helpers/src/load-iframe/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
'use strict'

const { Worker } = require('worker_threads')
const path = require('path')

const SCRIPT_PATH = path.resolve(__dirname, 'worker.js')

module.exports = (url, $, { timeout = 5000 } = {}) => {
const worker = new Worker(SCRIPT_PATH, {
workerData: { url, html: $.html(), timeout },
stdout: true,
stderr: true
})
const { promise, resolve, reject } = Promise.withResolvers()
worker.on('message', html => resolve($.load(html || '')))
worker.on('error', reject)
return promise
}
37 changes: 37 additions & 0 deletions packages/metascraper-helpers/src/load-iframe/worker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
'use strict'

const { workerData, parentPort } = require('node:worker_threads')
const { JSDOM, VirtualConsole } = require('jsdom')

/**
 * Render `html` in jsdom (scripts executed, sub-resources loaded) and return
 * the markup of the first iframe once it has fired `load`.
 *
 * @param {object} job
 * @param {string} job.url - URL handed to jsdom as the document URL.
 * @param {string} job.html - Markup to render.
 * @param {number} job.timeout - Milliseconds to wait for the iframe `load` event.
 * @returns {Promise<string|undefined>} The iframe document's outer HTML, or
 *   `undefined` when there is no iframe, the timeout elapses first, or the
 *   iframe exposes no document.
 */
async function main ({ url, html, timeout }) {
  const dom = new JSDOM(html, {
    url,
    // Fresh console that is not forwarded anywhere.
    virtualConsole: new VirtualConsole(),
    runScripts: 'dangerously',
    resources: 'usable'
  })

  let timeoutId

  try {
    const iframe = dom.window.document.querySelector('iframe')
    if (!iframe) return

    const waitForIframe = new Promise(resolve => {
      iframe.addEventListener(
        'load',
        // Optional chaining keeps a missing document from throwing inside the
        // listener, which would leave this promise pending forever.
        () => resolve(iframe.contentDocument?.documentElement?.outerHTML),
        { once: true }
      )
    })

    const timeoutReached = new Promise(resolve => {
      timeoutId = setTimeout(resolve, timeout)
    })

    return await Promise.race([waitForIframe, timeoutReached])
  } finally {
    // Whatever the outcome, drop the pending timer and shut the window down so
    // page timers and in-flight resource loads cannot keep the worker alive.
    clearTimeout(timeoutId)
    dom.window.close()
  }
}

// Entry point of the worker: run the job described by `workerData` and post the
// resulting iframe markup back to the parent thread. The posted value is
// `undefined` when the page has no iframe or the timeout wins the race.
main(workerData).then(html => parentPort.postMessage(html))
2 changes: 1 addition & 1 deletion packages/metascraper-helpers/test/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ const {
parseUrl,
url,
video
} = require('..')
} = require('../src')

const measure = fn => {
const time = process.hrtime()
Expand Down
11 changes: 11 additions & 0 deletions packages/metascraper-helpers/test/load-iframe.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,14 @@ test('wait `load` event', async t => {
const $iframe = await loadIframe(url, $)
t.true($iframe.html().includes('twitter:player'))
})

// Snapshot the markup returned by `loadIframe` for a page made of a single iframe.
test('markup is correct', async t => {
  const src = 'https://share.transistor.fm/e/e83b42d0'
  const url =
    'https://saas.transistor.fm/episodes/paul-jarvis-gaining-freedom-by-building-an-indie-business'

  const $page = cheerio.load(`<iframe src="${src}"></iframe>`)
  const $iframe = await loadIframe(url, $page)

  t.snapshot($iframe.html())
})
Loading
Loading