diff --git a/package.json b/package.json
index 9e5316afa3..3fe952765d 100644
--- a/package.json
+++ b/package.json
@@ -174,8 +174,7 @@
"postcss": "latest",
"postcss-focus": "latest",
"simple-git-hooks": "latest",
- "standard": "latest",
- "standard-markdown": "latest"
+ "standard": "latest"
},
"engines": {
"node": ">= 16"
@@ -189,7 +188,7 @@
"coverage": "c8 report --reporter=text-lcov > coverage/lcov.info",
"dev": "concurrently \"gulp\" \"npm run dev:server\"",
"dev:server": "browser-sync start --server --files \"index.html, README.md, static/**/*.(css|js)\"",
- "lint": "standard-markdown README.md && standard",
+ "lint": "standard",
"pretest": "npm run lint",
"release": "lerna publish --yes --sort --conventional-commits -m \"chore(release): %s\" --create-release github",
"test": "c8 pnpm --recursive test",
@@ -213,9 +212,6 @@
"prettier-standard",
"standard --fix"
],
- "*.md": [
- "standard-markdown"
- ],
"package.json": [
"finepack"
]
diff --git a/packages/metascraper-helpers/package.json b/packages/metascraper-helpers/package.json
index bb76be2003..db88980034 100644
--- a/packages/metascraper-helpers/package.json
+++ b/packages/metascraper-helpers/package.json
@@ -3,7 +3,7 @@
"description": "Collection of helper functions used by metascraper",
"homepage": "https://github.com/microlinkhq/metascraper/packages/metascraper-helpers",
"version": "5.46.11",
- "main": "index.js",
+ "main": "src/index.js",
"author": {
"email": "hello@microlink.io",
"name": "microlink.io",
diff --git a/packages/metascraper-helpers/index.js b/packages/metascraper-helpers/src/index.js
similarity index 93%
rename from packages/metascraper-helpers/index.js
rename to packages/metascraper-helpers/src/index.js
index 9668822441..bf65c751ee 100644
--- a/packages/metascraper-helpers/index.js
+++ b/packages/metascraper-helpers/src/index.js
@@ -5,7 +5,6 @@ const debug = require('debug-logfmt')('metascraper:find-rule')
const condenseWhitespace = require('condense-whitespace')
const { getExtension: mimeExtension } = require('mime')
const capitalize = require('microsoft-capitalize')
-const { JSDOM, VirtualConsole } = require('jsdom')
const isRelativeUrl = require('is-relative-url')
const fileExtension = require('file-extension')
const _normalizeUrl = require('normalize-url')
@@ -466,40 +465,6 @@ const composeRule =
const has = value =>
value !== undefined && !Number.isNaN(value) && hasValues(value)
-const loadIframe = (url, $, { timeout = 5000 } = {}) =>
- new Promise(resolve => {
- const dom = new JSDOM($.html(), {
- url,
- virtualConsole: new VirtualConsole(),
- runScripts: 'dangerously',
- resources: 'usable'
- })
-
- const done = (html = '') => resolve($.load(html))
-
- const listen = (element, method, fn) =>
- element[`${method}EventListener`]('load', fn, {
- capture: true,
- once: true,
- passive: true
- })
-
- const iframe = dom.window.document.querySelector('iframe')
- if (!iframe) return done()
-
- const timer = setTimeout(() => {
- listen(iframe, 'remove', load)
- done()
- }, timeout)
-
- function load () {
- clearTimeout(timer)
- done(iframe.contentDocument.documentElement.outerHTML)
- }
-
- listen(iframe, 'add', load)
- })
-
const getUrls = input => String(input).match(urlRegexForMatch) ?? []
module.exports = {
@@ -536,7 +501,7 @@ module.exports = {
iso6393,
jsonld,
lang,
- loadIframe,
+ loadIframe: require('./load-iframe'),
logo,
memoizeOne,
mimeExtension,
diff --git a/packages/metascraper-helpers/src/load-iframe/index.js b/packages/metascraper-helpers/src/load-iframe/index.js
new file mode 100644
index 0000000000..c12f965288
--- /dev/null
+++ b/packages/metascraper-helpers/src/load-iframe/index.js
@@ -0,0 +1,47 @@
+'use strict'
+
+const { Worker } = require('worker_threads')
+const path = require('path')
+
+const SCRIPT_PATH = path.resolve(__dirname, 'worker.js')
+
+/**
+ * Load the first iframe of `$` in a worker thread and return its document.
+ *
+ * The markup from `$.html()` is handed to `worker.js` through `workerData`;
+ * whatever the worker posts back is parsed with `$.load()`.
+ *
+ * @param {string} url - forwarded to the worker as `workerData.url`.
+ * @param {object} $ - must expose `html()` and `load()` (presumably a cheerio
+ *   instance; confirm against callers).
+ * @param {object} [opts]
+ * @param {number} [opts.timeout=5000] - forwarded to the worker as `workerData.timeout`.
+ * @returns {Promise<object>} resolves with `$.load(html)`, falling back to ''
+ *   when the worker posts a falsy value; rejects on a worker error or when
+ *   the worker exits before posting anything.
+ */
+module.exports = (url, $, { timeout = 5000 } = {}) =>
+  new Promise((resolve, reject) => {
+    // `new Promise` instead of `Promise.withResolvers()`, which is not available
+    // on every Node.js release allowed by the root `engines` field (>= 16).
+    const worker = new Worker(SCRIPT_PATH, {
+      workerData: { url, html: $.html(), timeout },
+      // keep worker output out of the parent's stdout/stderr
+      stdout: true,
+      stderr: true
+    })
+
+    worker.once('message', html => {
+      resolve($.load(html || ''))
+      // the result is in; stop the thread so leftover page activity cannot
+      // keep it alive
+      worker.terminate()
+    })
+    worker.once('error', reject)
+    // has no effect once the promise is settled by a message or an error
+    worker.once('exit', code =>
+      reject(
+        new Error(`load-iframe worker exited with code ${code} before posting a result`)
+      )
+    )
+  })
diff --git a/packages/metascraper-helpers/src/load-iframe/worker.js b/packages/metascraper-helpers/src/load-iframe/worker.js
new file mode 100644
index 0000000000..0a23956884
--- /dev/null
+++ b/packages/metascraper-helpers/src/load-iframe/worker.js
@@ -0,0 +1,53 @@
+'use strict'
+
+const { workerData, parentPort } = require('node:worker_threads')
+const { JSDOM, VirtualConsole } = require('jsdom')
+
+/**
+ * Render `html` with jsdom and wait for the first iframe to load.
+ *
+ * @param {object} data - the `workerData` sent by the parent thread.
+ * @param {string} data.url - passed to jsdom as the document URL.
+ * @param {string} data.html - markup to render.
+ * @param {number} data.timeout - milliseconds to wait for the `load` event.
+ * @returns {Promise<string|undefined>} the iframe document's `outerHTML`, or
+ *   `undefined` when there is no iframe or the timeout wins the race.
+ */
+async function main ({ url, html, timeout }) {
+  const dom = new JSDOM(html, {
+    url,
+    virtualConsole: new VirtualConsole(),
+    // page scripts and subresources are executed/fetched
+    runScripts: 'dangerously',
+    resources: 'usable'
+  })
+
+  let timeoutId
+
+  try {
+    const iframe = dom.window.document.querySelector('iframe')
+    if (!iframe) return
+
+    const waitForIframe = new Promise(resolve => {
+      iframe.addEventListener(
+        'load',
+        () => resolve(iframe.contentDocument.documentElement.outerHTML),
+        { once: true }
+      )
+    })
+
+    const timeoutReached = new Promise(resolve => {
+      timeoutId = setTimeout(resolve, timeout)
+    })
+
+    return await Promise.race([waitForIframe, timeoutReached])
+  } finally {
+    // runs on every path: drop the pending timer and shut the window down so
+    // page timers do not keep this thread alive after the result is known
+    clearTimeout(timeoutId)
+    dom.window.close()
+  }
+}
+
+// `undefined` is posted when no iframe markup was captured
+main(workerData).then(html => parentPort.postMessage(html))
diff --git a/packages/metascraper-helpers/test/index.js b/packages/metascraper-helpers/test/index.js
index cd26d6d34d..2cdfd5bbd7 100644
--- a/packages/metascraper-helpers/test/index.js
+++ b/packages/metascraper-helpers/test/index.js
@@ -29,7 +29,7 @@ const {
parseUrl,
url,
video
-} = require('..')
+} = require('../src')
const measure = fn => {
const time = process.hrtime()
diff --git a/packages/metascraper-helpers/test/load-iframe.js b/packages/metascraper-helpers/test/load-iframe.js
index ba03d3760d..79b8a62429 100644
--- a/packages/metascraper-helpers/test/load-iframe.js
+++ b/packages/metascraper-helpers/test/load-iframe.js
@@ -22,3 +22,14 @@ test('wait `load` event', async t => {
const $iframe = await loadIframe(url, $)
t.true($iframe.html().includes('twitter:player'))
})
+
+test('markup is correct', async t => {
+  const url =
+    'https://saas.transistor.fm/episodes/paul-jarvis-gaining-freedom-by-building-an-indie-business'
+  const src = 'https://share.transistor.fm/e/e83b42d0'
+  // Embed the player: without an iframe in the markup `loadIframe` has
+  // nothing to load and the snapshot would be an empty document.
+  const $page = cheerio.load(`<iframe src="${src}"></iframe>`)
+  const $ = await loadIframe(url, $page)
+  t.snapshot($.html())
+})
diff --git a/packages/metascraper-helpers/test/snapshots/load-iframe.js.md b/packages/metascraper-helpers/test/snapshots/load-iframe.js.md
new file mode 100644
index 0000000000..c1b83cc0e2
--- /dev/null
+++ b/packages/metascraper-helpers/test/snapshots/load-iframe.js.md
@@ -0,0 +1,680 @@
+# Snapshot report for `test/load-iframe.js`
+
+The actual snapshot is saved in `load-iframe.js.snap`.
+
+Generated by [AVA](https://avajs.dev).
+
+## markup is correct
+
+> Snapshot 1
+
+ `
␊
+ ␊
+ ␊
+ Paul Jarvis: gaining freedom by building an indie business - Audio player ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
40 ? 30 : 10)" @keyup.document.left="seekBySeconds(-10)" @keyup.document.m="toggleMute" @keyup.document.s="toggleSpeed" @play="play(false, true)" @loadedmetadata="handleLoadedMetadata" @pause="pause(true)" preload="none" @timejump.window="seekToSeconds($event.detail.timestamp); shareTimeFormatted = formatTime($event.detail.timestamp)" src="https://2.gum.fm/op3.dev/e/dts.podtrac.com/redirect.mp3/media.transistor.fm/e83b42d0/9e93424b.mp3?src=player"> ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ 10 ␊
+ ␊
+ ␊
+ ␊
+
1X ␊
+ ␊
+
40 ? 30 : 10)" class="seek-seconds-button" title="Fast Forward 30 seconds">␊
+ 30 ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ 00:00 ␊
+ 01:11:43 ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ Paul Jarvis: gaining freedom by building an indie business ␊
+ ␊
+ 72 min ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ ␊
+
␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ Start at ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ Build Your SaaS ␊
+ Trailer ␊
+ Bonus ␊
+ Episode 155 ␊
+ Season 1 ␊
+
␊
+ ␊
+
Paul Jarvis: gaining freedom by building an indie business ␊
+ ␊
+
␊
+ ␊
+
Justin catches up with his old internet friend Paul Jarvis. Today, Paul co-founded
Fathom Analytics with
Jack Ellis : a simple alternative to Google Analytics. Paul is also the author of the book "
Company of One ," which has influenced a whole generation of indie entrepreneurs (and has been reviewed by Cal Newport, Chris Guillebeau, Ben Chestnut, Tiago Forte, and more). Previously, Justin and Paul did a
weekly mastermind , where they supported and encouraged each other around our indie businesses. They decided to do a catch-up call and recorded it so you could listen in. 👍
Highlights: Links:
␊
+
Thanks to our monthly supporters ␊
+
␊
+ Pascal from sharpen.page ␊
+ Rewardful.com ␊
+ Greg Park ␊
+ Mitchell Davis from RecruitKit.com.au ␊
+ Marcel Fahle, wearebold.af ␊
+ Bill Condo (@mavrck) ␊
+ Ward from MemberSpace.com ␊
+ Evandro Sasse ␊
+ Austin Loveless ␊
+ Michael Sitver ␊
+ Colin Gray ␊
+ Dave Giunta ␊
+ ␊
+
␊
+ ★ Support this podcast on Patreon ★ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ ␊
+ 00:10 ␊
+ Intro ␊
+ ␊
+ 02:20 ␊
+ Being off the internet ␊
+ ␊
+ 03:58 ␊
+ What's a typical day for Paul? ␊
+ ␊
+ 06:21 ␊
+ Looking back at our Mastermind call ␊
+ ␊
+ 08:08 ␊
+ There's no beginning and no end ␊
+ ␊
+ 10:36 ␊
+ Things that are out of your control affect your business ␊
+ ␊
+ 13:08 ␊
+ Does Justin's surfing metaphor make sense to a surfer? ␊
+ ␊
+ 16:11 ␊
+ How would you start an indie business in 2023? ␊
+ ␊
+ 22:05 ␊
+ You've got to get in motion ␊
+ ␊
+ 25:08 ␊
+ Using products in your category for a long time ␊
+ ␊
+ 27:53 ␊
+ Is there still any room in Saas? ␊
+ ␊
+ 31:56 ␊
+ The act of making the bet ␊
+ ␊
+ 38:45 ␊
+ Is freelancing still viable in 2023? ␊
+ ␊
+ 42:55 ␊
+ Company design is lifestyle design ␊
+ ␊
+ 45:00 ␊
+ Worrying about being stagnant ␊
+ ␊
+ 47:20 ␊
+ How do you handle customer feature requests? ␊
+ ␊
+ 52:08 ␊
+ It's ok to be late to a shift in the market ␊
+ ␊
+ 58:24 ␊
+ Caring is an indie advantage ␊
+ ␊
+ 01:05:05 ␊
+ Collaboration is what gets us anywhere ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ ␊
+
␊
+ ␊
+ ␊
+ ␊
+ `
diff --git a/packages/metascraper-helpers/test/snapshots/load-iframe.js.snap b/packages/metascraper-helpers/test/snapshots/load-iframe.js.snap
new file mode 100644
index 0000000000..9521e35cf0
Binary files /dev/null and b/packages/metascraper-helpers/test/snapshots/load-iframe.js.snap differ
diff --git a/packages/metascraper-readability/benchmark/document-write.js b/packages/metascraper-readability/benchmark/document-write.js
index 3cd4a4d38a..0bbd01a7b9 100644
--- a/packages/metascraper-readability/benchmark/document-write.js
+++ b/packages/metascraper-readability/benchmark/document-write.js
@@ -2,9 +2,10 @@
const { Window } = require('happy-dom')
const { readFileSync } = require('fs')
+const path = require('path')
const url = 'https://arxiv.org/pdf/2412.06592'
-const html = readFileSync('./fixture.html', 'utf8')
+const html = readFileSync(path.resolve(__dirname, './fixture.html'), 'utf8')
const isEqual = (value1, value2) =>
JSON.stringify(value1) === JSON.stringify(value2)
@@ -20,7 +21,7 @@ const cases = {
const window = new Window({ url })
const document = window.document
document.write(html)
- await window.happyDOM.waitUntilComplete()
+ // await window.happyDOM.waitUntilComplete()
return document
}
}
diff --git a/packages/metascraper-readability/src/index.js b/packages/metascraper-readability/src/index.js
index 76b116fc57..3531a9686f 100644
--- a/packages/metascraper-readability/src/index.js
+++ b/packages/metascraper-readability/src/index.js
@@ -9,7 +9,9 @@ const SCRIPT_PATH = path.resolve(__dirname, 'worker.js')
const readability = asyncMemoizeOne((url, html, readabilityOpts) => {
const worker = new Worker(SCRIPT_PATH, {
- workerData: { url, html, readabilityOpts }
+ workerData: { url, html, readabilityOpts },
+ stdout: true,
+ stderr: true
})
const { promise, resolve, reject } = Promise.withResolvers()
worker.on('message', message => resolve(JSON.parse(message)))
diff --git a/packages/metascraper-readability/src/worker.js b/packages/metascraper-readability/src/worker.js
index 36d4fdad14..bf4a53f905 100644
--- a/packages/metascraper-readability/src/worker.js
+++ b/packages/metascraper-readability/src/worker.js
@@ -15,7 +15,7 @@ const getDocument = ({ url, html }) => {
const { Window } = require('happy-dom')
const window = new Window({ url })
const document = window.document
- document.documentElement.innerHTML = html
+ document.write(html)
return document
}
diff --git a/packages/metascraper/test/integration/the-verge/snapshots/index.js.md b/packages/metascraper/test/integration/the-verge/snapshots/index.js.md
index 78d0f557dd..7a0805ce5e 100644
--- a/packages/metascraper/test/integration/the-verge/snapshots/index.js.md
+++ b/packages/metascraper/test/integration/the-verge/snapshots/index.js.md
@@ -10,7 +10,7 @@ Generated by [AVA](https://avajs.dev).
{
audio: null,
- author: null,
+ author: 'Nick Statt',
date: '2016-05-24T20:49:03.000Z',
description: 'Apple could open Siri to third-party apps very soon',
image: 'https://img.connatix.com/pid-13bd7676-705c-4894-8449-b8e67fcddc1f/8ffb59d5-b093-4d68-aa90-1dd263c5d84d/1_th.jpg?crop=600:338,smart&width=600&height=338&quality=60&fit=crop',
diff --git a/packages/metascraper/test/integration/the-verge/snapshots/index.js.snap b/packages/metascraper/test/integration/the-verge/snapshots/index.js.snap
index 4b80c682d4..e0dff8ea67 100644
Binary files a/packages/metascraper/test/integration/the-verge/snapshots/index.js.snap and b/packages/metascraper/test/integration/the-verge/snapshots/index.js.snap differ
diff --git a/packages/metascraper/test/integration/transistor/snapshots/index.js.md b/packages/metascraper/test/integration/transistor/snapshots/index.js.md
index b5c4bef335..f6c56afd21 100644
--- a/packages/metascraper/test/integration/transistor/snapshots/index.js.md
+++ b/packages/metascraper/test/integration/transistor/snapshots/index.js.md
@@ -15,7 +15,7 @@ Generated by [AVA](https://avajs.dev).
description: null,
image: 'https://images.transistor.fm/file/transistor/images/episode/263474/medium_1590257639-artwork.jpg',
lang: null,
- logo: 'https://transistor.fm/favicon.ico',
+ logo: 'https://assets.transistor.fm/assets/favicon-6e847568a3fbd949ba82be5cb10951e28b51f7f58d6e573235b0cddf494de2c0.ico',
publisher: '#032 – Before and After Product-Market Fit with Peter and Calvin from Segment',
title: 'Transistor Embed | #032 – Before and After Product-Market Fit with Peter and Calvin from Segment',
url: 'https://share.transistor.fm/e/70c487ed',
diff --git a/packages/metascraper/test/integration/transistor/snapshots/index.js.snap b/packages/metascraper/test/integration/transistor/snapshots/index.js.snap
index 06b77c31bd..b986c54e47 100644
Binary files a/packages/metascraper/test/integration/transistor/snapshots/index.js.snap and b/packages/metascraper/test/integration/transistor/snapshots/index.js.snap differ
diff --git a/packages/metascraper/test/integration/vimeo/snapshots/index.js.md b/packages/metascraper/test/integration/vimeo/snapshots/index.js.md
index 8de79a0620..7b1c2f3543 100644
--- a/packages/metascraper/test/integration/vimeo/snapshots/index.js.md
+++ b/packages/metascraper/test/integration/vimeo/snapshots/index.js.md
@@ -13,7 +13,7 @@ Generated by [AVA](https://avajs.dev).
author: 'MEGAFORCE',
date: '2021-10-22T08:58:48.000Z',
description: 'FULL CREDITS: AGENCY & PRODUCTION: @riffrafffilms CREATIVE & DIRECTION: @the_megaforce EXEC PRODUCER/OWNER: Matthew Fone PRODUCER: @cathyhoodx PRODUCTION…',
- image: 'https://i.vimeocdn.com/custom_asset/21a684b9352e090b70ace38910e58e70',
+ image: 'https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F1280513620-2e984a8e28be47e615461106d5ff44a4e8d9bf6f03d53c7e2_1280x853&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png',
lang: 'en',
logo: 'https://i.vimeocdn.com/favicon/main-touch_180',
publisher: 'Vimeo',
diff --git a/packages/metascraper/test/integration/vimeo/snapshots/index.js.snap b/packages/metascraper/test/integration/vimeo/snapshots/index.js.snap
index 48a3655a25..94fe6b2c62 100644
Binary files a/packages/metascraper/test/integration/vimeo/snapshots/index.js.snap and b/packages/metascraper/test/integration/vimeo/snapshots/index.js.snap differ