Skip to content

Commit f0bc51a

Browse files
authored
🐕 Refactor ordering of attempted fetch methods (#69)
1 parent 9936e5e commit f0bc51a

File tree

2 files changed

+69
-43
lines changed

2 files changed

+69
-43
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'jats-fetch': patch
3+
---
4+
5+
Refactor ordering of attempted fetch methods

packages/jats-fetch/src/download.ts

Lines changed: 64 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ import { defaultFetcher } from './utils.js';
2020
*
2121
* Throws on bad response and warns if response content type is not xml
2222
*/
23-
async function downloadFromUrl(
23+
async function downloadFromUrlOrThrow(
2424
session: ISession,
2525
jatsUrl: string,
2626
opts: ResolutionOptions,
@@ -52,6 +52,23 @@ async function downloadFromUrl(
5252
return data;
5353
}
5454

55+
/**
56+
* Attempt to download JATS from a URL and return a success or failure download result
57+
*/
58+
async function downloadFromUrl(
59+
session: ISession,
60+
jatsUrl: string,
61+
opts: ResolutionOptions,
62+
): Promise<DownloadResult> {
63+
try {
64+
const data = await downloadFromUrlOrThrow(session, jatsUrl, opts);
65+
if (data) return { success: true, source: jatsUrl, data };
66+
} catch (error) {
67+
session.log.debug((error as Error).message);
68+
}
69+
return { success: false, source: jatsUrl };
70+
}
71+
5572
type DoiLink = {
5673
URL: string;
5774
'content-type'?: 'application/xml' | 'application/pdf' | 'unspecified' | string;
@@ -101,54 +118,56 @@ async function getJatsUrlFromDoi(
101118
/**
102119
* Attempt to download JATS from provided input
103120
*
104-
* `urlOrDoi` may be (1) a local file, in which case, the file content is
121+
* `urlOrId` may be (1) a local file, in which case, the file content is
105122
* directly returned, (2) PubMed ID, PubMed Central ID, or DOI, in which case,
106123
* possible download links are constructed and followed, or (3) a direct
107124
* download URL, in which case, the content is fetched.
108125
*/
109126
export async function downloadJatsFromUrl(
110127
session: ISession,
111-
urlOrDoi: string,
128+
urlOrId: string,
112129
opts: ResolutionOptions = {},
113130
): Promise<DownloadResult> {
114-
if (fs.existsSync(urlOrDoi)) {
115-
session.log.debug(`JATS returned from local file ${urlOrDoi}`);
116-
const data = fs.readFileSync(urlOrDoi).toString();
117-
return { success: true, source: urlOrDoi, data };
131+
let result: DownloadResult = { success: false, source: urlOrId };
132+
if (fs.existsSync(urlOrId)) {
133+
session.log.debug(`JATS returned from local file ${urlOrId}`);
134+
const data = fs.readFileSync(urlOrId).toString();
135+
return { success: true, source: urlOrId, data };
118136
}
119-
const expectedUrls: string[] = [];
120-
try {
121-
// Custom resolvers are prioritized; in these cases we will know more about the "flavor" of the JATS
122-
expectedUrls.push(await customResolveJatsUrlFromDoi(session, urlOrDoi, opts));
123-
} catch {
124-
session.log.debug(`No custom resolvers match ${urlOrDoi}`);
125-
}
126-
expectedUrls.push(
127-
...(
128-
await Promise.all([
129-
constructJatsUrlFromPubMedCentral(session, urlOrDoi, opts),
130-
getJatsUrlFromDoi(session, urlOrDoi, opts),
131-
])
132-
).filter((u): u is string => !!u),
133-
);
134-
if (isUrl(urlOrDoi)) {
135-
expectedUrls.push(urlOrDoi);
136-
}
137-
if (expectedUrls.length > 0) {
138-
session.log.debug(['Trying URLs:\n', ...expectedUrls.map((url) => ` ${url}\n`)].join(' - '));
139-
for (let index = 0; index < expectedUrls.length; index++) {
140-
const url = expectedUrls[index];
141-
try {
142-
const data = await downloadFromUrl(session, url, opts);
143-
if (data) return { success: true, source: url, data };
144-
} catch (error) {
145-
session.log.debug((error as Error).message);
146-
}
137+
if (doi.validate(urlOrId)) {
138+
let url: string | undefined;
139+
try {
140+
url = await customResolveJatsUrlFromDoi(session, urlOrId, opts);
141+
} catch (error) {
142+
session.log.debug((error as Error).message);
143+
}
144+
if (url) {
145+
result = await downloadFromUrl(session, url, opts);
146+
if (result.success && result.data) return result;
147+
}
148+
url = await getJatsUrlFromDoi(session, urlOrId, opts);
149+
if (url) {
150+
result = await downloadFromUrl(session, url, opts);
151+
if (result.success && result.data) return result;
152+
}
153+
}
154+
if (urlOrId.startsWith('PMC')) {
155+
result = await getPubMedJatsFromS3(session, urlOrId);
156+
if (result.success && result.data) return result;
157+
}
158+
if (urlOrId.startsWith('PMC') || doi.validate(urlOrId)) {
159+
const url = await constructJatsUrlFromPubMedCentral(session, urlOrId, opts);
160+
if (url) {
161+
result = await downloadFromUrl(session, url, opts);
147162
}
148-
return { success: false, source: expectedUrls[0] };
163+
return result;
164+
}
165+
if (isUrl(urlOrId)) {
166+
result = await downloadFromUrl(session, urlOrId, opts);
167+
return result;
149168
}
150-
session.log.debug(`Could not find ${urlOrDoi} locally or resolve it to a valid JATS url`);
151-
return { success: false, source: urlOrDoi };
169+
session.log.debug(`Could not find ${urlOrId} locally or resolve it to a valid JATS url`);
170+
return result;
152171
}
153172

154173
/**
@@ -178,6 +197,7 @@ export async function jatsFetch(
178197
}
179198
let output = opts.output;
180199
let filename: string | undefined;
200+
let altInput: string | undefined;
181201
if (input.endsWith('.tar.gz')) {
182202
// If input looks like a data repository URL, assume we want the data.
183203
opts.data = true;
@@ -192,14 +212,16 @@ export async function jatsFetch(
192212
const pmcid = await convertPMID2PMCID(session, input);
193213
if (pmcid) {
194214
session.log.debug(`Resolved input ${input} to PMC ID: ${pmcid}`);
215+
// We cannot do anything with original PMID input at this point, so override it
195216
input = pmcid;
196217
}
197218
}
198219
if (doi.validate(input)) {
199220
const pmcid = await convertDOI2PMCID(session, input);
200221
if (pmcid) {
201222
session.log.debug(`Resolved input ${input} to PMC ID: ${pmcid}`);
202-
input = pmcid;
223+
// We may still be able to use the original DOI input, so keep both the DOI and the PMC ID
224+
altInput = pmcid;
203225
}
204226
}
205227
if (!output) output = opts.data ? `${input}` : '.';
@@ -215,13 +237,12 @@ export async function jatsFetch(
215237
// This downloads all data and renames JATS - it will throw if it does not work
216238
result = await getPubMedJatsFromData(session, input, path.dirname(output), opts.listing);
217239
}
218-
// We can do better with doi/pubmed -> PMC conversions to use this path more
219-
if (!result?.data && input.startsWith('PMC')) {
220-
result = await getPubMedJatsFromS3(session, input);
221-
}
222240
if (!result?.data) {
223241
result = await downloadJatsFromUrl(session, input);
224242
}
243+
if (!result?.data && altInput) {
244+
result = await downloadJatsFromUrl(session, altInput);
245+
}
225246
if (!result?.data && input.startsWith('PMC')) {
226247
// Downloading all the data for just the XML should be last resort
227248
result = await getPubMedJatsFromData(session, input, path.dirname(output), opts.listing);

0 commit comments

Comments
 (0)