@@ -20,7 +20,7 @@ import { defaultFetcher } from './utils.js';
2020 *
2121 * Throws on bad response and warns if response content type is not xml
2222 */
23- async function downloadFromUrl (
23+ async function downloadFromUrlOrThrow (
2424 session : ISession ,
2525 jatsUrl : string ,
2626 opts : ResolutionOptions ,
@@ -52,6 +52,23 @@ async function downloadFromUrl(
5252 return data ;
5353}
5454
55+ /**
56+ * Attempt to download JATS from `jatsUrl` without throwing: returns a successful result carrying the data, or a failed result when the download throws (the error message is logged at debug level) or yields no data
57+ */
58+ async function downloadFromUrl (
59+ session : ISession ,
60+ jatsUrl : string ,
61+ opts : ResolutionOptions ,
62+ ) : Promise < DownloadResult > {
63+ try {
64+ const data = await downloadFromUrlOrThrow ( session , jatsUrl , opts ) ;
65+ if ( data ) return { success : true , source : jatsUrl , data } ;
66+ } catch ( error ) {
67+ session . log . debug ( ( error as Error ) . message ) ; // best-effort: the failure is reported through the return value instead of being rethrown
68+ }
69+ return { success : false , source : jatsUrl } ;
70+ }
71+
5572type DoiLink = {
5673 URL : string ;
5774 'content-type' ?: 'application/xml' | 'application/pdf' | 'unspecified' | string ;
@@ -101,54 +118,56 @@ async function getJatsUrlFromDoi(
101118/**
102119 * Attempt to download JATS from provided input
103120 *
104- * `urlOrDoi ` may be (1) a local file, in which case, the file content is
121+ * `urlOrId ` may be (1) a local file, in which case, the file content is
105122 * directly returned, (2) PubMed ID, PubMed Central ID, or DOI, in which case,
106123 * possible download links are constructed and followed, or (3) a direct
107124 * download URL, in which case, the content is fetched. The result starts as `{ success: false }` and is overwritten by each download attempt. NOTE(review): the updated branches only test for a DOI or a `PMC` prefix — confirm bare PubMed IDs are converted by the caller.
108125 */
109126export async function downloadJatsFromUrl (
110127 session : ISession ,
111- urlOrDoi : string ,
128+ urlOrId : string ,
112129 opts : ResolutionOptions = { } ,
113130) : Promise < DownloadResult > {
114- if ( fs . existsSync ( urlOrDoi ) ) {
115- session . log . debug ( `JATS returned from local file ${ urlOrDoi } ` ) ;
116- const data = fs . readFileSync ( urlOrDoi ) . toString ( ) ;
117- return { success : true , source : urlOrDoi , data } ;
131+ let result : DownloadResult = { success : false , source : urlOrId } ; // default failure result, replaced by each attempt below
132+ if ( fs . existsSync ( urlOrId ) ) {
133+ session . log . debug ( `JATS returned from local file ${ urlOrId } ` ) ;
134+ const data = fs . readFileSync ( urlOrId ) . toString ( ) ;
135+ return { success : true , source : urlOrId , data } ;
118136 }
119- const expectedUrls : string [ ] = [ ] ;
120- try {
121- // Custom resolvers are prioritized; in these cases we will know more about the "flavor" of the JATS
122- expectedUrls . push ( await customResolveJatsUrlFromDoi ( session , urlOrDoi , opts ) ) ;
123- } catch {
124- session . log . debug ( `No custom resolvers match ${ urlOrDoi } ` ) ;
125- }
126- expectedUrls . push (
127- ...(
128- await Promise . all ( [
129- constructJatsUrlFromPubMedCentral ( session , urlOrDoi , opts ) ,
130- getJatsUrlFromDoi ( session , urlOrDoi , opts ) ,
131- ] )
132- ) . filter ( ( u ) : u is string => ! ! u ) ,
133- ) ;
134- if ( isUrl ( urlOrDoi ) ) {
135- expectedUrls . push ( urlOrDoi ) ;
136- }
137- if ( expectedUrls . length > 0 ) {
138- session . log . debug ( [ 'Trying URLs:\n' , ...expectedUrls . map ( ( url ) => ` ${ url } \n` ) ] . join ( ' - ' ) ) ;
139- for ( let index = 0 ; index < expectedUrls . length ; index ++ ) {
140- const url = expectedUrls [ index ] ;
141- try {
142- const data = await downloadFromUrl ( session , url , opts ) ;
143- if ( data ) return { success : true , source : url , data } ;
144- } catch ( error ) {
145- session . log . debug ( ( error as Error ) . message ) ;
146- }
137+ if ( doi . validate ( urlOrId ) ) {
138+ let url : string | undefined ;
139+ try {
140+ url = await customResolveJatsUrlFromDoi ( session , urlOrId , opts ) ;
141+ } catch ( error ) {
142+ session . log . debug ( ( error as Error ) . message ) ;
143+ }
144+ if ( url ) {
145+ result = await downloadFromUrl ( session , url , opts ) ;
146+ if ( result . success && result . data ) return result ;
147+ }
148+ url = await getJatsUrlFromDoi ( session , urlOrId , opts ) ;
149+ if ( url ) {
150+ result = await downloadFromUrl ( session , url , opts ) ;
151+ if ( result . success && result . data ) return result ;
152+ }
153+ }
154+ if ( urlOrId . startsWith ( 'PMC' ) ) {
155+ result = await getPubMedJatsFromS3 ( session , urlOrId ) ;
156+ if ( result . success && result . data ) return result ;
157+ }
158+ if ( urlOrId . startsWith ( 'PMC' ) || doi . validate ( urlOrId ) ) {
159+ const url = await constructJatsUrlFromPubMedCentral ( session , urlOrId , opts ) ;
160+ if ( url ) {
161+ result = await downloadFromUrl ( session , url , opts ) ;
147162 }
148- return { success : false , source : expectedUrls [ 0 ] } ;
163+ return result ; // NOTE(review): returns even when this attempt failed, so an input that passes `doi.validate` never reaches the `isUrl` fallback below — confirm this is intended
164+ }
165+ if ( isUrl ( urlOrId ) ) {
166+ result = await downloadFromUrl ( session , urlOrId , opts ) ;
167+ return result ;
149168 }
150- session . log . debug ( `Could not find ${ urlOrDoi } locally or resolve it to a valid JATS url` ) ;
151- return { success : false , source : urlOrDoi } ;
169+ session . log . debug ( `Could not find ${ urlOrId } locally or resolve it to a valid JATS url` ) ;
170+ return result ;
152171}
153172
154173/**
@@ -178,6 +197,7 @@ export async function jatsFetch(
178197 }
179198 let output = opts . output ;
180199 let filename : string | undefined ;
200+ let altInput : string | undefined ;
181201 if ( input . endsWith ( '.tar.gz' ) ) {
182202 // If input looks like a data repository URL, assume we want the data.
183203 opts . data = true ;
@@ -192,14 +212,16 @@ export async function jatsFetch(
192212 const pmcid = await convertPMID2PMCID ( session , input ) ;
193213 if ( pmcid ) {
194214 session . log . debug ( `Resolved input ${ input } to PMC ID: ${ pmcid } ` ) ;
215+ // We cannot do anything with original PMID input at this point, so override it
195216 input = pmcid ;
196217 }
197218 }
198219 if ( doi . validate ( input ) ) {
199220 const pmcid = await convertDOI2PMCID ( session , input ) ;
200221 if ( pmcid ) {
201222 session . log . debug ( `Resolved input ${ input } to PMC ID: ${ pmcid } ` ) ;
202- input = pmcid ;
223+ // We still may be able to use original DOI input, so keep both DOI and PMC
224+ altInput = pmcid ;
203225 }
204226 }
205227 if ( ! output ) output = opts . data ? `${ input } ` : '.' ;
@@ -215,13 +237,12 @@ export async function jatsFetch(
215237 // This downloads all data and renames JATS - it will throw if it does not work
216238 result = await getPubMedJatsFromData ( session , input , path . dirname ( output ) , opts . listing ) ;
217239 }
218- // We can do better with doi/pubmed -> PMC conversions to use this path more
219- if ( ! result ?. data && input . startsWith ( 'PMC' ) ) {
220- result = await getPubMedJatsFromS3 ( session , input ) ;
221- }
222240 if ( ! result ?. data ) {
223241 result = await downloadJatsFromUrl ( session , input ) ;
224242 }
243+ if ( ! result ?. data && altInput ) {
244+ result = await downloadJatsFromUrl ( session , altInput ) ;
245+ }
225246 if ( ! result ?. data && input . startsWith ( 'PMC' ) ) {
226247 // Downloading all the data for just the XML should be last resort
227248 result = await getPubMedJatsFromData ( session , input , path . dirname ( output ) , opts . listing ) ;
0 commit comments