|
| 1 | +using Pkg |
| 2 | +Pkg.instantiate() |
| 3 | + |
| 4 | +using HTTP |
| 5 | +using JSON |
| 6 | + |
"""
    DocumenterSearchEntry

A single record of the search index produced by Documenter.jl. The fields mirror the JSON
keys of each entry in a `search_index.js` file. For a real-world example, see:
https://github.com/TuringLang/DynamicPPL.jl/blob/gh-pages/v0.39.1/search_index.js
"""
struct DocumenterSearchEntry
    # Link target of the entry; may contain a `#anchor` part.
    location::String
    # Name of the page the entry belongs to.
    page::String
    # Title of the entry itself.
    title::String
    # Searchable text of the entry.
    text::String
    # Category label assigned by Documenter.jl.
    category::String
end
| 20 | + |
"""
    QuartoSearchEntry

A single record of the search index used by Quarto. The fields mirror the JSON keys of
each entry in Quarto's `search.json`. For a real-world example, see:
https://github.com/TuringLang/docs/blob/gh-pages/search_original.json
"""
struct QuartoSearchEntry
    "identifier of the entry"
    objectID::String
    "link target of the entry"
    href::String
    "title of page"
    title::String
    "section name if applicable"
    section::String
    "searchable text of the entry"
    text::String
    "breadcrumbs of the entry, or `nothing` if there are none"
    crumbs::Union{Vector{String},Nothing}
end
| 37 | + |
"""
    QuartoSearchEntry(doc_entry::DocumenterSearchEntry, repo::String) -> QuartoSearchEntry

Convert a `DocumenterSearchEntry` into a `QuartoSearchEntry`.

# Arguments
- `doc_entry`: the Documenter.jl search entry to convert.
- `repo`: name of the repository the entry comes from (e.g. `"DynamicPPL.jl"`). It is
  used both to build the link and to prefix the title shown in the search results.

# Returns
A `QuartoSearchEntry` whose `objectID` and `href` are both the rewritten link, whose
`title` is `"[repo] page"`, whose `section` is the Documenter entry's title, and whose
`crumbs` is `nothing`.
"""
function QuartoSearchEntry(doc_entry::DocumenterSearchEntry, repo::String)::QuartoSearchEntry
    # Because our links are relative to turinglang.org/docs/, an entry from say
    # DynamicPPL.jl needs to be prefixed with `../DynamicPPL.jl/stable/` to work correctly.
    #
    # These are URLs, not filesystem paths, so they are assembled with explicit `/`
    # separators rather than `joinpath` (which would use `\` on Windows).
    base = "../" * repo * "/stable/"

    # When opening a Documenter.jl page, if the query parameter `q` is nonempty, it will
    # open up a search bar with that query prefilled. In contrast Quarto stores the query
    # parameter in case the search bar is reopened, but it doesn't actually open the
    # search bar.
    #
    # Now, if you search for `your_search_term`, Quarto always adds
    # `?q=your_search_term` to its search bar links, so that the search bar is
    # 'prefilled' the next time it is opened. We want to _disable_ this behaviour for
    # Documenter.jl links, because it's quite annoying to navigate somewhere else and have
    # a search bar pop up, so we sneak an empty query parameter into the URL. This is a
    # real hack, but the alternative would be to modify Quarto's JS code itself, which is
    # probably worse.
    location = if occursin("#", doc_entry.location)
        # Query parameters must come before the anchor, hence the order here.
        before_anchor, anchor = split(doc_entry.location, "#"; limit=2)
        base * before_anchor * "?q=#" * anchor
    else
        page = base * doc_entry.location
        # Make sure exactly one `/` separates the page from the query string.
        endswith(page, "/") ? page * "?q=" : page * "/?q="
    end
    return QuartoSearchEntry(
        # objectID
        location,
        # href
        location,
        # title
        "[$repo] $(doc_entry.page)",
        # section
        doc_entry.title,
        # text
        doc_entry.text,
        # crumbs (no idea what to put here)
        nothing,
    )
end
| 84 | + |
"""
    get_quarto_search_index() -> Vector{QuartoSearchEntry}

Load the Quarto search index of the main docs site.

If `_site/search.json` exists (i.e. the docs have already been built locally), that file
is parsed. Otherwise `search_original.json` is downloaded from the `gh-pages` branch of
the TuringLang/docs GitHub repository. A warning is logged for every entry whose
`objectID` differs from its `href`.
"""
function get_quarto_search_index()
    local_index = "_site/search.json"
    remote_index = "https://raw.githubusercontent.com/TuringLang/docs/refs/heads/gh-pages/search_original.json"
    entries = if isfile(local_index)
        @info "Using local search index..."
        JSON.parsefile(local_index, Vector{QuartoSearchEntry})
    else
        @info "Downloading search index from GitHub..."
        response = HTTP.get(remote_index)
        JSON.parse(String(response.body), Vector{QuartoSearchEntry})
    end
    # Manual inspection of the search index suggests that `objectID` and `href` always
    # match. It is unclear whether Quarto guarantees this, so warn if they ever differ.
    for entry in entries
        entry.objectID == entry.href && continue
        @warn "mismatched objectID and href" objectID=entry.objectID href=entry.href
    end
    return entries
end
| 112 | + |
"""
    get_documenter_search_index(repo::String) -> Vector{DocumenterSearchEntry}

Fetch the Documenter.jl search index for the given repository from its published
documentation at `https://turinglang.org/<repo>/stable/search_index.js`.

This assumes that there is a 'stable' version of the docs (if this isn't the case, it
should definitely be fixed in the upstream repo by triggering a new release with a working
Documenter build).

Throws an error if the downloaded file does not start with
`var documenterSearchIndex = `.
"""
function get_documenter_search_index(repo::String)
    url = "https://turinglang.org/$repo/stable/search_index.js"
    @info "Downloading Documenter.jl search index from $url"
    contents = String(HTTP.get(url).body)
    # This file is actually a JavaScript file of the form
    #     var documenterSearchIndex = {"docs": [ ... ]}
    # possibly followed by a `;`. We only want the JSON object, but first check that the
    # file really does start with the expected assignment.
    prefix = r"^var documenterSearchIndex = "
    if !occursin(prefix, contents)
        error("Unexpected format of search_index.js file")
    end
    json = replace(contents, prefix => "")
    # Drop an optional statement-terminating `;` (plus trailing whitespace) so that only
    # the JSON object itself is handed to the parser.
    json = replace(json, r";\s*\z" => "")
    return JSON.parse(json, Dict{String,Vector{DocumenterSearchEntry}})["docs"]
end
| 136 | + |
# TODO: Do we also want to include search results from main site? It generally doesn't seem
# like a very meaningful thing to include in the search, and it can clutter actual useful
# results. See e.g. https://github.com/TuringLang/docs/issues/634
# I'm going to say no for now.

# Repositories whose Documenter.jl search indices are merged into the Quarto one.
repos = [
    "Turing.jl",
    "DynamicPPL.jl",
    "Bijectors.jl",
    "JuliaBUGS.jl",
    "AbstractMCMC.jl",
    "AdvancedMH.jl",
    "AdvancedHMC.jl",
    "AdvancedVI.jl",
    "MCMCChains.jl",
    "MCMCDiagnosticTools.jl",
    "SliceSampling.jl",
    "EllipticalSliceSampling.jl",
]

# Check that we are running from repo root. This is done before any downloading so that
# a wrong working directory is reported immediately.
if !isdir("_site")
    error("This script must be run from the root of the repository")
end

# Get docs entries
# NOTE(review): if this script is run twice against the same `_site`, `_site/search.json`
# is already the combined index, so the Documenter.jl entries would be appended a second
# time. Presumably the site is rebuilt between runs -- confirm.
all_entries = get_quarto_search_index()
@info "Fetched $(length(all_entries)) entries from main docs"
# Get entries from other repos
for repo in repos
    doc_entries = get_documenter_search_index(repo)
    @info "Fetched $(length(doc_entries)) entries from $repo"
    quarto_entries = QuartoSearchEntry.(doc_entries, repo)
    append!(all_entries, quarto_entries)
end

# Move the old search index out of the way and write the new combined one
output_file = "_site/search.json"
# There is only something to move if the index was built locally (otherwise it was
# downloaded). `mv` is the public API for this; `Base.rename` is a lower-level function
# whose keyword arguments are not stable across Julia versions.
if isfile(output_file)
    mv(output_file, "_site/search_original.json"; force=true)
end
JSON.json(output_file, all_entries; pretty=2)
@info "Wrote $(length(all_entries)) entries to $output_file"
0 commit comments