Skip to content

Commit db7ebe5

Browse files
committed
Augment search with package APIs
1 parent 6516ef0 commit db7ebe5

File tree

4 files changed

+181
-15
lines changed

4 files changed

+181
-15
lines changed

.github/workflows/publish.yml

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,6 @@ jobs:
9090
env:
9191
PATH_PREFIX: /versions/${{ env.version }}
9292

93-
- name: Rename original search index
94-
run: mv _site/search.json _site/search_original.json
95-
9693
- name: Save _freeze folder
9794
id: cache-save
9895
if: ${{ !cancelled() }}
@@ -110,18 +107,8 @@ jobs:
110107
path: ${{ steps.julia-cache.outputs.cache-paths }}
111108
key: ${{ steps.julia-cache.outputs.cache-key }}
112109

113-
- name: Fetch search_original.json from main site
114-
run: curl -O https://raw.githubusercontent.com/TuringLang/turinglang.github.io/gh-pages/search_original.json
115-
116-
- name: Convert main site search index URLs to relative URLs
117-
run: |
118-
jq 'map(
119-
if .href then .href = "../" + .href else . end |
120-
if .objectID then .objectID = "../" + .objectID else . end)' search_original.json > fixed_main_search.json
121-
122-
- name: Merge both search index
123-
run: |
124-
jq -s '.[0] + .[1]' _site/search_original.json fixed_main_search.json > _site/search.json
110+
- name: Augment search index with package APIs
111+
run: julia --project=assets/scripts/search assets/scripts/search/search.jl
125112

126113
- name: Checkout gh-pages branch
127114
uses: actions/checkout@v4

assets/scripts/search/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Manifest.toml

assets/scripts/search/Project.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[deps]
2+
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
3+
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"

assets/scripts/search/search.jl

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
using Pkg
2+
Pkg.instantiate()
3+
4+
using HTTP
5+
using JSON
6+
"""
    DocumenterSearchEntry

A single record of the search index that Documenter.jl generates. The fields follow the
JSON schema of that index; for an example, see:
https://github.com/TuringLang/DynamicPPL.jl/blob/gh-pages/v0.39.1/search_index.js
"""
struct DocumenterSearchEntry
    location::String
    page::String
    title::String
    text::String
    category::String
end
20+
"""
    QuartoSearchEntry

A single record of the search index that Quarto generates. The fields follow the JSON
schema of that index; for an example, see:
https://github.com/TuringLang/docs/blob/gh-pages/search_original.json
"""
struct QuartoSearchEntry
    objectID::String
    href::String
    "Title of the page."
    title::String
    "Name of the section, if applicable."
    section::String
    text::String
    crumbs::Union{Vector{String},Nothing}
end
37+
"""
    QuartoSearchEntry(doc_entry::DocumenterSearchEntry, repo::String) -> QuartoSearchEntry

Convert a `DocumenterSearchEntry` taken from the docs of `repo` into a `QuartoSearchEntry`.

The resulting `href` (also used as `objectID`) has the form
`../<repo>/stable/<page>?q=[#<anchor>]`, the title is `"[<repo>] <page name>"`, the section
is the Documenter entry's title, and `crumbs` is left as `nothing`.
"""
function QuartoSearchEntry(doc_entry::DocumenterSearchEntry, repo::String)::QuartoSearchEntry
    # Because our links are relative to turinglang.org/docs/, an entry from say
    # DynamicPPL.jl needs to be prefixed with `../DynamicPPL.jl/stable/` to work correctly.
    #
    # When opening a Documenter.jl page, if the query parameter `q` is nonempty, it will
    # open up a search bar with that query prefilled. In contrast, Quarto stores the query
    # parameter in case the search bar is reopened, but doesn't actually open the search
    # bar.
    #
    # Now, if you search for `your_search_term`, Quarto always adds `?q=your_search_term`
    # to its search bar links, so that the search bar can be 'prefilled' the next time it
    # is opened. We want to _disable_ this behaviour for Documenter.jl links, because it's
    # quite annoying to navigate somewhere else and have a search bar pop up, so we sneak
    # an empty query parameter into the URL. This is a real hack, but the alternative
    # would be to modify Quarto's JS code itself, which is probably worse.
    #
    # Query parameters must come before the anchor, hence the order below.
    parts = split(doc_entry.location, "#"; limit=2)
    # A leading slash would otherwise produce `stable//...`.
    page_path = lstrip(parts[1], '/')
    fragment = length(parts) == 2 ? "#" * parts[2] : ""
    # These are URLs, not filesystem paths, so they are assembled with plain string
    # concatenation: `joinpath` would use the platform's path separator and would insert
    # a `/` between the page and `?q=` (e.g. `page.html/?q=`).
    href = string("../", repo, "/stable/", page_path, "?q=", fragment)
    return QuartoSearchEntry(
        # objectID
        href,
        # href
        href,
        # title
        "[$repo] $(doc_entry.page)",
        # section
        doc_entry.title,
        # text
        doc_entry.text,
        # crumbs (no idea what to put here)
        nothing,
    )
end
84+
"""
    get_quarto_search_index() -> Vector{QuartoSearchEntry}

Load the Quarto search index. If `_site/search.json` exists (i.e. the docs have already
been built locally), it is read from there; otherwise `search_original.json` is downloaded
from the `gh-pages` branch of the TuringLang/docs GitHub repository.

A warning is logged for every entry whose `objectID` and `href` differ.
"""
function get_quarto_search_index()
    local_index = "_site/search.json"
    entries = if isfile(local_index)
        @info "Using local search index..."
        JSON.parsefile(local_index, Vector{QuartoSearchEntry})
    else
        @info "Downloading search index from GitHub..."
        response = HTTP.get(
            "https://raw.githubusercontent.com/TuringLang/docs/refs/heads/gh-pages/search_original.json"
        )
        JSON.parse(String(response.body), Vector{QuartoSearchEntry})
    end
    # Manual inspection of the search index suggests that `objectID` and `href` always
    # match. It is unclear whether Quarto guarantees this, so warn if they ever differ.
    for entry in entries
        entry.objectID == entry.href && continue
        @warn "mismatched objectID and href" objectID=entry.objectID href=entry.href
    end
    return entries
end
112+
"""
    get_documenter_search_index(repo::String) -> Vector{DocumenterSearchEntry}

Download and parse the Documenter.jl search index of `repo` from
`https://turinglang.org/<repo>/stable/search_index.js`.

This assumes that there is a 'stable' version of the docs (if this isn't the case, it
should be fixed in the upstream repo by triggering a new release with a working Documenter
build).

Throws an error if the downloaded file does not start with the expected JavaScript
assignment or does not contain a `"docs"` entry.
"""
function get_documenter_search_index(repo::String)
    url = "https://turinglang.org/$repo/stable/search_index.js"
    @info "Downloading Documenter.jl search index from $url"
    contents = String(HTTP.get(url).body)
    # This file is actually a JavaScript file of the form
    #     var documenterSearchIndex = {"docs": [ ... ]};
    # We only want the object literal, so check that the file really starts with the
    # assignment before stripping it off.
    prefix = r"^var documenterSearchIndex = "
    if !occursin(prefix, contents)
        error("Unexpected format of search_index.js file")
    end
    without_prefix = replace(contents, prefix => ""; count=1)
    # Drop trailing whitespace and the optional statement-terminating `;` so that only the
    # JSON object itself is handed to the parser.
    json = String(rstrip(c -> isspace(c) || c == ';', without_prefix))
    parsed = JSON.parse(json, Dict{String,Vector{DocumenterSearchEntry}})
    if !haskey(parsed, "docs")
        error("Unexpected format of search_index.js file")
    end
    return parsed["docs"]
end
136+
137+
# TODO: Do we also want to include search results from main site? It generally doesn't seem
138+
# like a very meaningful thing to include in the search, and it can clutter actual useful
139+
# results. See e.g. https://github.com/TuringLang/docs/issues/634
140+
# I'm going to say no for now.
141+
# Packages whose published Documenter.jl search indices are merged into the docs search.
repos = [
    "Turing.jl",
    "DynamicPPL.jl",
    "Bijectors.jl",
    "JuliaBUGS.jl",
    "AbstractMCMC.jl",
    "AdvancedMH.jl",
    "AdvancedHMC.jl",
    "AdvancedVI.jl",
    "MCMCChains.jl",
    "MCMCDiagnosticTools.jl",
    "SliceSampling.jl",
    "EllipticalSliceSampling.jl",
]

# Check that we are running from repo root. This is done before any downloads so that a
# wrong working directory fails immediately instead of after all the network requests.
if !isdir("_site")
    error("This script must be run from the root of the repository")
end

# Get docs entries
all_entries = get_quarto_search_index()
@info "Fetched $(length(all_entries)) entries from main docs"

# Get entries from other repos
for repo in repos
    doc_entries = get_documenter_search_index(repo)
    @info "Fetched $(length(doc_entries)) entries from $repo"
    append!(all_entries, QuartoSearchEntry.(doc_entries, repo))
end

# Move the old search index out of the way (if there is one: when the main index had to be
# downloaded there is nothing to move) and write the new combined one. `mv` is the
# documented API for renaming a file and overwriting the destination.
# NOTE(review): running this script twice on the same `_site` reads the already-combined
# `_site/search.json` as the main index, so package entries get appended again.
output_file = "_site/search.json"
if isfile(output_file)
    mv(output_file, "_site/search_original.json"; force=true)
end
JSON.json(output_file, all_entries; pretty=2)
@info "Wrote $(length(all_entries)) entries to $output_file"

0 commit comments

Comments
 (0)