Skip to content

Commit c8310f3

Browse files
authored
feat: support fuzzy matching, closes #504 (#505)
1 parent 31f98a5 commit c8310f3

File tree

12 files changed

+176
-42
lines changed

12 files changed

+176
-42
lines changed

README.md

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ yarn add @easyops-cn/docusaurus-search-local
4242

4343
Add `@easyops-cn/docusaurus-search-local` into your docusaurus themes.
4444

45-
````js
45+
```js
4646
// In your `docusaurus.config.js`:
4747
module.exports = {
4848
// ... Your other configurations.
@@ -65,7 +65,7 @@ module.exports = {
6565
],
6666
],
6767
};
68-
````
68+
```
6969

7070
> Notice: We now present this as a theme instead of a plugin, see [this comment](https://github.com/facebook/docusaurus/issues/6488#issuecomment-1024124096).
7171
@@ -100,6 +100,7 @@ module.exports = {
100100
| hideSearchBarWithNoSearchContext | boolean | `false` | Whether to hide the search bar when no search context was matched. By default, if `searchContextByPaths` is set, pages which are not matched with it will be considered as with a search context of ROOT. By setting `hideSearchBarWithNoSearchContext: true`, these pages will be considered as with NO search context, and the search bar will be hidden. |
101101
| useAllContextsWithNoSearchContext | boolean | `false` | Whether to show results from all the contexts if no context is provided. This option should not be used with `hideSearchBarWithNoSearchContext: true` as this would show results when there is no search context. This will duplicate indexes and might have a performance cost depending on the index sizes. |
102102
| `forceIgnoreNoIndex` | boolean | `false` | Force-enable the search index even if `noIndex: true` is set. This also affects unlisted articles. |
103+
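| `fuzzyMatchingDistance` | number | `1` | Set the edit distance for fuzzy matching during searches. Lower values result in stricter matches, higher values in more lenient matches. Set `0` to disable fuzzy matching. |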
103104

104105
### I18N
105106

@@ -136,22 +137,22 @@ Note that `*_plural` can be omitted if it is the same as singular.
136137

137138
This theme is shipped with polished styles just like the Algolia Search on the Docusaurus v2 website. Feel free to override the CSS custom properties (CSS variables) below.
138139

139-
| Var | Default (light) | Default (dark) |
140-
| -------------------------------- | ---------------------------------------------------------------------- | ------------------------------------------------------- |
141-
| --search-local-modal-background | `#f5f6f7` | `var(--ifm-background-color)` |
142-
| --search-local-modal-shadow | `inset 1px 1px 0 0 hsla(0, 0%, 100%, 0.5),`<br />`0 3px 8px 0 #555a64` | `inset 1px 1px 0 0 #2c2e40,`<br />`0 3px 8px 0 #000309` |
143-
| --search-local-modal-width | `560px` | - |
144-
| --search-local-modal-width-sm | `340px` | - |
145-
| --search-local-spacing | `12px` | - |
146-
| --search-local-hit-background | `#fff` | `var(--ifm-color-emphasis-100)` |
147-
| --search-local-hit-shadow | `0 1px 3px 0 #d4d9e1` | `none` |
148-
| --search-local-hit-color | `#444950` | `var(--ifm-font-color-base)` |
149-
| --search-local-hit-height | `56px` | - |
150-
| --search-local-highlight-color | `var(--ifm-color-primary)` | - |
151-
| --search-local-muted-color | `#969faf` | `var(--ifm-color-secondary-darkest)` |
152-
| --search-local-icon-stroke-width | `1.4` | - |
153-
| --search-local-hit-active-color | `var(--ifm-color-white)` | - |
154-
| --search-local-input-active-border-color | `var(--ifm-color-primary)` | - |
140+
| Var | Default (light) | Default (dark) |
141+
| ---------------------------------------- | ---------------------------------------------------------------------- | ------------------------------------------------------- |
142+
| --search-local-modal-background | `#f5f6f7` | `var(--ifm-background-color)` |
143+
| --search-local-modal-shadow | `inset 1px 1px 0 0 hsla(0, 0%, 100%, 0.5),`<br />`0 3px 8px 0 #555a64` | `inset 1px 1px 0 0 #2c2e40,`<br />`0 3px 8px 0 #000309` |
144+
| --search-local-modal-width | `560px` | - |
145+
| --search-local-modal-width-sm | `340px` | - |
146+
| --search-local-spacing | `12px` | - |
147+
| --search-local-hit-background | `#fff` | `var(--ifm-color-emphasis-100)` |
148+
| --search-local-hit-shadow | `0 1px 3px 0 #d4d9e1` | `none` |
149+
| --search-local-hit-color | `#444950` | `var(--ifm-font-color-base)` |
150+
| --search-local-hit-height | `56px` | - |
151+
| --search-local-highlight-color | `var(--ifm-color-primary)` | - |
152+
| --search-local-muted-color | `#969faf` | `var(--ifm-color-secondary-darkest)` |
153+
| --search-local-icon-stroke-width | `1.4` | - |
154+
| --search-local-hit-active-color | `var(--ifm-color-white)` | - |
155+
| --search-local-input-active-border-color | `var(--ifm-color-primary)` | - |
155156

156157
E.g.:
157158

docusaurus-search-local/src/client/theme/worker.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ export class SearchWorker {
7474
query.term(item.value, {
7575
wildcard: item.wildcard,
7676
presence: item.presence,
77+
...(item.editDistance
78+
? { editDistance: item.editDistance }
79+
: null),
7780
});
7881
}
7982
})

docusaurus-search-local/src/client/utils/__mocks__/proxiedGeneratedConstants.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ export let language = ["en", "zh"];
22
export let removeDefaultStopWordFilter = false;
33
export const searchIndexUrl = "search-index{dir}.json?_=abc";
44
export const searchResultLimits = 8;
5+
export let fuzzyMatchingDistance = 0;
56

67
export function __setLanguage(value: string[]): void {
78
language = value;
@@ -10,3 +11,7 @@ export function __setLanguage(value: string[]): void {
1011
export function __setRemoveDefaultStopWordFilter(value: boolean): void {
1112
removeDefaultStopWordFilter = value;
1213
}
14+
15+
export function __setFuzzyMatchingDistance(value: number): void {
16+
fuzzyMatchingDistance = value;
17+
}

docusaurus-search-local/src/client/utils/smartQueries.spec.ts

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { smartQueries } from "./smartQueries";
33
import {
44
__setLanguage,
55
__setRemoveDefaultStopWordFilter,
6+
__setFuzzyMatchingDistance,
67
} from "./proxiedGeneratedConstants";
78
import { SmartQuery } from "../../shared/interfaces";
89

@@ -259,6 +260,53 @@ describe("smartQueries with no stop words filter", () => {
259260
});
260261
});
261262

263+
describe("smartQueries with fuzzy matching", () => {
264+
beforeEach(() => {
265+
__setFuzzyMatchingDistance(1);
266+
});
267+
268+
test.each<[string[], TestQuery[]]>([
269+
[
270+
["a", "hello"],
271+
[
272+
{
273+
tokens: ["a", "hello"],
274+
keyword: "+a +hello",
275+
},
276+
{
277+
tokens: ["a", "hello"],
278+
keyword: "+a +hello*",
279+
},
280+
{
281+
tokens: ["a", "hello"],
282+
keyword: "+a +hello~1",
283+
},
284+
{
285+
tokens: ["a", "hello"],
286+
keyword: "+a +hello*~1",
287+
},
288+
],
289+
],
290+
[
291+
["a", "b"],
292+
[
293+
{
294+
tokens: ["a", "b"],
295+
keyword: "+a +b",
296+
},
297+
{
298+
tokens: ["a", "b"],
299+
keyword: "+a +b*",
300+
},
301+
],
302+
],
303+
])("smartQueries(%j, zhDictionary) should work", (tokens, queries) => {
304+
expect(smartQueries(tokens, zhDictionary).map(transformQuery)).toEqual(
305+
queries
306+
);
307+
});
308+
});
309+
262310
function transformQuery(query: SmartQuery): TestQuery {
263311
return {
264312
tokens: query.tokens,
@@ -275,6 +323,10 @@ function transformQuery(query: SmartQuery): TestQuery {
275323
lunr.Query.wildcard.TRAILING
276324
? "*"
277325
: ""
326+
}${
327+
typeof item.editDistance === "number" && item.editDistance > 0
328+
? `~${item.editDistance}`
329+
: ""
278330
}`
279331
)
280332
.join(" "),

docusaurus-search-local/src/client/utils/smartQueries.ts

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import lunr from "lunr";
22
import { SmartQuery, SmartTerm } from "../../shared/interfaces";
33
import { smartTerms } from "./smartTerms";
4-
import { language, removeDefaultStopWordFilter } from "./proxiedGeneratedConstants";
4+
import {
5+
language,
6+
removeDefaultStopWordFilter,
7+
fuzzyMatchingDistance,
8+
} from "./proxiedGeneratedConstants";
59

610
/**
711
* Get all possible queries for a list of tokens consisting of words that mix English and Chinese,
@@ -93,39 +97,65 @@ export function smartQueries(
9397
}
9498
}
9599

96-
return getQueriesMaybeTyping(terms).concat(getQueriesMaybeTyping(extraTerms));
100+
return getQueriesMaybeTyping(terms).concat(
101+
fuzzyMatchingDistance > 0
102+
? getQueriesMaybeTyping(terms, fuzzyMatchingDistance)
103+
: [],
104+
getQueriesMaybeTyping(extraTerms),
105+
fuzzyMatchingDistance > 0
106+
? getQueriesMaybeTyping(extraTerms, fuzzyMatchingDistance)
107+
: []
108+
);
97109
}
98110

99-
function getQueriesMaybeTyping(terms: SmartTerm[]): SmartQuery[] {
100-
return termsToQueries(terms).concat(
111+
function getQueriesMaybeTyping(
112+
terms: SmartTerm[],
113+
editDistance?: number
114+
): SmartQuery[] {
115+
return termsToQueries(terms, false, editDistance).concat(
101116
termsToQueries(
102117
// Ignore terms whose last token already has a trailing wildcard,
103118
// or the last token is not `maybeTyping`.
104119
terms.filter((term) => {
105120
const token = term[term.length - 1];
106121
return !token.trailing && token.maybeTyping;
107122
}),
108-
true
123+
true,
124+
editDistance
109125
)
110126
);
111127
}
112128

113129
function termsToQueries(
114130
terms: SmartTerm[],
115-
maybeTyping?: boolean
131+
maybeTyping?: boolean,
132+
editDistance?: number
116133
): SmartQuery[] {
117-
return terms.map((term) => ({
118-
tokens: term.map((item) => item.value),
119-
term: term.map((item) => ({
120-
value: item.value,
121-
presence: lunr.Query.presence.REQUIRED,
122-
// The last token of a term maybe incomplete while user is typing.
123-
// So append more queries with trailing wildcard added.
124-
wildcard: (
125-
maybeTyping ? item.trailing || item.maybeTyping : item.trailing
126-
)
127-
? lunr.Query.wildcard.TRAILING
128-
: lunr.Query.wildcard.NONE,
129-
})),
130-
}));
134+
return terms.flatMap((term) => {
135+
const query = {
136+
tokens: term.map((item) => item.value),
137+
term: term.map((item) => ({
138+
value: item.value,
139+
presence: lunr.Query.presence.REQUIRED,
140+
// The last token of a term maybe incomplete while user is typing.
141+
// So append more queries with trailing wildcard added.
142+
wildcard: (
143+
maybeTyping ? item.trailing || item.maybeTyping : item.trailing
144+
)
145+
? lunr.Query.wildcard.TRAILING
146+
: lunr.Query.wildcard.NONE,
147+
editDistance:
148+
editDistance && item.value.length > editDistance
149+
? editDistance
150+
: undefined,
151+
})),
152+
};
153+
154+
// Drop queries in which every term skipped the edit distance because its token was too short.
155+
if (editDistance && query.term.every((item) => !item.editDistance)) {
156+
return [];
157+
}
158+
159+
return query;
160+
});
131161
}

docusaurus-search-local/src/declarations.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ declare module "*/generated-constants.js" {
3030
export const language: string[];
3131
export const searchIndexUrl: string;
3232
export const searchResultLimits: number;
33+
export const fuzzyMatchingDistance: number;
3334
// These below are for mocking only.
3435
export const __setLanguage: (value: string[]) => void;
3536
export const __setRemoveDefaultStopWordFilter: (value: boolean) => void;

docusaurus-search-local/src/index.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,4 +199,12 @@ export interface PluginOptions {
199199
* @default false
200200
*/
201201
forceIgnoreNoIndex?: boolean;
202+
203+
/**
204+
* Set the edit distance for fuzzy matching during searches. Lower values will result in stricter matches,
205+
* while higher values will allow for more lenient matches. Set 0 to disable fuzzy matching.
206+
*
207+
* @default 1
208+
*/
209+
fuzzyMatchingDistance?: number;
202210
}

docusaurus-search-local/src/server/utils/generate.spec.ts

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -233,9 +233,25 @@ describe("generate", () => {
233233

234234
expect(mockWriteFileSync).toBeCalledWith(
235235
expect.toMatchPath("/tmp/generated.js"),
236-
expect.stringContaining(
237-
'export const Mark = null;'
238-
)
236+
expect.stringContaining("export const Mark = null;")
237+
);
238+
});
239+
240+
test("fuzzy matching distance", () => {
241+
generate(
242+
{
243+
language: ["en"],
244+
removeDefaultStopWordFilter: false,
245+
searchResultLimits: 8,
246+
searchResultContextMaxLength: 50,
247+
fuzzyMatchingDistance: 2,
248+
} as ProcessedPluginOptions,
249+
"/tmp"
250+
);
251+
252+
expect(mockWriteFileSync).toBeCalledWith(
253+
expect.toMatchPath("/tmp/generated-constants.js"),
254+
expect.stringContaining("export const fuzzyMatchingDistance = 2;")
239255
);
240256
});
241257
});

docusaurus-search-local/src/server/utils/generate.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export function generate(config: ProcessedPluginOptions, dir: string): string {
2020
searchContextByPaths,
2121
hideSearchBarWithNoSearchContext,
2222
useAllContextsWithNoSearchContext,
23+
fuzzyMatchingDistance,
2324
} = config;
2425
const indexHash = getIndexHash(config);
2526
const contents: string[] = [];
@@ -148,8 +149,14 @@ export function generate(config: ProcessedPluginOptions, dir: string): string {
148149
constantContents.push(
149150
`export const searchIndexUrl = ${JSON.stringify(searchIndexUrl)};`,
150151
`export const searchResultLimits = ${JSON.stringify(searchResultLimits)};`,
152+
`export const fuzzyMatchingDistance = ${JSON.stringify(
153+
fuzzyMatchingDistance
154+
)};`
155+
);
156+
fs.writeFileSync(
157+
path.join(dir, "generated-constants.js"),
158+
constantContents.join("\n")
151159
);
152-
fs.writeFileSync(path.join(dir, "generated-constants.js"), constantContents.join("\n"));
153160

154161
return searchIndexFilename;
155162
}

0 commit comments

Comments
 (0)