Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/dataset/search/ListOfDatasets.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function ListItems() {

<FilterBadges />
<div className="flex flex-col gap-8 mt-4">
{searchResults?.datasets?.map((dataset) => (
{searchResults?.results?.map((dataset) => (
<DatasetItem key={dataset.id} dataset={dataset} />
))}
</div>
Expand Down
20 changes: 17 additions & 3 deletions components/dataset/search/SearchContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ export const SearchStateProvider = ({
offset: options.type != "dataset" ? 0 : options.offset,
type: "dataset",
};
// NOTE: our goal is to get rid of this call
const {
data: packageSearchResults,
isValidating: isLoadingPackageSearchResults,
Expand All @@ -77,6 +78,20 @@ export const SearchStateProvider = ({
{ use: [laggy] }
);

const { data: cachedDatasets, isValidating: isLoadingCachedDatasets } =
useSWR([packagesOptions], async (options) => {
const searchParams = new URLSearchParams();
searchParams.set("limit", String(options.limit));
const page = Math.floor(options.offset ?? 0 / options.limit) + 1;
searchParams.set("page", String(page));
searchParams.set("query", String(options.query));
const datasets = await fetch(
`/api/datasets/search?${searchParams.toString()}`
);
const data = await datasets.json();
return data;
});

const visualizationsOptions = {
...options,
resFormat: [],
Expand All @@ -97,11 +112,11 @@ export const SearchStateProvider = ({
const searchResults =
options.type === "visualization"
? visualizationsSearchResults
: packageSearchResults;
: cachedDatasets;
const isLoading =
options.type === "visualization"
? isLoadingVisualizations
: isLoadingPackageSearchResults;
: isLoadingCachedDatasets;

const packageSearchFacets = packageSearchResults?.search_facets ?? {};
const visualizationsSearchFacets =
Expand All @@ -111,7 +126,6 @@ export const SearchStateProvider = ({
? visualizationsSearchFacets
: packageSearchFacets;


const value: SearchStateContext = {
options,
setOptions: (options) => setQueryParam(options),
Expand Down
69 changes: 69 additions & 0 deletions lib/data.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import { searchDatasets } from "@/lib/queries/dataset";
import { Dataset } from "@/schemas/dataset.interface";
import { unstable_cache } from "next/cache";
import { z } from "zod";

// FIXME: how can we prevent simulateneous cache revalidations
// when a cache revalidation is requested while another is already
// running? Woudln't happen with revalidate set to false
export const getCachedDatasets = unstable_cache(
async () => {
console.log("Revalidating datasets cache: ", new Date().getTime());
const allDatasets: Dataset[] = [];
const limit = 10;
let page = 0;
while (true) {
const pageDatasets = await searchDatasets({
limit,
offset: limit * page,
groups: [],
orgs: [],
tags: [],
});
Comment on lines +16 to +22
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Constrain cached corpus to datasets

Without type: "dataset", the cache may include visualizations. Aligns with UI using cache only for dataset searches.

Apply this diff:

       const pageDatasets = await searchDatasets({
         limit,
         offset: limit * page,
         groups: [],
         orgs: [],
         tags: [],
+        type: "dataset",
       });
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
const pageDatasets = await searchDatasets({
limit,
offset: limit * page,
groups: [],
orgs: [],
tags: [],
});
const pageDatasets = await searchDatasets({
limit,
offset: limit * page,
groups: [],
orgs: [],
tags: [],
type: "dataset",
});
🤖 Prompt for AI Agents
In lib/data.ts around lines 16 to 22, the searchDatasets call is missing a type
filter so the cached corpus can include non-dataset results (like
visualizations); add type: "dataset" to the argument object passed to
searchDatasets so the cache only stores datasets, e.g. include type: "dataset"
alongside limit, offset, groups, orgs, and tags to constrain results to
datasets.


if (!pageDatasets?.results?.length) {
break;
}

allDatasets.push(...pageDatasets.results);
page++;
}
return allDatasets;
},
["cached-datasets"],
{
revalidate: false, // TODO: what happens if the UI triggers a time-based revalidation?
}
);

export const searchOptionsSchema = z.object({
limit: z
.preprocess((x) => Number(x), z.number().min(0).max(25))
.optional()
.default(10),
page: z
.preprocess((x) => Number(x), z.number().min(1))
.optional()
.default(1),
});
Comment on lines +39 to +48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Zod preprocess breaks defaults; missing params parse as NaN

Number(undefined) yields NaN, so .optional().default(...) won’t apply. Also prefer min(1) for limit.

Apply this diff:

 export const searchOptionsSchema = z.object({
-  limit: z
-    .preprocess((x) => Number(x), z.number().min(0).max(25))
-    .optional()
-    .default(10),
-  page: z
-    .preprocess((x) => Number(x), z.number().min(1))
-    .optional()
-    .default(1),
+  limit: z
+    .preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1).max(25))
+    .optional()
+    .default(10),
+  page: z
+    .preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1))
+    .optional()
+    .default(1),
 });
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
export const searchOptionsSchema = z.object({
limit: z
.preprocess((x) => Number(x), z.number().min(0).max(25))
.optional()
.default(10),
page: z
.preprocess((x) => Number(x), z.number().min(1))
.optional()
.default(1),
});
export const searchOptionsSchema = z.object({
limit: z
.preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1).max(25))
.optional()
.default(10),
page: z
.preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1))
.optional()
.default(1),
});
🤖 Prompt for AI Agents
In lib/data.ts around lines 39 to 48, the preprocess currently does Number(x)
which turns undefined into NaN so the .default() never applies; change the
preprocess to return undefined when input is undefined (e.g. preprocess: x => x
=== undefined ? undefined : Number(x)) so Zod can apply the optional default,
and update limit’s min constraint from 0 to 1 (keep max 25) so limit must be at
least 1.


type SearchOptions = z.infer<typeof searchOptionsSchema>;

// NOTE: for search, I think we should use a lib like minisearch
// for the FTS, and use a DTO to return results. We could even
// cache this list of datasets DTO. This would reduce data transfer
// and increase performance in the pages that use search
// The search index can be a module-level singleton, it doesn't have
// to be cached
export async function searchCachedDatasets(options: SearchOptions) {
const { page, limit } = options;
const allDatasets = await getCachedDatasets();
const filteredDatasets = allDatasets;
// NOTE: maybe https://github.com/itemsapi/itemsjs instead of minisearch ?

const startIdx = (page - 1) * limit;
const endIdx = startIdx + limit;
const paginatedDatasets = filteredDatasets.slice(startIdx, endIdx);
return { results: paginatedDatasets, count: filteredDatasets.length };
}

24 changes: 24 additions & 0 deletions pages/api/datasets/search.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import { searchCachedDatasets, searchOptionsSchema } from "@/lib/data";
import { NextApiRequest, NextApiResponse } from "next";
import { ZodError } from "zod";

export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
if (req.method === "GET") {
try {
const validatedOptions = searchOptionsSchema.parse(req.query);
const results = await searchCachedDatasets(validatedOptions);

res.status(200).json(results);
} catch (e) {
if (e instanceof ZodError) {
res.status(400).json({ message: "Validation Error", errors: e.issues });
}
}
Comment on lines +9 to +19
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Add 500 fallback and return after 400

Non‑Zod errors lead to a hanging request. Also return after sending 400 to avoid double responses.

Apply this diff:

   if (req.method === "GET") {
     try {
       const validatedOptions = searchOptionsSchema.parse(req.query);
       const results = await searchCachedDatasets(validatedOptions);

       res.status(200).json(results);
     } catch (e) {
       if (e instanceof ZodError) {
-        res.status(400).json({ message: "Validation Error", errors: e.issues });
+        return res
+          .status(400)
+          .json({ message: "Validation Error", errors: e.issues });
       }
+      console.error("Cached datasets search failed:", e);
+      return res.status(500).json({ message: "Internal Server Error" });
     }
   } else {
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (req.method === "GET") {
try {
const validatedOptions = searchOptionsSchema.parse(req.query);
const results = await searchCachedDatasets(validatedOptions);
res.status(200).json(results);
} catch (e) {
if (e instanceof ZodError) {
res.status(400).json({ message: "Validation Error", errors: e.issues });
}
}
if (req.method === "GET") {
try {
const validatedOptions = searchOptionsSchema.parse(req.query);
const results = await searchCachedDatasets(validatedOptions);
res.status(200).json(results);
} catch (e) {
if (e instanceof ZodError) {
return res
.status(400)
.json({ message: "Validation Error", errors: e.issues });
}
console.error("Cached datasets search failed:", e);
return res.status(500).json({ message: "Internal Server Error" });
}
} else {
🤖 Prompt for AI Agents
In pages/api/datasets/search.tsx around lines 9 to 19, the try/catch only
handles ZodError and does not return after sending the 400 response nor handle
other exceptions, which can hang the request or cause double responses; modify
the catch to return immediately after res.status(400).json(...) for Zod errors,
and add an else branch that logs the error and responds with
res.status(500).json({ message: "Internal Server Error" }) for non‑Zod
exceptions so every error path sends a single response.

} else {
res.setHeader("Allow", ["GET"]);
res.status(405).end(`Method ${req.method} Not Allowed`);
}
}