datopian · demenech · Sep 24, 2025 · coderabbitai · Sep 24, 2025 · coderabbitai
diff --git a/components/dataset/search/ListOfDatasets.tsx b/components/dataset/search/ListOfDatasets.tsx
@@ -51,7 +51,7 @@ function ListItems() {
 
       <FilterBadges />
       <div className="flex flex-col gap-8 mt-4">
-        {searchResults?.datasets?.map((dataset) => (
+        {searchResults?.results?.map((dataset) => (
           <DatasetItem key={dataset.id} dataset={dataset} />
         ))}
       </div>

diff --git a/components/dataset/search/SearchContext.tsx b/components/dataset/search/SearchContext.tsx
@@ -66,6 +66,7 @@ export const SearchStateProvider = ({
     offset: options.type != "dataset" ? 0 : options.offset,
     type: "dataset",
   };
+  // NOTE: our goal is to get rid of this call
   const {
     data: packageSearchResults,
     isValidating: isLoadingPackageSearchResults,
@@ -77,6 +78,20 @@ export const SearchStateProvider = ({
     { use: [laggy] }
   );
 
+  // Loads the current page of datasets from the cached search endpoint
+  // (`/api/datasets/search`) instead of hitting the package search directly.
+  const { data: cachedDatasets, isValidating: isLoadingCachedDatasets } =
+    useSWR([packagesOptions], async (options) => {
+      const searchParams = new URLSearchParams();
+      searchParams.set("limit", String(options.limit));
+      // The endpoint is page-based (1-indexed) while the UI state is
+      // offset-based. The parentheses matter: `/` binds tighter than `??`,
+      // so without them the offset would never be divided by the limit.
+      const page = Math.floor((options.offset ?? 0) / options.limit) + 1;
+      searchParams.set("page", String(page));
+      searchParams.set("query", String(options.query));
+      const datasets = await fetch(
+        `/api/datasets/search?${searchParams.toString()}`
+      );
+      const data = await datasets.json();
+      return data;
+    });
+
   const visualizationsOptions = {
     ...options,
     resFormat: [],
@@ -97,11 +112,11 @@ export const SearchStateProvider = ({
   const searchResults =
     options.type === "visualization"
       ? visualizationsSearchResults
-      : packageSearchResults;
+      : cachedDatasets;
   const isLoading =
     options.type === "visualization"
       ? isLoadingVisualizations
-      : isLoadingPackageSearchResults;
+      : isLoadingCachedDatasets;
 
   const packageSearchFacets = packageSearchResults?.search_facets ?? {};
   const visualizationsSearchFacets =
@@ -111,7 +126,6 @@ export const SearchStateProvider = ({
       ? visualizationsSearchFacets
       : packageSearchFacets;
 
-
   const value: SearchStateContext = {
     options,
     setOptions: (options) => setQueryParam(options),

diff --git a/lib/data.ts b/lib/data.ts
@@ -0,0 +1,69 @@
+import { searchDatasets } from "@/lib/queries/dataset";
+import { Dataset } from "@/schemas/dataset.interface";
+import { unstable_cache } from "next/cache";
+import { z } from "zod";
+
+// FIXME: how can we prevent simultaneous cache revalidations
+// when a cache revalidation is requested while another is already
+// running? Wouldn't happen with revalidate set to false
+/**
+ * Returns the full list of datasets, memoized through `unstable_cache`
+ * under the "cached-datasets" key.
+ *
+ * On a cache miss it walks `searchDatasets` page by page (10 per request,
+ * restricted to `type: "dataset"`) and concatenates the results until a
+ * page comes back empty.
+ */
+export const getCachedDatasets = unstable_cache(
+  async () => {
+    console.log("Revalidating datasets cache: ", new Date().getTime());
+    const allDatasets: Dataset[] = [];
+    const limit = 10;
+    let page = 0;
+    while (true) {
+      const pageDatasets = await searchDatasets({
+        limit,
+        offset: limit * page,
+        groups: [],
+        orgs: [],
+        tags: [],
+        type: "dataset",
+      });
+
+      // An empty (or missing) result list means we are past the last page.
+      if (!pageDatasets?.results?.length) {
+        break;
+      }
+
+      allDatasets.push(...pageDatasets.results);
+      page++;
+    }
+    return allDatasets;
+  },
+  ["cached-datasets"],
+  {
+    revalidate: false, // TODO: what happens if the UI triggers a time-based revalidation?
+  }
+);
+
+/**
+ * Validation schema for the pagination options of the dataset search API.
+ *
+ * - `limit`: integer between 1 and 25, defaults to 10.
+ * - `page`: integer >= 1, defaults to 1.
+ *
+ * Incoming values are coerced with `Number()` before validation; `undefined`
+ * and the empty string are passed through as `undefined` rather than being
+ * coerced to `NaN` / `0`.
+ *
+ * NOTE(review): the search client also sends a `query` parameter, which is
+ * not declared here — confirm whether it should be validated as well.
+ */
+export const searchOptionsSchema = z.object({
+  limit: z
+    .preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1).max(25))
+    .optional()
+    .default(10),
+  page: z
+    .preprocess((x) => (x === undefined || x === "" ? undefined : Number(x)), z.number().int().min(1))
+    .optional()
+    .default(1),
+});
+
+type SearchOptions = z.infer<typeof searchOptionsSchema>;
+
+// NOTE: for search, I think we should use a lib like minisearch
+// for the FTS, and use a DTO to return results. We could even
+// cache this list of datasets DTO. This would reduce data transfer
+// and increase performance in the pages that use search
+// The search index can be a module-level singleton, it doesn't have
+// to be cached
+/**
+ * Returns one page of the cached dataset list.
+ *
+ * @param options - 1-indexed `page` and page size `limit`.
+ * @returns `results` holding the requested slice and `count` holding the
+ *   total number of datasets before pagination.
+ */
+export async function searchCachedDatasets(options: SearchOptions) {
+  const datasets = await getCachedDatasets();
+  // No filtering is applied yet, so every cached dataset counts as a match.
+  // NOTE: maybe https://github.com/itemsapi/itemsjs instead of minisearch ?
+  const matches = datasets;
+
+  const offset = (options.page - 1) * options.limit;
+  return {
+    results: matches.slice(offset, offset + options.limit),
+    count: matches.length,
+  };
+}
+
diff --git a/pages/api/datasets/search.tsx b/pages/api/datasets/search.tsx
@@ -0,0 +1,24 @@
+import { searchCachedDatasets, searchOptionsSchema } from "@/lib/data";
+import { NextApiRequest, NextApiResponse } from "next";
+import { ZodError } from "zod";
+
+/**
+ * API route for searching the cached dataset list.
+ *
+ * GET: validates the query string against `searchOptionsSchema` and responds
+ * with the output of `searchCachedDatasets`.
+ *   - 200 with the search results on success
+ *   - 400 with the Zod issues when validation fails
+ *   - 500 for any other failure (logged server-side)
+ * Any other method: 405 with an `Allow: GET` header.
+ */
+export default async function handler(
+  req: NextApiRequest,
+  res: NextApiResponse
+) {
+  if (req.method === "GET") {
+    try {
+      const validatedOptions = searchOptionsSchema.parse(req.query);
+      const results = await searchCachedDatasets(validatedOptions);
+
+      res.status(200).json(results);
+    } catch (e) {
+      if (e instanceof ZodError) {
+        return res
+          .status(400)
+          .json({ message: "Validation Error", errors: e.issues });
+      }
+      // Anything that is not a validation error must still produce a
+      // response, otherwise the request would hang until it times out.
+      console.error("Cached datasets search failed:", e);
+      return res.status(500).json({ message: "Internal Server Error" });
+    }
+  } else {
+    res.setHeader("Allow", ["GET"]);
+    res.status(405).end(`Method ${req.method} Not Allowed`);
+  }
+}