From 8e6a1ec9860e0a82ee0bb043e4f3552b8a69aacb Mon Sep 17 00:00:00 2001
From: Joan Antoni RE <joanantoni.re16@gmail.com>
Date: Fri, 31 Oct 2025 15:39:41 +0100
Subject: [PATCH 1/2] Hydration API request v2 proposal

---
 .../src/nucliadb_models/hydration_v2.py       | 220 ++++++++++++++++++
 nucliadb_models/tests/test_hydration_v2.py    | 190 +++++++++++++++
 2 files changed, 410 insertions(+)
 create mode 100644 nucliadb_models/src/nucliadb_models/hydration_v2.py
 create mode 100644 nucliadb_models/tests/test_hydration_v2.py

diff --git a/nucliadb_models/src/nucliadb_models/hydration_v2.py b/nucliadb_models/src/nucliadb_models/hydration_v2.py
new file mode 100644
index 0000000000..7b99e4ced8
--- /dev/null
+++ b/nucliadb_models/src/nucliadb_models/hydration_v2.py
@@ -0,0 +1,220 @@
+# Copyright 2025 Bosutech XXI S.L.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+from typing import Annotated, Any, Literal
+from uuid import UUID
+
+from pydantic import BaseModel, Discriminator, Field, StringConstraints, Tag, model_validator
+from typing_extensions import Self
+
+from nucliadb_models import hydration
+
+
+class SelectProp(BaseModel):
+    prop: Any
+
+    @model_validator(mode="after")
+    def set_discriminator(self) -> Self:
+        # Ensure discriminator is explicitly set so it's always serialized
+        self.prop = self.prop
+        return self
+
+
+def prop_discriminator(v: Any) -> str | None:
+    if isinstance(v, dict):
+        return v.get("prop", None)
+    else:
+        return getattr(v, "prop", None)
+
+
+def from_discriminator(v: Any) -> str | None:
+    if isinstance(v, dict):
+        return v.get("from", None)
+    else:
+        return getattr(v, "from", None)
+
+
+# Ids
+
+ResourceId = UUID
+
+FieldId = Annotated[
+    str,
+    StringConstraints(
+        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+$",
+        min_length=32 + 1 + 1 + 1 + 1,
+        # max field id of 250
+        max_length=32 + 1 + 1 + 1 + 250,
+    ),
+]
+
+ParagraphId = Annotated[
+    str,
+    StringConstraints(
+        pattern=r"^[0-9a-f]{32}/[acftu]/[a-zA-Z0-9:_-]+/[0-9]+-[0-9]+$",
+        min_length=32 + 1 + 1 + 1 + 1 + 1 + 3,
+        # max field id of 250 and 10 digit paragraphs. More than enough
+        max_length=32 + 1 + 1 + 1 + 250 + 1 + 21,
+    ),
+]
+
+# SELECT props
+
+
+class ParagraphText(SelectProp):
+    prop: Literal["text"] = "text"
+
+
+class ParagraphImage(SelectProp):
+    prop: Literal["image"] = "image"
+
+
+class ParagraphTable(SelectProp):
+    prop: Literal["table"] = "table"
+
+
+class RelatedParagraphs(SelectProp):
+    prop: Literal["related"] = "related"
+    neighbours: hydration.NeighbourParagraphHydration
+
+
+ParagraphProp = Annotated[
+    (
+        Annotated[ParagraphText, Tag("text")]
+        | Annotated[ParagraphImage, Tag("image")]
+        | Annotated[ParagraphTable, Tag("table")]
+        | Annotated[RelatedParagraphs, Tag("related")]
+    ),
+    Discriminator(prop_discriminator),
+]
+
+
+class FieldText(SelectProp):
+    prop: Literal["text"] = "text"
+
+
+class FieldValue(SelectProp):
+    prop: Literal["value"] = "value"
+
+
+FieldProp = Annotated[
+    (Annotated[FieldText, Tag("text")] | Annotated[FieldValue, Tag("value")]),
+    Discriminator(prop_discriminator),
+]
+
+
+class ConversationAttachments(SelectProp):
+    prop: Literal["attachments"] = "attachments"
+    text: bool
+    image: bool
+
+
+class ResourceTitle(SelectProp):
+    prop: Literal["title"] = "title"
+
+
+class ResourceSummary(SelectProp):
+    prop: Literal["summary"] = "summary"
+
+
+class ResourceOrigin(SelectProp):
+    prop: Literal["origin"] = "origin"
+
+
+class ResourceSecurity(SelectProp):
+    prop: Literal["security"] = "security"
+
+
+class ResourceFieldsFilter(BaseModel):
+    ids: list[str]
+
+
+class ResourceFields(SelectProp):
+    """Virtual property to access resource fields"""
+
+    prop: Literal["fields"] = "fields"
+    select: list[FieldProp]
+    filter: ResourceFieldsFilter | None = None
+
+
+ResourceProp = Annotated[
+    (
+        Annotated[ResourceTitle, Tag("title")]
+        | Annotated[ResourceSummary, Tag("summary")]
+        | Annotated[ResourceOrigin, Tag("origin")]
+        | Annotated[ResourceSecurity, Tag("security")]
+        | Annotated[ResourceFields, Tag("fields")]
+    ),
+    Discriminator(prop_discriminator),
+]
+
+
+# Hydration
+
+
+class ResourceHydration(BaseModel, extra="forbid"):
+    given: list[ResourceId | FieldId | ParagraphId]
+    select: list[ResourceProp]
+    from_: Literal["resources"] = Field("resources", alias="from")
+
+
+class ConversationHydrationLimits(BaseModel):
+    max_messages: int | None = Field(default=15, ge=0)
+
+
+class ConversationHydration(BaseModel, extra="forbid"):
+    given: list[FieldId | ParagraphId]
+    select: list[FieldProp | ConversationAttachments]
+    from_: Literal["conversations"] = Field("conversations", alias="from")
+    limits: ConversationHydrationLimits | None = Field(default_factory=ConversationHydrationLimits)
+
+
+class FieldHydration(BaseModel, extra="forbid"):
+    given: list[FieldId | ParagraphId]
+    select: list[FieldProp]
+    from_: Literal["fields"] = Field("fields", alias="from")
+
+
+class ParagraphHydration(BaseModel, extra="forbid"):
+    given: list[ParagraphId]
+    select: list[ParagraphProp]
+    from_: Literal["paragraphs"] = Field("paragraphs", alias="from")
+
+
+class HydrationLimits(BaseModel, extra="forbid"):
+    # TODO: global hydration limits (max chars, images, image size...)
+    ...
+
+
+Hydration = Annotated[
+    (
+        Annotated[ResourceHydration, Tag("resources")]
+        | Annotated[FieldHydration, Tag("fields")]
+        | Annotated[ConversationHydration, Tag("conversations")]
+        | Annotated[ParagraphHydration, Tag("paragraphs")]
+    ),
+    Discriminator(from_discriminator),
+]
+
+
+class HydrationRequest(BaseModel, extra="forbid"):
+    hydrations: list[Hydration] = Field(
+        default_factory=list,
+        description="List of hydrations to be performed",
+    )
+
+    limits: HydrationLimits | None = Field(
+        default=None,
+        description="Global hydration limits applied to the whole request",
+    )
diff --git a/nucliadb_models/tests/test_hydration_v2.py b/nucliadb_models/tests/test_hydration_v2.py
new file mode 100644
index 0000000000..332ae5e0b2
--- /dev/null
+++ b/nucliadb_models/tests/test_hydration_v2.py
@@ -0,0 +1,190 @@
+# Copyright 2025 Bosutech XXI S.L.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+
+from uuid import uuid4
+
+from nucliadb_models import hydration_v2
+
+
+def test_hydration_v2():
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": [uuid4().hex],
+                    "select": [
+                        {"prop": "title"},
+                        {"prop": "summary"},
+                        {"prop": "origin"},
+                        {"prop": "security"},
+                    ],
+                    "from": "resources",
+                },
+                {
+                    "given": [f"{uuid4().hex}/t/text", f"{uuid4().hex}/f/file"],
+                    "select": [
+                        {"prop": "value"},
+                        {"prop": "text"},
+                    ],
+                    "from": "fields",
+                },
+                {
+                    "given": [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"],
+                    "select": [
+                        {"prop": "text"},
+                        {"prop": "image"},
+                        {"prop": "table"},
+                    ],
+                    "from": "paragraphs",
+                },
+            ]
+        }
+    )
+
+
+def test_full_resource_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {
+                            "prop": "fields",
+                            "select": [
+                                {"prop": "text"},
+                            ],
+                        },
+                    ],
+                    "from": "resources",
+                }
+            ]
+        }
+    )
+
+
+def test_field_extension_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {
+                            "prop": "fields",
+                            "select": [
+                                {"prop": "text"},
+                            ],
+                            "filter": {"ids": ["a/title"]},
+                        },
+                    ],
+                    "from": "resources",
+                }
+            ]
+        }
+    )
+
+
+def test_metadata_extension_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {"prop": "origin"},
+                        # TODO: props for classification_labels, ner...
+                    ],
+                    "from": "resources",
+                }
+            ]
+        }
+    )
+
+
+def test_neighbouring_paragraph_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {
+                            "prop": "related",
+                            "neighbours": {
+                                "before": 2,
+                                "after": 2,
+                            },
+                        },
+                    ],
+                    "from": "paragraphs",
+                }
+            ]
+        }
+    )
+
+
+def test_hierarchy_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {"prop": "title"},
+                        {"prop": "summary"},
+                    ],
+                    "from": "resources",
+                },
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {"prop": "text"},
+                    ],
+                    "from": "paragraphs",
+                },
+            ]
+        }
+    )
+
+
+def test_conversational_strategy():
+    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+
+    _ = hydration_v2.HydrationRequest.model_validate(
+        {
+            "hydrations": [
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        {"prop": "attachments", "text": True, "image": False},
+                    ],
+                    "from": "conversations",
+                    "limits": {"max_messages": 5},
+                }
+            ]
+        }
+    )

From 0e39628ee786d9a7a5003330804ecd613a6dccff Mon Sep 17 00:00:00 2001
From: Joan Antoni RE <joanantoni.re16@gmail.com>
Date: Mon, 10 Nov 2025 10:32:35 +0100
Subject: [PATCH 2/2] Remove unnested selects

---
 nucliadb_models/tests/test_hydration_v2.py | 40 +++++++++++-----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/nucliadb_models/tests/test_hydration_v2.py b/nucliadb_models/tests/test_hydration_v2.py
index 332ae5e0b2..31e39ae27b 100644
--- a/nucliadb_models/tests/test_hydration_v2.py
+++ b/nucliadb_models/tests/test_hydration_v2.py
@@ -56,22 +56,17 @@ def test_hydration_v2():
 
 
 def test_full_resource_strategy():
-    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+    resource_ids = [f"{uuid4().hex}", f"{uuid4().hex}"]
 
     _ = hydration_v2.HydrationRequest.model_validate(
         {
             "hydrations": [
                 {
-                    "given": paragraph_ids,
+                    "given": resource_ids,
                     "select": [
-                        {
-                            "prop": "fields",
-                            "select": [
-                                {"prop": "text"},
-                            ],
-                        },
+                        {"prop": "text"},
                     ],
-                    "from": "resources",
+                    "from": "fields",
                 }
             ]
         }
@@ -79,23 +74,18 @@ def test_full_resource_strategy():
 
 
 def test_field_extension_strategy():
-    paragraph_ids = [f"{uuid4().hex}/t/text/0-10", f"{uuid4().hex}/f/file/20-25"]
+    resource_ids = [f"{uuid4().hex}", f"{uuid4().hex}"]
 
     _ = hydration_v2.HydrationRequest.model_validate(
         {
             "hydrations": [
                 {
-                    "given": paragraph_ids,
+                    "given": resource_ids,
                     "select": [
-                        {
-                            "prop": "fields",
-                            "select": [
-                                {"prop": "text"},
-                            ],
-                            "filter": {"ids": ["a/title"]},
-                        },
+                        {"prop": "text"},
                     ],
-                    "from": "resources",
+                    "from": "fields",
+                    "filter": {"ids": ["a/title"]},
                 }
             ]
         }
@@ -184,7 +174,17 @@ def test_conversational_strategy():
                     ],
                     "from": "conversations",
                     "limits": {"max_messages": 5},
-                }
+                },
+                {
+                    "given": paragraph_ids,
+                    "select": [
+                        # we do have this implemented but not exposed. Given a
+                        # conversation, if it's a question try to find a
+                        # following message marked as answer in the same page
+                        {"prop": "answer"},
+                    ],
+                    "from": "conversations",
+                },
             ]
         }
     )