BioDataFuse · jmillanacosta · Mar 9, 2026 · Mar 4, 2026 · Mar 9, 2026
diff --git a/CITATION.cff b/CITATION.cff
@@ -23,35 +23,28 @@ authors:
     given-names: Tooba
     affiliation: Maastricht University
     orcid: https://orcid.org/0000-0002-4904-3269
-
   - family-names: Gadiya
     given-names: Yojana
     affiliation: Enveda Therapeutics (United States)
     orcid: https://orcid.org/0000-0002-7683-0452
-
   - family-names: Millán Acosta
     given-names: Javier
     orcid: https://orcid.org/0000-0002-4166-7093
-
   - family-names: Willighagen
     given-names: Egon
     affiliation: Maastricht University
     orcid: https://orcid.org/0000-0001-7542-0286
-
   - family-names: Mei
     given-names: Hailiang
     orcid: https://orcid.org/0000-0003-1781-5508
-
   - family-names: Sima
     given-names: Ana Claudia
     affiliation: Swiss Institute of Bioinformatics Geneva
     orcid: https://orcid.org/0000-0003-3213-4495
-
   - family-names: Martinát
     given-names: Dominik
     affiliation: Univerzita Palackého v Olomouci Přírodovědecká fakulta
     orcid: https://orcid.org/0000-0001-6611-7883
-
   - family-names: Adriaque Lozano
     given-names: Alejandro
     affiliation: Maastricht University
@@ -62,8 +55,8 @@ identifiers:
     value: 10.5281/zenodo.18468943
     description: Archived software release on Zenodo
 
-repository-code: https://github.com/<org>/pyBioDataFuse
-url: https://github.com/<org>/pyBioDataFuse
+repository-code: https://github.com/BioDataFuse/pyBiodatafuse
+url: https://github.com/BioDataFuse/pyBiodatafuse
 
 keywords:
   - biomedical data integration
@@ -75,6 +68,32 @@ keywords:
 
 preferred-citation:
   type: article
-  title: "pyBiodatafuse: extending interoperability of data using modular queries across biomedical resources" 
+  title: "pyBiodatafuse: extending interoperability of data using modular queries across biomedical resources"
   journal: Bioinformatics
+  year: 2026
   doi: 10.1093/bioinformatics/btag064
+  authors:
+    - family-names: Gadiya
+      given-names: Yojana
+      orcid: https://orcid.org/0000-0002-7683-0452
+    - family-names: Millán Acosta
+      given-names: Javier
+      orcid: https://orcid.org/0000-0002-4166-7093
+    - family-names: Abbassi-Daloii
+      given-names: Tooba
+      orcid: https://orcid.org/0000-0002-4904-3269
+    - family-names: Willighagen
+      given-names: Egon
+      orcid: https://orcid.org/0000-0001-7542-0286
+    - family-names: Mei
+      given-names: Hailiang
+      orcid: https://orcid.org/0000-0003-1781-5508
+    - family-names: Sima
+      given-names: Ana Claudia
+      orcid: https://orcid.org/0000-0003-3213-4495
+    - family-names: Martinát
+      given-names: Dominik
+      orcid: https://orcid.org/0000-0001-6611-7883
+    - family-names: Adriaque Lozano
+      given-names: Alejandro
+      orcid: https://orcid.org/0009-0007-2725-2098
diff --git a/src/pyBiodatafuse/analyzer/explorer/patent.py b/src/pyBiodatafuse/analyzer/explorer/patent.py
@@ -8,11 +8,12 @@
 import time
 from typing import Literal, Union
 
+import matplotlib.pyplot as plt
 import pandas as pd
+import plotly.express as px
 import requests
 from tqdm import tqdm
-import plotly.express as px
-import matplotlib.pyplot as plt
+
 from pyBiodatafuse.analyzer.utils import (
     plot_hbarplot_chart,
     plot_pie_chart,

diff --git a/src/pyBiodatafuse/annotators/intact.py b/src/pyBiodatafuse/annotators/intact.py
@@ -46,6 +46,39 @@ def check_version_intact() -> dict:
         return {"source_version": "unknown"}
 
 
+def _normalize_intact_id(raw_id: str) -> str:
+    """Normalise a raw IntAct identifier field to ``namespace:accession`` form.
+
+    The IntAct REST API returns identifiers in one of two formats:
+
+    * ``"ID (namespace)"``  – e.g. ``"Q14118 (uniprotkb)"``,
+      ``"CHEBI:15361 (chebi)"``, ``"CPX-3573 (complex portal)"``
+    * ``"namespace:ID"``    – legacy format, kept for safety.
+
+    In both cases the returned string is normalised to ``namespace:ID``.
+    Multi-word namespace suffixes (e.g. ``"complex portal"``) are kept intact:
+    the namespace is everything between the first ``(`` and the final ``)``.
+
+    :param raw_id: Raw identifier string from the API response.
+    :returns: Normalised ``namespace:accession`` string.
+    """
+    raw_id = raw_id.strip()
+
+    # Current API format: "ID (namespace)" or "CHEBI:15361 (chebi)"
+    if raw_id.endswith(")") and "(" in raw_id:
+        paren_open = raw_id.index("(")
+        accession = raw_id[:paren_open].strip()
+        namespace = raw_id[paren_open + 1 : -1].strip()  # strip surrounding parens
+        # If the accession already carries a colon (e.g. CHEBI:15361) it is
+        # already in canonical form – return as-is.
+        if ":" in accession:
+            return accession
+        return f"{namespace}:{accession}"
+
+    # Legacy / already-canonical format: "namespace:ID"
+    return raw_id
+
+
 def get_intact_interactions(gene_ids: List[str]) -> List[dict]:
     """Retrieve protein interactions for a list of genes from IntAct.
 
@@ -59,19 +92,24 @@ def get_intact_interactions(gene_ids: List[str]) -> List[dict]:
     encoded_ids = urllib.parse.quote(joined_ids)
     url = f"{Cons.INTACT_ENDPOINT}/ws/interaction/findInteractions/{encoded_ids}?pageSize=200"
 
+    logger.debug("Querying IntAct interactions URL: %s", url)
+
     try:
         response = requests.get(url, timeout=60)
         data = response.json()
 
         content = data.get("content", [])
+        logger.debug("IntAct returned %d interaction records for ids: %s", len(content), gene_ids)
         if not content:
             return []
 
-        interation_info = {
+        # Mapping from our output field names to the IntAct REST API JSON keys.
+        # API reference: https://www.ebi.ac.uk/intact/ws/interaction/findInteractions
+        interaction_field_map = {
             Cons.INTACT_INTERACTION_ID: "ac",
-            Cons.INTACT_INTERACTOR_ID_A: "acA",
-            Cons.INTACT_INTERACTOR_ID_B: "acB",
-            Cons.INTACT_SCORE: "intactMiscore",
+            Cons.INTACT_INTERACTOR_ID_A: "acA",  # IntAct AC for interactor A
+            Cons.INTACT_INTERACTOR_ID_B: "acB",  # IntAct AC for interactor B
+            Cons.INTACT_SCORE: "intactMiscore",  # MI-score confidence value
             Cons.INTACT_BIOLOGICAL_ROLE_A: "biologicalRoleA",
             Cons.INTACT_BIOLOGICAL_ROLE_B: "biologicalRoleB",
             Cons.INTACT_TYPE: "type",
@@ -83,39 +121,45 @@ def get_intact_interactions(gene_ids: List[str]) -> List[dict]:
             Cons.INTACT_INTERACTOR_B_SPECIES: "speciesB",
             Cons.INTACT_MOLECULE_A: "moleculeA",
             Cons.INTACT_MOLECULE_B: "moleculeB",
+            # idA / idB: primary identifier in "ID (namespace)" format, e.g.
+            #   "Q14118 (uniprotkb)", "CHEBI:15361 (chebi)", "CPX-3573 (complex portal)"
             Cons.INTACT_ID_A: "idA",
             Cons.INTACT_ID_B: "idB",
             Cons.INTACT_PUBMED_PUBLICATION_ID: "publicationPubmedIdentifier",
         }
 
         interactions = [
-            {key: item.get(value, np.nan) for key, value in interation_info.items()}
+            {key: item.get(api_key, np.nan) for key, api_key in interaction_field_map.items()}
             for item in content
         ]
 
-        # cleanup the alternative ids
+        # Normalise idA / idB from "ID (namespace)" → "namespace:ID"
         for interaction in interactions:
-            ids_a = interaction[Cons.INTACT_ID_A]
-            ids_b = interaction[Cons.INTACT_ID_B]
+            raw_a = interaction[Cons.INTACT_ID_A]
+            raw_b = interaction[Cons.INTACT_ID_B]
 
-            if ":" in ids_a:
-                interaction[Cons.INTACT_ID_A] = ids_a.split(" ")[0]  # stays the same
+            if isinstance(raw_a, str):
+                interaction[Cons.INTACT_ID_A] = _normalize_intact_id(raw_a)
             else:
-                idx = ids_a.split(" ")[0]
-                namespace = ids_a.split(" ")[1].replace("(", "").replace(")", "")
-                interaction[Cons.INTACT_ID_A] = f"{namespace}:{idx}"
-
-            if ":" in ids_b:
-                interaction[Cons.INTACT_ID_B] = ids_b.split(" ")[0]  # stays the same
+                logger.debug(
+                    "Unexpected non-string idA value: %r (interaction %s)",
+                    raw_a,
+                    interaction.get(Cons.INTACT_INTERACTION_ID),
+                )
+
+            if isinstance(raw_b, str):
+                interaction[Cons.INTACT_ID_B] = _normalize_intact_id(raw_b)
             else:
-                idx = ids_b.split(" ")[0]
-                namespace = ids_b.split(" ")[1].replace("(", "").replace(")", "")
-                interaction[Cons.INTACT_ID_B] = f"{namespace}:{idx}"
+                logger.debug(
+                    "Unexpected non-string idB value: %r (interaction %s)",
+                    raw_b,
+                    interaction.get(Cons.INTACT_INTERACTION_ID),
+                )
 
         return interactions
 
     except requests.RequestException as e:
-        logger.warning(f"Batch request failed for genes {gene_ids}: {e}")
+        logger.warning("Batch request failed for ids %s: %s", gene_ids, e)
         return []
 
 
@@ -126,12 +170,16 @@ def get_protein_intact_acs(id_of_interest: str) -> List[str]:
     :returns: Interactor information if possible, empty list if not.
     """
     url = f"{Cons.INTACT_ENDPOINT}/ws/interactor/findInteractor/{id_of_interest}?pageSize=100"
+    logger.debug("Querying IntAct interactor lookup URL: %s", url)
     try:
         response = requests.get(url, timeout=120)
         response.raise_for_status()
         data = response.json()
 
         content = data.get("content", [])
+        logger.debug(
+            "IntAct interactor lookup returned %d records for %s", len(content), id_of_interest
+        )
 
         protein_acs = []
         for item in content:
@@ -141,10 +189,13 @@ def get_protein_intact_acs(id_of_interest: str) -> List[str]:
             if interactor_type == "protein":
                 protein_acs.append(interactor_ac)
 
+        logger.debug(
+            "Found %d protein AC(s) for %s: %s", len(protein_acs), id_of_interest, protein_acs
+        )
         return protein_acs
 
     except requests.exceptions.RequestException as e:
-        logger.warning(f"Failed to get interactors for {id_of_interest}: {e}")
+        logger.warning("Failed to get interactors for %s: %s", id_of_interest, e)
         return []
 
 
@@ -170,6 +221,13 @@ def get_filtered_interactions(
     """
     results: Dict[str, List[dict]] = {idx: [] for idx in batch_ids}
     interactions = get_intact_interactions(batch_ids)
+    logger.debug(
+        "Filtering %d raw interactions for batch %s (interaction_type=%s, is_compound=%s)",
+        len(interactions),
+        batch_ids,
+        interaction_type,
+        is_compound,
+    )
 
     for interaction in interactions:
         if interaction_type in Cons.INTACT_GENE_INTERACTION_TYPES and not is_compound:
@@ -234,6 +292,13 @@ def get_filtered_interactions(
                 keep_interaction = True
 
         if not keep_interaction:
+            logger.debug(
+                "Skipping interaction %s (id_a=%s, id_b=%s) – does not match type '%s'",
+                interaction.get(Cons.INTACT_INTERACTION_ID),
+                id_a,
+                id_b,
+                interaction_type,
+            )
             continue
 
         for idx in batch_ids:
@@ -252,9 +317,12 @@ def get_filtered_interactions(
 
     for gene_id in batch_ids:
         if not results[gene_id]:
+            logger.debug("No interactions found for %s – inserting empty entry", gene_id)
             empty_entry = {key: np.nan for key in Cons.INTACT_OUTPUT_DICT}
             empty_entry["intact_link_to"] = np.nan
             results[gene_id] = [empty_entry]
+        else:
+            logger.debug("Kept %d interaction(s) for %s", len(results[gene_id]), gene_id)
 
     return results
 

diff --git a/src/pyBiodatafuse/graph/rdf/graphdb.py b/src/pyBiodatafuse/graph/rdf/graphdb.py
@@ -83,9 +83,7 @@ def create_repository(
                 graphdb:enable-literal-index "true" ;
             ]
             ].
-        """.format(
-            repository_name=repository_name
-        )
+        """.format(repository_name=repository_name)
         # Save the configuration content to a temporary file
         with tempfile.NamedTemporaryFile(delete=False, suffix=".ttl") as temp_file:
             temp_file.write(config_content.encode("utf-8"))

diff --git a/tests/annotators/test_intact.py b/tests/annotators/test_intact.py
@@ -39,7 +39,7 @@ def test_get_interactions(self):
                         "interaction_id": "EBI-7882257",
                         "interactor_id_A": "EBI-1755945",
                         "interactor_id_B": "EBI-1755945",
-                        "score": 0.0,
+                        "score": 0.56,
                         "biological_role_A": "unspecified role",
                         "biological_role_B": "unspecified role",
                         "type": "direct interaction",
@@ -60,7 +60,7 @@ def test_get_interactions(self):
                         "interaction_id": "EBI-7882311",
                         "interactor_id_A": "EBI-1755945",
                         "interactor_id_B": "EBI-1755945",
-                        "score": 0.0,
+                        "score": 0.56,
                         "biological_role_A": "unspecified role",
                         "biological_role_B": "unspecified role",
                         "type": "direct interaction",
@@ -81,7 +81,7 @@ def test_get_interactions(self):
                         "interaction_id": "EBI-5327885",
                         "interactor_id_A": "EBI-5327879",
                         "interactor_id_B": "EBI-1755945",
-                        "score": 0.0,
+                        "score": 0.4,
                         "biological_role_A": "unspecified role",
                         "biological_role_B": "unspecified role",
                         "type": "physical association",
@@ -130,7 +130,7 @@ def test_get_compound_interactions(self):
                         "interaction_id": "EBI-9301798",
                         "interactor_id_A": "EBI-9096",
                         "interactor_id_B": "EBI-6621808",
-                        "score": 0.0,
+                        "score": 0.44,
                         "biological_role_A": "enzyme",
                         "biological_role_B": "enzyme target",
                         "type": "enzymatic reaction",
@@ -151,7 +151,7 @@ def test_get_compound_interactions(self):
                         "interaction_id": "EBI-6621805",
                         "interactor_id_A": "EBI-372327",
                         "interactor_id_B": "EBI-6621808",
-                        "score": 0.0,
+                        "score": 0.44,
                         "biological_role_A": "enzyme",
                         "biological_role_B": "enzyme target",
                         "type": "enzymatic reaction",