diff --git a/src/ere/adapters/mock_resolver.py b/src/ere/adapters/mock_resolver.py index ba2fd1b..e54d0e9 100644 --- a/src/ere/adapters/mock_resolver.py +++ b/src/ere/adapters/mock_resolver.py @@ -19,20 +19,20 @@ class MockResolver: """ def process_request(self, request: ERERequest) -> EREResponse: - request_id = getattr(request, "ereRequestId", "unknown") + request_id = getattr(request, "ere_request_id", "unknown") log.warning( "MockResolver.process_request: returning placeholder error response " "for request_id=%s — wire a real resolver to enable resolution.", request_id, ) return EREErrorResponse( - ereRequestId=request_id, - errorTitle="Mock resolver — not implemented", - errorDetail=( + ere_request_id=request_id, + error_title="Mock resolver — not implemented", + error_detail=( "This ERE instance is running with the MockResolver placeholder. " "No resolution logic has been configured." ), - errorType="NotImplementedError", + error_type="NotImplementedError", timestamp=datetime.now(timezone.utc).isoformat(), ) diff --git a/src/ere/adapters/redis.py b/src/ere/adapters/redis.py index 212ad0d..0552add 100644 --- a/src/ere/adapters/redis.py +++ b/src/ere/adapters/redis.py @@ -68,11 +68,11 @@ def __init__( def push_request(self, request: ERERequest): log.debug( - f"Redis ERE client, pushing request id: {request.ereRequestId} to channel: {self.request_channel_id}" + f"Redis ERE client, pushing request id: {request.ere_request_id} to channel: {self.request_channel_id}" ) msg_json_str = _linkml_dumper.dumps(request) self._redis_client.lpush(self.request_channel_id, msg_json_str) - log.debug(f"Redis ERE client, request id: {request.ereRequestId} sent") + log.debug(f"Redis ERE client, request id: {request.ere_request_id} sent") def subscribe_responses(self) -> Generator[EREResponse, None, None]: while True: @@ -83,7 +83,7 @@ def subscribe_responses(self) -> Generator[EREResponse, None, None]: _, raw_msg = self._redis_client.brpop(self.response_channel_id) response = get_response_from_message(raw_msg, self.character_encoding) log.debug( - f"Redis ERE client, received response id: {response.ereRequestId}" + f"Redis ERE client, received response id: {response.ere_request_id}" ) yield response except (ConnectionError, TimeoutError) as ex: diff --git a/src/ere/services/redis.py b/src/ere/services/redis.py index 1d31594..a165226 100644 --- a/src/ere/services/redis.py +++ b/src/ere/services/redis.py @@ -79,13 +79,13 @@ async def _pull_request(self) -> ERERequest: ) request = get_request_from_message(raw_msg, self.character_encoding) - log.debug(f"RedisResolutionService, pulled request id: {request.ereRequestId}") + log.debug(f"RedisResolutionService, pulled request id: {request.ere_request_id}") return request def _push_response(self, response: EREResponse): log.debug( - f"RedisResolutionService, pushing response id: {response.ereRequestId} to channel: {self.response_channel_id}" + f"RedisResolutionService, pushing response id: {response.ere_request_id} to channel: {self.response_channel_id}" ) msg_json_str = _linkml_dumper.dumps(response) self._redis_client.lpush(self.response_channel_id, msg_json_str) - log.debug(f"RedisResolutionService, response id: {response.ereRequestId} sent") + log.debug(f"RedisResolutionService, response id: {response.ere_request_id} sent") diff --git a/test/_test_ere_abstracts.py b/test/_test_ere_abstracts.py index 30be5b4..fa3faee 100644 --- a/test/_test_ere_abstracts.py +++ b/test/_test_ere_abstracts.py @@ -29,16 +29,18 @@ from pyparsing import Path from rdflib import Graph -from ere.entrypoints import AbstractClient -from ere.models.core import ( +from ere.adapters.redis import AbstractClient +from erspec.models.ere import ( EntityMentionResolutionRequest, EntityMentionResolutionResponse, + EREErrorResponse, + # FullRebuildRequest, # TODO: Uncomment when available in erspec + # FullRebuildResponse, # TODO: Uncomment when available in erspec +) +from erspec.models.core import ( EntityMention, EntityMentionIdentifier, ClusterReference, - EREErrorResponse, - FullRebuildRequest, - FullRebuildResponse, ) @@ -62,30 +64,30 @@ def test_known_entity_resolution(mock_ere_client: AbstractClient): ) test_entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=test_entity_uri, - sourceId="test-module", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=test_entity_uri, + source_id="test-module", + entity_type=f"{ORG_NS}Organization", ), # Not important here, the mock resolver just looks up static test data # TODO: validation of ID/content match - contentType="text/turtle", + content_type="text/turtle", content="", ) test_req = EntityMentionResolutionRequest( - entityMention=test_entity_mention, - ereRequestId="test-known-entity-resolution-001", + entity_mention=test_entity_mention, + ere_request_id="test-known-entity-resolution-001", timestamp=create_timestamp(), ) mock_ere_client.push_request(test_req) entity_resolution = catch_response( - mock_ere_client, test_req.ereRequestId, EntityMentionResolutionResponse + mock_ere_client, test_req.ere_request_id, EntityMentionResolutionResponse ) assert_that( - entity_resolution.entityMentionId, + entity_resolution.entity_mention_id, "Resolution response has the source entity mention ID", ).is_equal_to(test_entity_mention.identifier) @@ -110,26 +112,26 @@ def test_unknown_entity_resolution(mock_ere_client: AbstractClient): test_entity_uri = f"{ORG_NS}foo_organization_999" test_entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=test_entity_uri, - sourceId="test-module", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=test_entity_uri, + source_id="test-module", + entity_type=f"{ORG_NS}Organization", ), # Not important here, the mock resolver just looks up static test data # TODO: validation of ID/content match - contentType="text/turtle", + content_type="text/turtle", content="", ) test_req = EntityMentionResolutionRequest( - entityMention=test_entity_mention, - ereRequestId="test-unknown-entity-resolution-001", + entity_mention=test_entity_mention, + ere_request_id="test-unknown-entity-resolution-001", timestamp=create_timestamp(), ) mock_ere_client.push_request(test_req) entity_resolution = catch_response( - mock_ere_client, test_req.ereRequestId, EntityMentionResolutionResponse + mock_ere_client, test_req.ere_request_id, EntityMentionResolutionResponse ) candidate_clusters = entity_resolution.candidates @@ -148,39 +150,42 @@ def test_unknown_entity_resolution(mock_ere_client: AbstractClient): ).is_equal_to(1) -def test_ere_acknowledges_rebuild_request(mock_ere_client: AbstractClient): - """ - Scenario: The ERE acknowledges a rebuild request - """ - - rebuild_request = FullRebuildRequest( - ereRequestId="test-ere-acknowledges-rebuild-request-001", - timestamp=create_timestamp(), - ) - - mock_ere_client.push_request(rebuild_request) - - # Does all the assertions we want here - catch_response(mock_ere_client, rebuild_request.ereRequestId, FullRebuildResponse) - - -def test_ere_still_working_after_rebuild(mock_ere_client: AbstractClient): - """ - Scenario: The ERE keeps resolving entities as usually after a rebuild request - """ - - # First, send a rebuild request - rebuild_request = FullRebuildRequest( - ereRequestId="test-ere-still-working-after-rebuild-001", - timestamp=create_timestamp(), - ) - - mock_ere_client.push_request(rebuild_request) - catch_response(mock_ere_client, rebuild_request.ereRequestId, FullRebuildResponse) - - # Now just repeat previous tests - test_known_entity_resolution(mock_ere_client) - test_unknown_entity_resolution(mock_ere_client) +# TODO: Uncomment when FullRebuildRequest/Response are available in erspec +# def test_ere_acknowledges_rebuild_request(mock_ere_client: AbstractClient): +# """ +# Scenario: The ERE acknowledges a rebuild request +# """ +# +# rebuild_request = FullRebuildRequest( +# ere_request_id="test-ere-acknowledges-rebuild-request-001", +# timestamp=create_timestamp(), +# ) +# +# mock_ere_client.push_request(rebuild_request) +# +# # Does all the assertions we want here +# catch_response(mock_ere_client, rebuild_request.ere_request_id, FullRebuildResponse) + + +# TODO: Uncomment when FullRebuildRequest/Response are available in erspec +# # TODO: Uncomment when FullRebuildRequest/Response are available in erspec +# def test_ere_still_working_after_rebuild(mock_ere_client: AbstractClient): +# """ +# Scenario: The ERE keeps resolving entities as usually after a rebuild request +# """ +# +# # First, send a rebuild request +# rebuild_request = FullRebuildRequest( +# ere_request_id="test-ere-still-working-after-rebuild-001", +# timestamp=create_timestamp(), +# ) +# +# mock_ere_client.push_request(rebuild_request) +# catch_response(mock_ere_client, rebuild_request.ere_request_id, FullRebuildResponse) +# +# # Now just repeat previous tests +# test_known_entity_resolution(mock_ere_client) +# test_unknown_entity_resolution(mock_ere_client) def test_ere_replies_with_error_response_to_malformed_request( @@ -191,12 +196,12 @@ def test_ere_replies_with_error_response_to_malformed_request( """ # Send a malformed request (content type is unsupported) malformed_request = EntityMentionResolutionRequest( - ereRequestId="test-bad-resolution-req-001", - entityMention=EntityMention( - identifier=EntityMentionIdentifier( - requestId="", sourceId="test-module", entityType="FooType" + ere_request_id="test-bad-resolution-req-001", + entity_mention=EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id="", source_id="test-module", entity_type="FooType" ), # Malformed part - contentType="text/turtle", + content_type="text/turtle", content="", ), timestamp=create_timestamp(), @@ -204,16 +209,16 @@ def test_ere_replies_with_error_response_to_malformed_request( mock_ere_client.push_request(malformed_request) error_response = catch_response( - mock_ere_client, malformed_request.ereRequestId, EREErrorResponse + mock_ere_client, malformed_request.ere_request_id, EREErrorResponse ) assert_that( - error_response.errorTitle, "The response has the expected error title" + error_response.error_title, "The response has the expected error title" ).contains("MockResolver, unsupported entity type") assert_that( - error_response.errorDetail, "The response has the expected error detail" + error_response.error_detail, "The response has the expected error detail" ).contains("MockResolver, unsupported entity type") - assert_that(error_response.errorType, "The response has an error type").is_equal_to( + assert_that(error_response.error_type, "The response has an error type").is_equal_to( "ValueError" ) diff --git a/test/_test_ere_pubsub_service.py b/test/_test_ere_pubsub_service.py index cf8b587..82240c0 100644 --- a/test/_test_ere_pubsub_service.py +++ b/test/_test_ere_pubsub_service.py @@ -13,12 +13,15 @@ from assertpy import assert_that from ere_test import EPD_NS, ORG_NS, MockResolver, catch_response, create_timestamp -from ere.entrypoints import AbstractClient -from ere.models.core import ( +from ere.adapters.redis import AbstractClient +from erspec.models.ere import ( EntityMentionResolutionRequest, EntityMentionResolutionResponse, ERERequest, EREResponse, + EREErrorResponse, +) +from erspec.models.core import ( ClusterReference, EntityMention, EntityMentionIdentifier, @@ -39,38 +42,40 @@ def test_known_entity_resolution(mock_ere_client: AbstractClient): ) expected_cluster = ClusterReference( - clusterId=f"{EPD_NS}id_2023-S-210-662860_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_Cluster", - confidenceScore=0.98, + cluster_id=f"{EPD_NS}id_2023-S-210-662860_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_Cluster", + confidence_score=0.98, + similarity_score=0.98, ) expected_alt_cluster = ClusterReference( - clusterId=f"{EPD_NS}id_2023-S-210-661238_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_alt_Cluster", - confidenceScore=0.80, + cluster_id=f"{EPD_NS}id_2023-S-210-661238_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_alt_Cluster", + confidence_score=0.80, + similarity_score=0.80, ) test_entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=test_entity_uri, - sourceId="test-module", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=test_entity_uri, + source_id="test-module", + entity_type=f"{ORG_NS}Organization", ), # Not important here, the mock resolver just looks up static test data # TODO: validation of ID/content match - contentType="text/turtle", + content_type="text/turtle", content="", ) test_req = EntityMentionResolutionRequest( - entityMention=test_entity_mention, - ereRequestId="test-known-entity-resolution-001", + entity_mention=test_entity_mention, + ere_request_id="test-known-entity-resolution-001", timestamp=create_timestamp(), ) mock_ere_client.push_request(test_req) entity_resolution: EntityMentionResolutionResponse = catch_response( - mock_ere_client, test_req.ereRequestId, EntityMentionResolutionResponse + mock_ere_client, test_req.ere_request_id, EntityMentionResolutionResponse ) assert_that( - entity_resolution.entityMentionId, + entity_resolution.entity_mention_id, "Resolution response has the source entity mention ID", ).is_equal_to(test_entity_mention.identifier) @@ -131,14 +136,14 @@ def guarded_get() -> ERERequest | None: log.debug("Service: pulling request from queue") # Needs to go in a thread, in order to not block the event loop in waiting request = await asyncio.to_thread(guarded_get) - id = request.ereRequestId if request else "None" + id = request.ere_request_id if request else "None" log.debug(f"Service: got a request from queue, id: {id}") return request def _push_response(self, response: EREResponse): - log.debug(f"Service: pushing response to queue, id: {response.ereRequestId}") + log.debug(f"Service: pushing response to queue, id: {response.ere_request_id}") _response_queue.put_nowait(response) - log.debug(f"Service: pushed response to queue, id: {response.ereRequestId}") + log.debug(f"Service: pushed response to queue, id: {response.ere_request_id}") class FooPubSubClient(AbstractClient): @@ -150,13 +155,13 @@ class FooPubSubClient(AbstractClient): """ def push_request(self, request: ERERequest): - log.debug(f"Client: pushing request to queue, id: {request.ereRequestId}") + log.debug(f"Client: pushing request to queue, id: {request.ere_request_id}") _request_queue.put_nowait(request) - log.debug(f"Client: pushed request to queue, id: {request.ereRequestId}") + log.debug(f"Client: pushed request to queue, id: {request.ere_request_id}") def subscribe_responses(self) -> Generator[EREResponse, None, None]: while True: log.debug("Client: waiting for response from queue") response = _response_queue.get() - log.debug(f"Client: got a response from queue, id: {response.ereRequestId}") + log.debug(f"Client: got a response from queue, id: {response.ere_request_id}") yield response diff --git a/test/_test_ere_service_redis.py b/test/_test_ere_service_redis.py index 6b34b32..2f85eec 100644 --- a/test/_test_ere_service_redis.py +++ b/test/_test_ere_service_redis.py @@ -18,15 +18,17 @@ ) from testcontainers.redis import RedisContainer -from ere.entrypoints import AbstractClient +from ere.adapters.redis import AbstractClient from ere.adapters.redis import RedisEREClient -from ere.models.core import ( +from erspec.models.ere import ( EntityMentionResolutionRequest, EntityMentionResolutionResponse, + EREErrorResponse, +) +from erspec.models.core import ( ClusterReference, EntityMention, EntityMentionIdentifier, - EREErrorResponse, ) from ere.services.redis import RedisResolutionService @@ -44,38 +46,40 @@ def test_known_entity_resolution(mock_ere_client: AbstractClient): ) expected_cluster = ClusterReference( - clusterId=f"{EPD_NS}id_2023-S-210-662860_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_Cluster", - confidenceScore=0.98, + cluster_id=f"{EPD_NS}id_2023-S-210-662860_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_Cluster", + confidence_score=0.98, + similarity_score=0.98, ) expected_alt_cluster = ClusterReference( - clusterId=f"{EPD_NS}id_2023-S-210-661238_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_alt_Cluster", - confidenceScore=0.80, + cluster_id=f"{EPD_NS}id_2023-S-210-661238_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_alt_Cluster", + confidence_score=0.80, + similarity_score=0.80, ) test_entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=test_entity_uri, - sourceId="test-module", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=test_entity_uri, + source_id="test-module", + entity_type=f"{ORG_NS}Organization", ), # Not important here, the mock resolver just looks up static test data # TODO: validation of ID/content match - contentType="text/turtle", + content_type="text/turtle", content="", ) test_req = EntityMentionResolutionRequest( - entityMention=test_entity_mention, - ereRequestId="test-known-entity-resolution-001", + entity_mention=test_entity_mention, + ere_request_id="test-known-entity-resolution-001", timestamp=create_timestamp(), ) mock_ere_client.push_request(test_req) entity_resolution = catch_response( - mock_ere_client, test_req.ereRequestId, EntityMentionResolutionResponse + mock_ere_client, test_req.ere_request_id, EntityMentionResolutionResponse ) assert_that( - entity_resolution.entityMentionId, + entity_resolution.entity_mention_id, "Resolution response has the source entity mention ID", ).is_equal_to(test_entity_mention.identifier) @@ -95,12 +99,12 @@ def test_ere_replies_with_error_response_to_malformed_request( """ # Send a malformed request (content type is unsupported) malformed_request = EntityMentionResolutionRequest( - ereRequestId="test-bad-resolution-req-001", - entityMention=EntityMention( - identifier=EntityMentionIdentifier( - requestId="", sourceId="test-module", entityType="FooType" + ere_request_id="test-bad-resolution-req-001", + entity_mention=EntityMention( + identifiedBy=EntityMentionIdentifier( + request_id="", source_id="test-module", entity_type="FooType" ), # Malformed part - contentType="text/turtle", + content_type="text/turtle", content="", ), timestamp=create_timestamp(), @@ -108,16 +112,16 @@ def test_ere_replies_with_error_response_to_malformed_request( mock_ere_client.push_request(malformed_request) error_response = catch_response( - mock_ere_client, malformed_request.ereRequestId, EREErrorResponse + mock_ere_client, malformed_request.ere_request_id, EREErrorResponse ) assert_that( - error_response.errorTitle, "The response has the expected error title" + error_response.error_title, "The response has the expected error title" ).contains("MockResolver, unsupported entity type") assert_that( - error_response.errorDetail, "The response has the expected error detail" + error_response.error_detail, "The response has the expected error detail" ).contains("MockResolver, unsupported entity type") - assert_that(error_response.errorType, "The response has an error type").is_equal_to( + assert_that(error_response.error_type, "The response has an error type").is_equal_to( "ValueError" ) diff --git a/test/ere_test/__init__.py b/test/ere_test/__init__.py new file mode 100644 index 0000000..fcd4c41 --- /dev/null +++ b/test/ere_test/__init__.py @@ -0,0 +1,418 @@ +""" +Helpers and mockups for ERE tests. +""" + +import datetime +import hashlib +from logging import getLogger +from pathlib import Path +from typing import Dict, Generator, Iterable + +from assertpy import assert_that +from rdflib import Graph + +from ere.adapters import AbstractResolver +from ere.adapters.redis import AbstractClient +from erspec.models.ere import ( + ERERequest, EREResponse, + EntityMentionResolutionRequest, EntityMentionResolutionResponse, + # FullRebuildRequest, FullRebuildResponse, # TODO: uncomment when available + EREErrorResponse, +) +from erspec.models.core import ( + ClusterReference, + EntityMentionIdentifier +) + +log = getLogger ( __name__ ) + +ERS_TEST_DATA_NS = "https://data.europa.eu/ers/resource/" +ERS_SCHEMA_NS = "https://data.europa.eu/ers/schema/" + +EPD_NS = "http://data.europa.eu/a4g/resource/" +EPO_NS = "http://data.europa.eu/a4g/ontology#" +ORG_NS = "http://www.w3.org/ns/org#" + + +class MockEREClient ( AbstractClient ): + """ + A Mockup ERE client, based on an internal in-memory store loaded with test data. + """ + def __init__ ( self ): + self._init_test_data () + self._response_queue = [] + + def _init_test_data ( self ): + self._resolver = MockResolver () + + def push_request ( self, request: ERERequest ): + result = self._resolver.process_request ( request ) + self._response_queue.append ( result ) + + def subscribe_responses ( self ) -> Generator[EREResponse, None, None]: + while self._response_queue: + yield self._response_queue.pop ( 0 ) + + +# TODO: will become an internal class for the implementation +class _ERECluster: + def __init__ ( + self, + uri: str, + members: Dict [str, float] = {} + ): + self.uri = uri + self.members = members + + + +class MockResolver ( AbstractResolver ): + """ + A mockup in-memory resolver for entity resolution, based on test data. + """ + + SUPPORTED_ENTITY_TYPES = { f"{ORG_NS}Organization", f"{EPO_NS}Procedure" } + + def __init__ ( self ): + self._load_test_data () + self._extract_all_clusters () + + def get_member_clusters ( self, member_uri: str ) -> list[tuple[str, float]]: + """ + Returns: a list of tuples of (cluster URI, confidence score) for the entity URI. + """ + clusters = self._member_index.get ( member_uri ) + if not clusters: return [] + result = [ (cluster.uri, cluster.members [ member_uri ]) for cluster in clusters ] + + return result + + def get_cluster_by_entity ( self, entity_uri: str ) -> _ERECluster: + cluster = self._canonical_entity_index.get ( entity_uri ) + if cluster: return cluster + return self._member_index.get ( entity_uri ) + + def process_request ( self, request: ERERequest ) -> EREResponse: + """ + Dispatches a request to the appropriate handler. + + This is also responsible for wrapping any exception into an :class:`EREErrorResponse`. + """ + + try: + # TODO: this is an initial silly implementation, which violates the Open/Closed principle, move + # it to an abstract method for a resolution service and have a default implementation + # based on a registry + if isinstance ( request, EntityMentionResolutionRequest ): + return self.resolve_entity ( request ) + # TODO: Uncomment when FullRebuildRequest is available in erspec + # elif isinstance ( request, FullRebuildRequest ): + # return self.process_full_rebuild_request ( request ) + else: + raise ValueError ( f'Unsupported request type: { type ( request ) }' ) + + except Exception as ex: + log.error ( f"Error processing request { request.ere_request_id }: { ex }", exc_info = True ) + ex_type = type ( ex ) + ex_name = ex_type.__name__ + + ex_fqn_name = ex_type.__module__ + if ex_fqn_name == 'builtins': ex_fqn_name = '' + if ex_fqn_name: ex_fqn_name += "." + ex_fqn_name += ex_name + + req_type = type ( request ).__name__ + + error_response = EREErrorResponse ( + ere_request_id = request.ere_request_id, + error_title = f"Request processing error: { str ( ex ) }", + error_detail = f"{ex_name} Error while processing request of type { req_type }: { str ( ex ) }", + error_type = ex_fqn_name + ) + return error_response + + + def resolve_entity ( self, request: EntityMentionResolutionRequest ) -> EntityMentionResolutionResponse: + """ + Mocks up an entity resolution, that is: + + TODO: rewrite this comment! + """ + + entity_id = request.entityMention.identifier + + # It's not useful here, but we need to test error responses. + entity_type = request.entityMention.identifier.entityType + if entity_type not in self.SUPPORTED_ENTITY_TYPES: + raise ValueError ( f"MockResolver, unsupported entity type: '{ entity_type }'" ) + + entity_uri = entity_id_2_uri ( entity_id ) + + candidate_clusters = self.get_member_clusters ( entity_uri ) + if not candidate_clusters: + # OK, this goes into a new singleton cluster. + new_cluster_uri = entity_id_2_cluster_uri ( entity_id ) + self._create_new_cluster ( new_cluster_uri, members = { entity_uri: 1.0 } ) + + # I know it's already here, but let's ensure the creation works + candidate_clusters = self.get_member_clusters ( entity_uri ) + + # Sort them + candidate_clusters.sort ( key = lambda x: x [ 1 ], reverse = True ) + + # TODO: low-confidence filter + + if not candidate_clusters: + raise RuntimeError ( f'Internal error during mock entity resolution for entity { entity_uri }: cluster not found or created' ) + + # Transform them into model objects + candidate_clusters = [ + ClusterReference ( cluster_id = clusterId, confidence_score = score, similarity_score = score ) for clusterId, score in candidate_clusters + ] + + result = EntityMentionResolutionResponse ( + ere_request_id = request.ere_request_id, + entity_mention_id = entity_id, + candidates = candidate_clusters, + timestamp = create_timestamp () + ) + return result + + + # TODO: Uncomment when FullRebuildRequest/Response are available in erspec + # def process_full_rebuild_request ( self, request ) -> FullRebuildResponse: + # """ + # Mocks up the processing of a rebuild request by reloading the test data. + # """ + # # Reset to the initial test data, getting rid of new clusters created via requests after initialisation. + # self.__init__ () + # + # # And then we're done + # response = FullRebuildResponse ( + # ere_request_id = request.ere_request_id, + # timestamp = create_timestamp () + # ) + # return response + + + def _load_test_data ( self ): + """ + Populates the internal RDF graph with data from test files. + """ + + self.graph = Graph () + test_dir = Path ( __file__ ).parent.parent / 'resources' + + for ttl_file in test_dir.glob ( 'example*.ttl' ): + # TODO: logging + print ( f'Loading test data from { ttl_file }' ) + self.graph.parse ( str ( ttl_file ), format = 'turtle' ) + + def _create_new_cluster ( + self, + cluster_uri: str = None, + members: Dict [ str, float ] = {} + ) -> _ERECluster: + """ + Creates a new cluster for the given entity and updates the internal data with it. + + Returns: the created ERECluster instance, which can be used to add members. + """ + cluster = _ERECluster ( cluster_uri, members ) + # We also need an index from member URIs to clusters + for member_uri in members.keys (): + if member_uri not in self._member_index: + self._member_index [ member_uri ] = [] + self._member_index [ member_uri ].append ( cluster ) + + return cluster + + + def _extract_all_clusters ( self ) -> Dict[str, _ERECluster]: + """ + Extracts cluster info from test data like: + + epd:id_2023-S-210-662860_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj_Cluster + a ers:Cluster; + ers:membership [ + ers:member epd:id_2023-S-210-661238_ReviewerOrganisation_LLhJHMi9mby8ixbkfyGoWj; + ers:confidence 1.0 # Canonical entity + ], + [...] + . + + Returns: an index from member URIs to ERECluster instances. + """ + + def extract_members ( cluster_uri: str ) -> Dict[str, float]: + """ + Extracts the members of a cluster from the RDF graph, given the cluster URI. + + Returns: a dict of member URI to confidence score. + """ + + members = {} + query = f""" + PREFIX ers: <{ERS_SCHEMA_NS}> + SELECT ?member ?confidence WHERE {{ + <{ cluster_uri }> ers:membership ?membership . + ?membership ers:member ?member ; + ers:confidence ?confidence . + }} + """ + for row in self.graph.query ( query ): + member_uri = str ( row['member'] ) + score = float ( row['confidence'] ) + members [ member_uri ] = score + + return members + + self._member_index: Dict[str, list[_ERECluster]] = {} + + query = f""" + PREFIX ers: <{ERS_SCHEMA_NS}> + + SELECT ?cluster WHERE {{ + ?cluster a ers:Cluster . + }} + """ + + for row in self.graph.query ( query ): + cluster_uri = str ( row [ 'cluster' ] ) + print ( f"Loading cluster { cluster_uri }" ) + members = extract_members ( cluster_uri ) + + self._create_new_cluster ( cluster_uri, members ) + + if not self._member_index: + raise ValueError ( 'No clusters found in the test data' ) + + # /end: _extract_all_clusters () + + +def hash_uri ( uri: str ) -> str: + """ + Generates a simple hash for URIs to be used for tasks like generating a cluster URI + + TODO: is it still needed? + TODO: utils module + """ + + return hashlib.md5 ( uri.encode ( 'utf-8' ) ).hexdigest () + + +def extract_resource_rdf ( graph: Graph, resource_uri: str ) -> Graph: + """ + Fetches subject-centric triples from the test data, up to a couple of levels deep. + + TODO: do we still need it? + """ + + sparql = """ + CONSTRUCT { + ?myent ?p ?o. + ?o ?p1 ?o1. + ?o1 ?p2 ?o2 + } + WHERE { + bind ( <%s> AS ?myent ) + ?myent ?p ?o. + + OPTIONAL { + ?o ?p1 ?o1. + OPTIONAL { ?o1 ?p2 ?o2. } + } + } + """ + sparql = sparql % resource_uri + entity_graph = graph.query ( sparql ).graph + if len ( entity_graph ) == 0: + raise ValueError ( f'No RDF found for entity { resource_uri }' ) + return entity_graph +# /end: _extract_entity_rdf () + + +def catch_response ( + ere_cli: AbstractClient, request_id: str, type_to_check: type[EREResponse] = None +) -> EREResponse: + """ + Subscribes to to ERE responses and keeps getting responses until one with the given + request ID is found. + + If the response flow stops (eg, channel closed, system went down), raises a :class:`RuntimeError` + + If type_to_check isn't None, asserts that the response is an instance of the given type. + """ + + for response in ere_cli.subscribe_responses (): + if response.ere_request_id == request_id: + if type_to_check: + assert_that ( response, f"Response for request ID '{request_id}' is of the expected type" )\ + .is_instance_of ( type_to_check ) + return response + raise RuntimeError ( f"No response found for request ID '{request_id}'" ) + + +def entity_id_2_uri ( entity_id: EntityMentionIdentifier ) -> str: + """ + Gets an entity URI from the entity mention ID. + + This works under the mock-up data conventions, ie, the entity mention ID has the entity URI as its + `requestId` field. + + Later, we will complement this with a real implementation. + """ + return entity_id.requestId + +def entity_id_2_cluster_uri ( entity_id: EntityMentionIdentifier ) -> str: + """ + Gets a cluster URI from the entity mention ID. + + This works under the mock-up data conventions, ie, when a new singleton cluster is created, + its URI is :function:`entity_id_2_uri` plus a postfix, which means (by the same conventions), + it's the requested entity's URI plus a postfix. + + Later, we will complement this with a real implementation. + """ + entity_uri = entity_id_2_uri ( entity_id ) + return f'{entity_uri}_Cluster' + + +def create_timestamp () -> str: + """ + Factorises the timestamp generation for responses, yielding an ISO-formatted now. + + TODO: to be moved to a utils module. + """ + return datetime.datetime.now( datetime.UTC ).isoformat() + + +def prefix_common_namespaces ( rdf_or_sparql_body: str ) -> str: + """ + Simple helper to have your Turtle or SPARQL string prefixed with common namespace prefixes. + + TODO: do we still need it? + """ + return """ + PREFIX cccev: + PREFIX dct: + PREFIX ep: + PREFIX epd: + PREFIX epo: + PREFIX locn: + PREFIX org: + PREFIX owl: + PREFIX ql: + PREFIX rdf: + PREFIX rdfs: + PREFIX rml: + PREFIX rr: + PREFIX skos: + PREFIX tedm: + PREFIX time: + PREFIX xsd: + + """ + rdf_or_sparql_body + + + diff --git a/test/steps/_test_entity_resolution_steps.py b/test/steps/_test_entity_resolution_steps.py index 6a97f0e..7dcec2b 100644 --- a/test/steps/_test_entity_resolution_steps.py +++ b/test/steps/_test_entity_resolution_steps.py @@ -8,13 +8,15 @@ from assertpy import assert_that from pytest_bdd import given, when, then, parsers -from ere.models.core import ( - EntityMention, - EntityMentionIdentifier, +from erspec.models.ere import ( EntityMentionResolutionRequest, EntityMentionResolutionResponse, EREErrorResponse, ) +from erspec.models.core import ( + EntityMention, + EntityMentionIdentifier, +) from ere_test import MockEREClient, ORG_NS, create_timestamp @@ -57,18 +59,18 @@ def step_submit_known_entity_request(entity_id, resolution_context): # Construct request using test data conventions entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=entity_id, - sourceId="bdd-test", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=entity_id, + source_id="bdd-test", + entity_type=f"{ORG_NS}Organization", ), - contentType="text/turtle", + content_type="text/turtle", content="", ) request = EntityMentionResolutionRequest( - entityMention=entity_mention, - ereRequestId=f"bdd-test-{entity_id}", + entity_mention=entity_mention, + ere_request_id=f"bdd-test-{entity_id}", timestamp=create_timestamp(), ) @@ -84,18 +86,18 @@ def step_submit_unknown_entity_request(resolution_context): unknown_entity_id = "http://data.europa.eu/a4g/resource/unknown_entity_9999" entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId=unknown_entity_id, - sourceId="bdd-test", - entityType=f"{ORG_NS}Organization", + identifiedBy=EntityMentionIdentifier( + request_id=unknown_entity_id, + source_id="bdd-test", + entity_type=f"{ORG_NS}Organization", ), - contentType="text/turtle", + content_type="text/turtle", content="", ) request = EntityMentionResolutionRequest( - entityMention=entity_mention, - ereRequestId="bdd-test-unknown-entity", + entity_mention=entity_mention, + ere_request_id="bdd-test-unknown-entity", timestamp=create_timestamp(), ) @@ -110,18 +112,18 @@ def step_submit_malformed_request(resolution_context): # Use an unsupported entity type to trigger an error entity_mention = EntityMention( - identifier=EntityMentionIdentifier( - requestId="http://example.com/test-entity", - sourceId="bdd-test", - entityType="http://example.com/UnsupportedType", # Not in SUPPORTED_ENTITY_TYPES + identifiedBy=EntityMentionIdentifier( + request_id="http://example.com/test-entity", + source_id="bdd-test", + entity_type="http://example.com/UnsupportedType", # Not in SUPPORTED_ENTITY_TYPES ), - contentType="text/turtle", + content_type="text/turtle", content="", ) request = EntityMentionResolutionRequest( - entityMention=entity_mention, - ereRequestId="bdd-test-malformed", + entity_mention=entity_mention, + ere_request_id="bdd-test-malformed", timestamp=create_timestamp(), ) @@ -133,12 +135,12 @@ def step_submit_malformed_request(resolution_context): def step_receive_resolution_response(resolution_context): """Verify that a response was received.""" client = resolution_context["client"] - request_id = resolution_context["last_request"].ereRequestId + request_id = resolution_context["last_request"].ere_request_id # Collect responses until we find the one for our request response = None for resp in client.subscribe_responses(): - if resp.ereRequestId == request_id: + if resp.ere_request_id == request_id: response = resp break @@ -169,25 +171,25 @@ def step_response_has_singleton_cluster(resolution_context): # A singleton cluster should have exactly one candidate # (the newly created cluster for the unknown entity) assert_that(len(response.candidates)).is_equal_to(1) - assert_that(response.candidates[0].confidenceScore).is_equal_to(1.0) + assert_that(response.candidates[0].confidence_score).is_equal_to(1.0) @then("I receive an error response") def step_receive_error_response(resolution_context): """Verify that an error response was received.""" client = resolution_context["client"] - request_id = resolution_context["last_request"].ereRequestId + request_id = resolution_context["last_request"].ere_request_id # Collect responses until we find the one for our request response = None for resp in client.subscribe_responses(): - if resp.ereRequestId == request_id: + if resp.ere_request_id == request_id: response = resp break assert_that(response).is_not_none() assert_that(response).is_instance_of(EREErrorResponse) - assert_that(response.errorTitle).is_not_none() - assert_that(response.errorDetail).is_not_none() + assert_that(response.error_title).is_not_none() + assert_that(response.error_detail).is_not_none() resolution_context["last_response"] = response