@@ -12,6 +12,7 @@ def setUp(self):
1212 self .community_id = "test_community"
1313 self .api_url = "https://example.com/api.php"
1414 self .custom_path = "custom/path"
15+ self .platform_id = "test_platform"
1516 self .namespaces = [0 , 1 ] # Main and Talk namespaces
1617
1718 # Create a temporary dumps directory
@@ -26,7 +27,11 @@ def tearDown(self):
2627 shutil .rmtree (self .custom_path )
2728
2829 def test_mediawiki_etl_initialization (self ):
29- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
30+ etl = MediawikiETL (
31+ community_id = self .community_id ,
32+ namespaces = self .namespaces ,
33+ platform_id = self .platform_id ,
34+ )
3035 self .assertEqual (etl .community_id , self .community_id )
3136 self .assertTrue (etl .delete_dump_after_load )
3237 self .assertEqual (etl .dump_dir , f"dumps/{ self .community_id } " )
@@ -35,12 +40,17 @@ def test_mediawiki_etl_initialization(self):
3540 community_id = self .community_id ,
3641 namespaces = self .namespaces ,
3742 delete_dump_after_load = False ,
43+ platform_id = self .platform_id ,
3844 )
3945 self .assertFalse (etl .delete_dump_after_load )
4046
4147 def test_extract_with_default_path (self ):
4248 # Create a ETL instance with mocked wikiteam_crawler
43- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
49+ etl = MediawikiETL (
50+ community_id = self .community_id ,
51+ namespaces = self .namespaces ,
52+ platform_id = self .platform_id ,
53+ )
4454 etl .wikiteam_crawler = Mock ()
4555
4656 etl .extract (self .api_url )
@@ -51,7 +61,11 @@ def test_extract_with_default_path(self):
5161
5262 def test_extract_with_custom_path (self ):
5363 # Create a ETL instance with mocked wikiteam_crawler
54- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
64+ etl = MediawikiETL (
65+ community_id = self .community_id ,
66+ namespaces = self .namespaces ,
67+ platform_id = self .platform_id ,
68+ )
5569 etl .wikiteam_crawler = Mock ()
5670
5771 etl .extract (self .api_url , self .custom_path )
@@ -63,7 +77,11 @@ def test_extract_with_custom_path(self):
6377
6478 @patch ("hivemind_etl.mediawiki.etl.parse_mediawiki_xml" )
6579 def test_transform_success (self , mock_parse_mediawiki_xml ):
66- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
80+ etl = MediawikiETL (
81+ community_id = self .community_id ,
82+ namespaces = self .namespaces ,
83+ platform_id = self .platform_id ,
84+ )
6785
6886 # Mock page data
6987 mock_page = Mock ()
@@ -98,7 +116,11 @@ def test_transform_success(self, mock_parse_mediawiki_xml):
98116 @patch ("hivemind_etl.mediawiki.etl.logging" )
99117 @patch ("hivemind_etl.mediawiki.etl.parse_mediawiki_xml" )
100118 def test_transform_error_handling (self , mock_parse_mediawiki_xml , mock_logging ):
101- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
119+ etl = MediawikiETL (
120+ community_id = self .community_id ,
121+ namespaces = self .namespaces ,
122+ platform_id = self .platform_id ,
123+ )
102124
103125 # Mock page that will raise an exception
104126 mock_page = Mock ()
@@ -122,7 +144,11 @@ def get_attribute_error(*args, **kwargs):
122144
123145 @patch ("hivemind_etl.mediawiki.etl.CustomIngestionPipeline" )
124146 def test_load_with_dump_deletion (self , mock_ingestion_pipeline_class ):
125- etl = MediawikiETL (community_id = self .community_id , namespaces = self .namespaces )
147+ etl = MediawikiETL (
148+ community_id = self .community_id ,
149+ namespaces = self .namespaces ,
150+ platform_id = self .platform_id ,
151+ )
126152 documents = [Document (text = "Test content" )]
127153
128154 # Setup the mock
@@ -138,7 +164,7 @@ def test_load_with_dump_deletion(self, mock_ingestion_pipeline_class):
138164
139165 # Verify that methods were called correctly
140166 mock_ingestion_pipeline_class .assert_called_once_with (
141- self .community_id , collection_name = "mediawiki"
167+ self .community_id , collection_name = self . platform_id
142168 )
143169 mock_pipeline .run_pipeline .assert_called_once_with (documents )
144170 self .assertFalse (os .path .exists (etl .dump_dir ))
@@ -149,6 +175,7 @@ def test_load_without_dump_deletion(self, mock_ingestion_pipeline_class):
149175 community_id = self .community_id ,
150176 namespaces = self .namespaces ,
151177 delete_dump_after_load = False ,
178+ platform_id = self .platform_id ,
152179 )
153180 documents = [Document (text = "Test content" )]
154181
@@ -165,7 +192,7 @@ def test_load_without_dump_deletion(self, mock_ingestion_pipeline_class):
165192
166193 # Verify that methods were called correctly
167194 mock_ingestion_pipeline_class .assert_called_once_with (
168- self .community_id , collection_name = "mediawiki"
195+ self .community_id , collection_name = self . platform_id
169196 )
170197 mock_pipeline .run_pipeline .assert_called_once_with (documents )
171198 self .assertTrue (os .path .exists (etl .dump_dir ))
0 commit comments