5959COLLECTION_HEADER_CHK = "collection_uuid"
6060
6161FIELD_MAP = {x : i for i , x in enumerate ([
62- "collection_id" , "collection_uuid" , "name" , "collections" , "image_types" , "supporting_data" , "subject_count" , "doi" ,
63- "source_url" , "cancer_type" , "species" , "location" , "analysis_artifacts" , "description" , "collection_type" ,
64- "program" , "access" , "date_updated" , "tcia_wiki_collection_id" , "license_short_name" , "active"
65- ])}
62+ "collection_id" , "collection_uuid" , "name" , "collections" , "image_types" , "supporting_data" , "subject_count" , "doi" ,
63+ "source_url" , "cancer_type" , "species" , "location" , "analysis_artifacts" , "description" , "collection_type" ,
64+ "program" , "access" , "date_updated" , "tcia_wiki_collection_id" , "license_short_name" , "active" , "total_size" , "total_size_with_ar"
65+ ])}
6666
6767TOKENIZED_FIELDS = ["PatientID" , "SeriesInstanceUID" , "StudyInstanceUID" ]
6868
146146}
147147
148148SOLR_SINGLE_VAL = {
149- "StudyInstanceUID" : ["PatientID" , "StudyInstanceUID" , "crdc_study_uuid" ],
150- "SeriesInstanceUID" : ["PatientID" , "StudyInstanceUID" , "SeriesInstanceUID" , "crdc_study_uuid" , "crdc_series_uuid" ]
149+ "StudyInstanceUID" : ["PatientID" , "StudyInstanceUID" , "crdc_study_uuid" , "instance_size" ],
150+ "SeriesInstanceUID" : ["PatientID" , "StudyInstanceUID" , "SeriesInstanceUID" , "crdc_study_uuid" , "crdc_series_uuid" , "instance_size" ]
151151}
152152
153153ETL_CONFIG = {}
@@ -356,7 +356,7 @@ def load_collections(filename, data_version="8.0"):
356356 exact_collection_fields = [
357357 "collection_id" , "collection_uuid" , "name" , "collections" , "image_types" , "supporting_data" , "subject_count" , "doi" ,
358358 "source_url" , "cancer_type" , "species" , "location" , "analysis_artifacts" , "description" , "collection_type" ,
359- "access" , "date_updated" , "active" ]
359+ "access" , "date_updated" , "active" , "total_size" , "total_size_with_ar" ]
360360 field_map = FIELD_MAP
361361 for line in csv_reader (collection_file ):
362362 if COLLECTION_HEADER_CHK in line :
@@ -366,7 +366,6 @@ def load_collections(filename, data_version="8.0"):
366366 for field in line :
367367 field_map [field ] = i
368368 i += 1
369- print (field_map )
370369 continue
371370 collex = {
372371 'data' : { x : line [field_map [x ]] for x in exact_collection_fields },
0 commit comments