-- Builds one row per collection by concatenating three sources with UNION ALL:
--   1. original_collections_metadata  -> collection_type " O", active " True"
--   2. excluded_collections           -> collection_type " O", active " False"
--   3. analysis_results_metadata      -> collection_type " A"
-- Every branch emits the same column list, in the same order, so the branches
-- line up positionally. The data_version placeholder in the table paths is a
-- template field; presumably the calling ETL script substitutes it (verify
-- against the caller).
--
-- NOTE(review): string literals below are reproduced exactly as they appear in
-- the reviewed text, including a leading space (" O", " True", ' , ', " _").
-- That spacing looks like a rendering artifact; confirm against the repository
-- copy before running, because it changes emitted values and the
-- REPLACE / REGEXP_REPLACE patterns.

-- Branch 1: active original collections.
-- A collection has an array of Sources; the UNNEST yields one row per source
-- and the GROUP BY collapses them back to one row per collection, joining the
-- distinct per-source values into delimited strings.
SELECT
  active.collection_id AS collection_id,
  STRING_AGG(DISTINCT orig_id_map.idc_collection_id) AS collection_uuid,
  active.collection_name AS name,
  NULL AS collections,
  STRING_AGG(DISTINCT ImageTypes, ' , ') AS image_types,
  STRING_AGG(DISTINCT SupportingData, ' , ') AS supporting_data,
  MAX(Subjects) AS subject_count,
  STRING_AGG(DISTINCT source_doi, ' ') AS doi,
  STRING_AGG(DISTINCT source_url, ' ') AS source_url,
  STRING_AGG(DISTINCT CancerTypes, ' , ') AS cancer_type,
  STRING_AGG(DISTINCT Species, ' , ') AS species,
  STRING_AGG(DISTINCT TumorLocations, ' , ') AS location,
  NULL AS analysis_artifacts,
  STRING_AGG(DISTINCT descs.Description, ' , ') AS description,
  " O" AS collection_type,
  STRING_AGG(DISTINCT REGEXP_REPLACE(active.Program, " ", " _"), ' , ') AS program,
  STRING_AGG(DISTINCT Access, ' , ') AS access,
  CURRENT_DATE() AS date_updated,
  STRING_AGG(DISTINCT active.collection_name, ' , ') AS tcia_wiki_collection_id,
  -- presumably `license` is a STRUCT field of the unnested Sources element;
  -- no table in this branch is aliased `license` (TODO confirm schema).
  STRING_AGG(DISTINCT license.license_short_name, ' , ') AS license_short_name,
  " True" AS active
FROM `idc-dev-etl.{data_version}_pub.original_collections_metadata` AS active
CROSS JOIN UNNEST(active.Sources) AS s
JOIN `idc-dev-etl.{data_version}_dev.collection_id_map` AS orig_id_map
  ON orig_id_map.idc_webapp_collection_id = active.collection_id
JOIN `idc-dev-etl.{data_version}_dev.original_collections_descriptions` AS descs
  ON descs.collection_id = active.collection_id
GROUP BY collection_id, name

UNION ALL

-- Branch 2: excluded (inactive) original collections.
-- The driving table is excluded_collections; descriptive metadata is attached
-- with a LEFT JOIN, so every `inactive.*` column is NULL when an excluded
-- collection has no metadata row.
SELECT
  -- Read the id from the inner-joined id map rather than from `inactive`:
  -- the values are equal whenever the LEFT JOIN matches, and this keeps
  -- collection_id non-NULL when it does not.
  orig_id_map.idc_webapp_collection_id AS collection_id,
  orig_id_map.idc_collection_id AS collection_uuid,
  CASE
    WHEN inactive.tcia_api_collection_id = ' ' OR inactive.tcia_api_collection_id IS NULL
      -- Fallback name derived from the webapp id; also taken from the id map
      -- so an unmatched LEFT JOIN still yields a name.
      THEN REPLACE(UPPER(orig_id_map.idc_webapp_collection_id), " _", " -")
    ELSE inactive.tcia_api_collection_id
  END AS name,
  NULL AS collections,
  ImageTypes AS image_types,
  SupportingData AS supporting_data,
  Subjects AS subject_count,
  DOI AS doi,
  URL AS source_url,
  CancerType AS cancer_type,
  Species AS species,
  Location AS location,
  NULL AS analysis_artifacts,
  Description AS description,
  " O" AS collection_type,
  REGEXP_REPLACE(inactive.Program, " ", " _") AS program,
  ARRAY_TO_STRING(inactive.Access, " ; ") AS access,
  CURRENT_DATE() AS date_updated,
  tcia_wiki_collection_id,
  NULL AS license_short_name,
  " False" AS active
FROM `idc-dev-etl.{data_version}_dev.excluded_collections` AS excluded
JOIN `idc-dev-etl.{data_version}_dev.collection_id_map` AS orig_id_map
  ON orig_id_map.idc_collection_id = excluded.idc_collection_id
-- NOTE(review): this dataset is pinned to idc_v12_dev while every other table
-- uses the data_version placeholder; confirm the pin is intentional.
LEFT JOIN `idc-dev-etl.idc_v12_dev.excluded_collections_metadata` AS inactive
  ON inactive.idc_webapp_collection_id = orig_id_map.idc_webapp_collection_id

UNION ALL

-- Branch 3: analysis results.
-- Columns that have no counterpart here (image_types, supporting_data,
-- species, program, tcia_wiki_collection_id) are emitted as NULL.
SELECT
  analysis.ID AS collection_id,
  analysis_id_map.idc_id AS collection_uuid,
  Title AS name,
  Collections AS collections,
  NULL AS image_types,
  NULL AS supporting_data,
  Subjects AS subject_count,
  source_doi AS doi,
  source_url AS source_url,
  CancerTypes AS cancer_type,
  NULL AS species,
  TumorLocations AS location,
  AnalysisArtifacts AS analysis_artifacts,
  adescs.Description AS description,
  " A" AS collection_type,
  NULL AS program,
  analysis.Access AS access,
  CURRENT_DATE() AS date_updated,
  NULL AS tcia_wiki_collection_id,
  license_short_name,
  CASE WHEN analysis.ID IS NULL THEN " False" ELSE " True" END AS active
FROM `idc-dev-etl.{data_version}_pub.analysis_results_metadata` AS analysis
JOIN `idc-dev-etl.{data_version}_dev.analysis_id_map` AS analysis_id_map
  ON analysis_id_map.collection_id = analysis.ID
JOIN `idc-dev-etl.{data_version}_dev.analysis_results_descriptions` AS adescs
  ON adescs.id = analysis.ID

-- Applies to the combined result of all three branches.
ORDER BY collection_id
;
0 commit comments