Skip to content

Commit 7cef9e3

Browse files
committed
adjusted workflow files to process lake area time series data rather than IWP data on Delta NCSA server
1 parent 1bfc0bc commit 7cef9e3

File tree

4 files changed

+1040
-69
lines changed

4 files changed

+1040
-69
lines changed

lake_change_config.py

Lines changed: 85 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,40 @@
11
from datetime import datetime
22
import subprocess
3-
#######################
4-
#### Change me 😁 ####
5-
#######################
6-
# ALWAYS include the trailing slash "/"
3+
import numpy as np
74

5+
# always include the trailing slash "/"
86
# define user on Delta, avoid writing files to other user's dir
97
user = subprocess.check_output("whoami").strip().decode("ascii")
10-
#head_node = 'cn___/'
11-
head_node = 'gpub086'
12-
# define desired location for output files within user dir
13-
# ensures a new subfolder every run as long as new run is not started within same day as last run
14-
# following path is the output subdir for test run, using just one subdir of the alaska files that is only ~8% of the Alaska dir, 23.5 GB
15-
output_subdir = 'lake_change/output/utm_32640_20230411'
16-
#output_subdir = datetime.now().strftime("%b-%d-%y")
17-
# don't use subprocess to retrieve date for subdir because runs might span over 2 days if they go overnight
8+
head_node = 'cn040/'
9+
#head_node = 'gpub___'
1810

19-
##############################
20-
#### END OF Change me 😁 ####
21-
##############################
22-
23-
# input path for all data:
24-
INPUT = '/scratch/bbou/julietcohen/lake_change/input/sample/'
25-
26-
# following path is the OUTPUT for test run, using just one subdir of the alaska files that is only 7.78% of the Alaska dir, 45.57 GB
27-
OUTPUT = f'/scratch/bbou/{user}/{output_subdir}/' # Dir for results. High I/O is good.
11+
INPUT = '/scratch/bbou/julietcohen/lake_change/input/time_series/annual/yr2021/'
12+
output_subdir = 'lake_change/output/time_series_annual/yr2021'
13+
OUTPUT = f'/scratch/bbou/{user}/{output_subdir}/'
2814

2915
STAGING_LOCAL = '/tmp/staged/'
3016
STAGING_REMOTE = OUTPUT + 'staged/'
3117
STAGING_REMOTE_MERGED = STAGING_REMOTE + head_node
3218

3319
GEOTIFF_LOCAL = '/tmp/geotiff/'
34-
GEOTIFF_REMOTE = OUTPUT + 'geotiff/' # Kastan used pathlib.Path(OUTPUT) / pathlib.Path('merged_geotiff_sep9') for this so if it errors try something similar
35-
# check if need a variable GEOTIFF_REMOTE_MERGED after we finish the raster step successfully
20+
GEOTIFF_REMOTE = OUTPUT + 'geotiff/'
3621

37-
#WEBTILE_LOCAL = '/tmp/web_tiles/' # we do not use /tmp for webtile step, it is unique in that way
3822
WEBTILE_REMOTE = OUTPUT + 'web_tiles/'
3923

40-
#THREE_D_PATH = OUTPUT + '3d_tiles/' # workflow does not accommodate 3d-tiling yet
41-
42-
""" FINAL config is exported here, and imported in the IPW Workflow python file. """
24+
""" final config is exported here, and imported in the workflow python file. """
4325
IWP_CONFIG = {
4426
"deduplicate_clip_to_footprint": False,
45-
"dir_output": OUTPUT, # base dir of all output, needs to change every run with definition of output_subdir
46-
"dir_input": INPUT, # base dir of all files to be staged
27+
"deduplicate_method": None,
28+
"deduplicate_at": None,
29+
"dir_output": OUTPUT,
30+
"dir_input": INPUT,
4731
"ext_input": ".gpkg",
48-
"dir_geotiff_remote": GEOTIFF_REMOTE, # we store geotiffs in /scratch after they are created so they are safe after the job concludes, and web-tiling can access all geotiffs in the same directory
49-
"dir_geotiff_local": GEOTIFF_LOCAL, # we write highest level geotiffs to /tmp then transfer to /scratch
50-
"dir_web_tiles": WEBTILE_REMOTE, # we do not use /tmp for webtile step, it writes directly to /scratch
51-
"dir_staged_remote": STAGING_REMOTE, # we rsync the staged files to /scratch to merge, then rasterize and 3dtile with that merged dir
52-
"dir_staged_remote_merged": STAGING_REMOTE_MERGED, # input for raster highest after staged files have been merged
53-
"dir_staged_local": STAGING_LOCAL, # initially write staged files to /tmp so they write faster
32+
"dir_geotiff_remote": GEOTIFF_REMOTE,
33+
"dir_geotiff_local": GEOTIFF_LOCAL,
34+
"dir_web_tiles": WEBTILE_REMOTE,
35+
"dir_staged_remote": STAGING_REMOTE,
36+
"dir_staged_remote_merged": STAGING_REMOTE_MERGED,
37+
"dir_staged_local": STAGING_LOCAL,
5438
"filename_staging_summary": STAGING_REMOTE + "staging_summary.csv",
5539
"filename_rasterization_events": GEOTIFF_REMOTE + "raster_events.csv",
5640
"filename_rasters_summary": GEOTIFF_REMOTE + "raster_summary.csv",
@@ -59,42 +43,82 @@
5943
"tms_id": "WGS1984Quad",
6044
"z_range": [
6145
0,
62-
11
46+
12
6347
],
6448
"geometricError": 57,
6549
"z_coord": 0,
6650
"statistics": [
6751
{
68-
"name": "change_rate", # changed from "coverage"
52+
"name": "permanent_water",
6953
"weight_by": "area",
70-
"property": "ChangeRateGrowth_myr-1", # changed from "area_per_pixel_area", can also be property that is available in input data
71-
"aggregation_method": "sum",
72-
"resampling_method": "sum", # changed from "average"
54+
"property": "permanent_water",
55+
"aggregation_method": "max",
56+
"resampling_method": "mode",
7357
"val_range": [
7458
0,
75-
1
59+
#6088.89 # 99.99th percentile for 2017, this shows the best diversity for perm water
60+
#6105.43 # 99.99th percentile for 2018
61+
#6103.33 # 99.99th percentile for 2019
62+
#6093.07 # 99.99th percentile for 2020
63+
6071.56 # 99.99th percentile for 2021
64+
],
65+
"palette": [
66+
"#1be3ee", # blues
67+
"#1b85ee",
68+
"#1b22ee"
7669
],
77-
"palette": ["#ff0000", # red
78-
"#FF8C00", # DarkOrange
79-
"#FFA07A", # LightSalmon
80-
"#FFFF00", # yellow
81-
"#66CDAA", # MediumAquaMarine
82-
"#AFEEEE", # PaleTurquoise,
83-
"#0000ff"], # blue
8470
"nodata_val": 0,
8571
"nodata_color": "#ffffff00"
8672
},
87-
],
88-
"deduplicate_at": "raster",
89-
"deduplicate_keep_rules": [
90-
[
91-
"Date",
92-
"larger"
93-
]
94-
],
95-
"deduplicate_method": "neighbor",
96-
"deduplicate_keep_rules": [["staging_filename", "larger"]],
97-
"deduplicate_overlap_tolerance": 0.1,
98-
"deduplicate_overlap_both": False,
99-
"deduplicate_centroid_tolerance": None
73+
{
74+
"name": "seasonal_water",
75+
"weight_by": "area",
76+
"property": "seasonal_water",
77+
"aggregation_method": "max",
78+
"resampling_method": "mode",
79+
"val_range": [
80+
0,
81+
#2.66 # 95th percentile for 2017
82+
#2.47 # 95th percentile for 2018
83+
#2.64 # 95th percentile for 2019
84+
#3.01 # 95th percentile for 2020
85+
2.86 # 95th percentile for 2021
86+
],
87+
"palette": [
88+
"#f000d8", # purples
89+
"#c200cc",
90+
"#8b00cc"
91+
],
92+
"nodata_val": 0,
93+
"nodata_color": "#ffffff00"
94+
}
95+
]
96+
# "statistics": [ # for lake change dataset:
97+
# {
98+
# "name": "change_rate",
99+
# "weight_by": "area",
100+
# "property": "ChangeRateNet_myr-1",
101+
# "aggregation_method": "min",
102+
# "resampling_method": "mode",
103+
# "val_range": [
104+
# -2,
105+
# 2
106+
# ],
107+
# "palette": ["#ff0000", # red
108+
# "#FF8C00", # DarkOrange
109+
# "#FFA07A", # LightSalmon
110+
# "#FFFF00", # yellow
111+
# "#66CDAA", # MediumAquaMarine
112+
# "#AFEEEE", # PaleTurquoise,
113+
# "#0000ff"], # blue
114+
# "nodata_val": 0,
115+
# "nodata_color": "#ffffff00" # fully transparent white
116+
# },
117+
# ],
118+
#"deduplicate_at": ["staging"],
119+
#"deduplicate_keep_rules": [["Perimeter_meter","larger"]], # [property, operator], using property with all positive values
120+
#"deduplicate_method": "neighbor",
121+
#"deduplicate_overlap_tolerance": 0.5, # default value
122+
#"deduplicate_overlap_both": False, # only 1 polygon must be overlapping with the deduplicate_overlap_tolerance threshold to be considered dups
123+
#"deduplicate_centroid_tolerance": None # only deduplicate_overlap_tolerance will be used to determine if polygons are dups
100124
}

0 commit comments

Comments
 (0)