|
1 | 1 | from datetime import datetime |
2 | 2 | import subprocess |
3 | | -####################### |
4 | | -#### Change me 😁 #### |
5 | | -####################### |
6 | | -# ALWAYS include the trailing slash "/" |
| 3 | +import numpy as np |
7 | 4 |
|
| 5 | +# always include the trailing slash "/" |
8 | 6 | # define user on Delta, avoid writing files to other user's dir |
9 | 7 | user = subprocess.check_output("whoami").strip().decode("ascii") |
10 | | -#head_node = 'cn___/' |
11 | | -head_node = 'gpub086' |
12 | | -# define desired location for output files within user dir |
13 | | -# ensures a new subfolder every run as long as new run is not started within same day as last run |
14 | | -# following path is the output subdir for test run, using just one subdir of the Alaska files that is only ~8% of the Alaska dir, 23.5 GB |
15 | | -output_subdir = 'lake_change/output/utm_32640_20230411' |
16 | | -#output_subdir = datetime.now().strftime("%b-%d-%y") |
17 | | -# don't use subprocess to retrieve date for subdir because runs might span over 2 days if they go overnight |
| 8 | +head_node = 'cn040/' |
| 9 | +#head_node = 'gpub___' |
18 | 10 |
|
19 | | -############################## |
20 | | -#### END OF Change me 😁 #### |
21 | | -############################## |
22 | | - |
23 | | -# input path for all data: |
24 | | -INPUT = '/scratch/bbou/julietcohen/lake_change/input/sample/' |
25 | | - |
26 | | -# following path is the OUTPUT for test run, using just one subdir of the Alaska files that is only 7.78% of the Alaska dir, 45.57 GB |
27 | | -OUTPUT = f'/scratch/bbou/{user}/{output_subdir}/' # Dir for results. High I/O is good. |
| 11 | +INPUT = '/scratch/bbou/julietcohen/lake_change/input/time_series/annual/yr2021/' |
| 12 | +output_subdir = 'lake_change/output/time_series_annual/yr2021' |
| 13 | +OUTPUT = f'/scratch/bbou/{user}/{output_subdir}/' |
28 | 14 |
|
29 | 15 | STAGING_LOCAL = '/tmp/staged/' |
30 | 16 | STAGING_REMOTE = OUTPUT + 'staged/' |
31 | 17 | STAGING_REMOTE_MERGED = STAGING_REMOTE + head_node |
32 | 18 |
|
33 | 19 | GEOTIFF_LOCAL = '/tmp/geotiff/' |
34 | | -GEOTIFF_REMOTE = OUTPUT + 'geotiff/' # Kastan used pathlib.Path(OUTPUT) / pathlib.Path('merged_geotiff_sep9') for this so if it errors try something similar |
35 | | -# check if need a variable GEOTIFF_REMOTE_MERGED after we finish the raster step successfully |
| 20 | +GEOTIFF_REMOTE = OUTPUT + 'geotiff/' |
36 | 21 |
|
37 | | -#WEBTILE_LOCAL = '/tmp/web_tiles/' # we do not use /tmp for webtile step, it is unique in that way |
38 | 22 | WEBTILE_REMOTE = OUTPUT + 'web_tiles/' |
39 | 23 |
|
40 | | -#THREE_D_PATH = OUTPUT + '3d_tiles/' # workflow does not accommodate 3d-tiling yet |
41 | | - |
42 | | -""" FINAL config is exported here, and imported in the IPW Workflow python file. """ |
| 24 | +""" final config is exported here, and imported in the workflow python file. """ |
43 | 25 | IWP_CONFIG = { |
44 | 26 | "deduplicate_clip_to_footprint": False, |
45 | | - "dir_output": OUTPUT, # base dir of all output, needs to change every run with definition of output_subdir |
46 | | - "dir_input": INPUT, # base dir of all files to be staged |
| 27 | + "deduplicate_method": None, |
| 28 | + "deduplicate_at": None, |
| 29 | + "dir_output": OUTPUT, |
| 30 | + "dir_input": INPUT, |
47 | 31 | "ext_input": ".gpkg", |
48 | | - "dir_geotiff_remote": GEOTIFF_REMOTE, # we store geotiffs in /scratch after they are created so they are safe after the job concludes, and web-tiling can access all geotiffs in the same directory |
49 | | - "dir_geotiff_local": GEOTIFF_LOCAL, # we write highest level geotiffs to /tmp then transfer to /scratch |
50 | | - "dir_web_tiles": WEBTILE_REMOTE, # we do not use /tmp for webtile step, it writes directly to /scratch |
51 | | - "dir_staged_remote": STAGING_REMOTE, # we rsync the staged files to /scratch to merge, then rasterize and 3dtile with that merged dir |
52 | | - "dir_staged_remote_merged": STAGING_REMOTE_MERGED, # input for raster highest after staged files have been merged |
53 | | - "dir_staged_local": STAGING_LOCAL, # initially write staged files to /tmp so they write faster |
| 32 | + "dir_geotiff_remote": GEOTIFF_REMOTE, |
| 33 | + "dir_geotiff_local": GEOTIFF_LOCAL, |
| 34 | + "dir_web_tiles": WEBTILE_REMOTE, |
| 35 | + "dir_staged_remote": STAGING_REMOTE, |
| 36 | + "dir_staged_remote_merged": STAGING_REMOTE_MERGED, |
| 37 | + "dir_staged_local": STAGING_LOCAL, |
54 | 38 | "filename_staging_summary": STAGING_REMOTE + "staging_summary.csv", |
55 | 39 | "filename_rasterization_events": GEOTIFF_REMOTE + "raster_events.csv", |
56 | 40 | "filename_rasters_summary": GEOTIFF_REMOTE + "raster_summary.csv", |
|
59 | 43 | "tms_id": "WGS1984Quad", |
60 | 44 | "z_range": [ |
61 | 45 | 0, |
62 | | - 11 |
| 46 | + 12 |
63 | 47 | ], |
64 | 48 | "geometricError": 57, |
65 | 49 | "z_coord": 0, |
66 | 50 | "statistics": [ |
67 | 51 | { |
68 | | - "name": "change_rate", # changed from "coverage" |
| 52 | + "name": "permanent_water", |
69 | 53 | "weight_by": "area", |
70 | | - "property": "ChangeRateGrowth_myr-1", # changed from "area_per_pixel_area", can also be property that is available in input data |
71 | | - "aggregation_method": "sum", |
72 | | - "resampling_method": "sum", # changed from "average" |
| 54 | + "property": "permanent_water", |
| 55 | + "aggregation_method": "max", |
| 56 | + "resampling_method": "mode", |
73 | 57 | "val_range": [ |
74 | 58 | 0, |
75 | | - 1 |
| 59 | + #6088.89 # 99.99th percentile for 2017, this shows the best diversity for perm water |
| 60 | + #6105.43 # 99.99th percentile for 2018 |
| 61 | + #6103.33 # 99.99th percentile for 2019 |
| 62 | + #6093.07 # 99.99th percentile for 2020 |
| 63 | + 6071.56 # 99.99th percentile for 2021 |
| 64 | + ], |
| 65 | + "palette": [ |
| 66 | + "#1be3ee", # blues |
| 67 | + "#1b85ee", |
| 68 | + "#1b22ee" |
76 | 69 | ], |
77 | | - "palette": ["#ff0000", # red |
78 | | - "#FF8C00", # DarkOrange |
79 | | - "#FFA07A", # LightSalmon |
80 | | - "#FFFF00", # yellow |
81 | | - "#66CDAA", # MediumAquaMarine |
82 | | - "#AFEEEE", # PaleTurquoise, |
83 | | - "#0000ff"], # blue |
84 | 70 | "nodata_val": 0, |
85 | 71 | "nodata_color": "#ffffff00" |
86 | 72 | }, |
87 | | - ], |
88 | | - "deduplicate_at": "raster", |
89 | | - "deduplicate_keep_rules": [ |
90 | | - [ |
91 | | - "Date", |
92 | | - "larger" |
93 | | - ] |
94 | | - ], |
95 | | - "deduplicate_method": "neighbor", |
96 | | - "deduplicate_keep_rules": [["staging_filename", "larger"]], |
97 | | - "deduplicate_overlap_tolerance": 0.1, |
98 | | - "deduplicate_overlap_both": False, |
99 | | - "deduplicate_centroid_tolerance": None |
| 73 | + { |
| 74 | + "name": "seasonal_water", |
| 75 | + "weight_by": "area", |
| 76 | + "property": "seasonal_water", |
| 77 | + "aggregation_method": "max", |
| 78 | + "resampling_method": "mode", |
| 79 | + "val_range": [ |
| 80 | + 0, |
| 81 | + #2.66 # 95th percentile for 2017 |
| 82 | + #2.47 # 95th percentile for 2018 |
| 83 | + #2.64 # 95th percentile for 2019 |
| 84 | + #3.01 # 95th percentile for 2020 |
| 85 | + 2.86 # 95th percentile for 2021 |
| 86 | + ], |
| 87 | + "palette": [ |
| 88 | + "#f000d8", # purples |
| 89 | + "#c200cc", |
| 90 | + "#8b00cc" |
| 91 | + ], |
| 92 | + "nodata_val": 0, |
| 93 | + "nodata_color": "#ffffff00" |
| 94 | + } |
| 95 | + ] |
| 96 | + # "statistics": [ # for lake change dataset: |
| 97 | + # { |
| 98 | + # "name": "change_rate", |
| 99 | + # "weight_by": "area", |
| 100 | + # "property": "ChangeRateNet_myr-1", |
| 101 | + # "aggregation_method": "min", |
| 102 | + # "resampling_method": "mode", |
| 103 | + # "val_range": [ |
| 104 | + # -2, |
| 105 | + # 2 |
| 106 | + # ], |
| 107 | + # "palette": ["#ff0000", # red |
| 108 | + # "#FF8C00", # DarkOrange |
| 109 | + # "#FFA07A", # LightSalmon |
| 110 | + # "#FFFF00", # yellow |
| 111 | + # "#66CDAA", # MediumAquaMarine |
| 112 | + # "#AFEEEE", # PaleTurquoise, |
| 113 | + # "#0000ff"], # blue |
| 114 | + # "nodata_val": 0, |
| 115 | + # "nodata_color": "#ffffff00" # fully transparent white |
| 116 | + # }, |
| 117 | + # ], |
| 118 | + #"deduplicate_at": ["staging"], |
| 119 | + #"deduplicate_keep_rules": [["Perimeter_meter","larger"]], # [property, operator], using property with all positive values |
| 120 | + #"deduplicate_method": "neighbor", |
| 121 | + #"deduplicate_overlap_tolerance": 0.5, # default value |
| 122 | + #"deduplicate_overlap_both": False, # only 1 polygon must be overlapping with the deduplicate_overlap_tolerance threshold to be considered dups |
| 123 | + #"deduplicate_centroid_tolerance": None # only deduplicate_overlap_tolerance will be used to determine if polygons are dups |
100 | 124 | } |
0 commit comments