Skip to content

Commit 2b29318

Browse files
authored
[skip ci] Updates for spack v1 at NAS (#1810)
* Build FMS in two different ways * Update NAS configs
1 parent 926b8fa commit 2b29318

File tree

21 files changed

+222
-157
lines changed

21 files changed

+222
-157
lines changed

.github/workflows/ubuntu-ci-x86_64-gnu.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ jobs:
112112
113113
# Concretize and check for duplicates
114114
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
115-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
115+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
116116
117117
# Add and update source cache
118118
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
@@ -190,7 +190,7 @@ jobs:
190190
191191
# Concretize and check for duplicates
192192
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
193-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
193+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
194194
195195
# Add binary cache back in
196196
spack mirror add local-binary file://${BUILD_CACHE_PATH}/

.github/workflows/ubuntu-ci-x86_64-oneapi-ifx.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ jobs:
128128
129129
# Concretize and check for duplicates
130130
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
131-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
131+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
132132
133133
# Add and update source cache
134134
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
@@ -209,7 +209,7 @@ jobs:
209209
210210
# Concretize and check for duplicates
211211
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
212-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
212+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
213213
214214
# Add binary cache back in
215215
spack mirror add local-binary file://${BUILD_CACHE_PATH}/

.github/workflows/ubuntu-ci-x86_64-oneapi.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ jobs:
128128
129129
# Concretize and check for duplicates
130130
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
131-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
131+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
132132
133133
# Add and update source cache
134134
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
@@ -209,7 +209,7 @@ jobs:
209209
210210
# Concretize and check for duplicates
211211
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
212-
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
212+
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms
213213
214214
# Add binary cache back in
215215
spack mirror add local-binary file://${BUILD_CACHE_PATH}/

configs/common/modules_lmod.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,10 @@ modules:
142142
^[email protected]+debug snapshot=none: 'esmf-8.8.0-debug'
143143
^[email protected]~debug snapshot=none: 'esmf-8.9.0'
144144
^[email protected]+debug snapshot=none: 'esmf-8.9.0-debug'
145+
fms:
146+
suffixes:
147+
constants=GFS: 'gfs-constants'
148+
constants=GEOS: 'geos-constants'
145149
openmpi:
146150
environment:
147151
set:

configs/common/modules_tcl.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ modules:
161161
^[email protected]+debug snapshot=none: 'esmf-8.8.0-debug'
162162
^[email protected]~debug snapshot=none: 'esmf-8.9.0'
163163
^[email protected]+debug snapshot=none: 'esmf-8.9.0-debug'
164+
fms:
165+
suffixes:
166+
constants=GFS: 'gfs-constants'
167+
constants=GEOS: 'geos-constants'
164168
openmpi:
165169
environment:
166170
set:

configs/common/packages.yaml

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,10 @@ packages:
107107
fms:
108108
require:
109109
- '@2024.02'
110-
- precision=32,64 +quad_precision +gfs_phys +openmp +pic constants=GFS build_type=Release +deprecated_io
110+
- precision=32,64 +quad_precision +openmp +pic build_type=Release +deprecated_io
111+
- any_of:
112+
- +gfs_phys constants=GFS
113+
- ~gfs_phys constants=GEOS
111114
fontconfig:
112115
require:
113116
- +pic
@@ -206,8 +209,6 @@ packages:
206209
require:
207210
- '@2.53'
208211
- ~shared ~f2py +pflogger
209-
# Note: Please check the sites/tier1/nas/packages_gcc.yaml if
210-
# this is changed, as it has a custom variant combination
211212
met:
212213
require:
213214
- '@12.0.1'

configs/sites/tier1/nas/README.md

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,15 @@
33
In the commands below some will be run on login nodes (with internet access) and some
44
on compute nodes as, at NAS, you aren't allowed more than 2 processes on a login node.
55

6+
## Machines
7+
8+
For the steps below you will need to log in to an `afe01` node for one step. You'll
9+
also want to get a Rome compute node for the rest of the steps.
10+
611
## Clone spack-stack
712

813
```
9-
git clone --recursive https://github.com/JCSDA/spack-stack.git -b release/1.9.0 spack-stack-1.9.3
14+
git clone --recurse-submodules https://github.com/mathomp4/spack-stack.git -b feature/nas_install_spack_v1 spack-stack-2.0.0-test
1015
```
1116

1217
## Grab interactive node
@@ -23,7 +28,7 @@ We will start on a login node with internet access. This is mainly needed for th
2328
`spack mirror create` command which downloads all the source code for the packages.
2429

2530
```
26-
cd spack-stack-1.9.3
31+
cd spack-stack-2.0.0-test
2732
. setup.sh
2833
```
2934

@@ -37,7 +42,7 @@ are used to create the environments. You only need to do this once.
3742
To create the oneAPI environment, do:
3843

3944
```
40-
spack stack create env --name ue-oneapi-2024.2.0 --template unified-dev --site nas --compiler oneapi
45+
spack stack create env --name ue-oneapi-2024.2.0 --template unified-dev --site nas --compiler=oneapi-2024.2.0
4146
cd envs/ue-oneapi-2024.2.0
4247
```
4348

@@ -46,8 +51,8 @@ cd envs/ue-oneapi-2024.2.0
4651
To create the GCC environment, do:
4752

4853
```
49-
spack stack create env --name ue-gcc-12.3.0 --template unified-dev --site nas --compiler gcc
50-
cd envs/ue-gcc-12.3.0
54+
spack stack create env --name ue-gcc-13.2.0 --template unified-dev --site nas --compiler gcc-13.2.0
55+
cd envs/ue-gcc-13.2.0
5156
```
5257

5358
## Activate environment
@@ -67,41 +72,75 @@ below with this environment.
6772
spack concretize 2>&1 | tee log.concretize
6873
```
6974

75+
NOTE: The first time you do this on a new build, you should do it on a *LOGIN* node. This is because
76+
it might need to bootstrap things and so it will reach out to the internet.
77+
7078
## Create source cache (LOGIN NODE ONLY)
7179

7280
Because this step downloads all the source code for all packages and all versions, it
7381
should be done on a login node with internet access.
7482

7583
```
76-
spack mirror create -a -d /nobackup/gmao_SIteam/spack-stack/source-cache
84+
spack mirror create -a -d /swbuild/gmao_SIteam/spack-stack/source-cache
7785
```
7886

7987
NOTE: Make sure you are in an environment when you run that `spack mirror create` command. Otherwise,
8088
you will download *EVERY* package and *EVERY* version in spack!
8189

90+
## Pre-fetch cargo packages (LOGIN NODE ONLY)
91+
92+
Some packages use Rust/Cargo for dependencies. These need internet access to build. So we pre-fetch them here.
93+
94+
We need to set `CARGO_HOME` to the location where the Cargo deps are downloaded:
95+
96+
```
97+
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
98+
../../util/fetch_cargo_deps.py
99+
```
100+
101+
NOTE: `CARGO_HOME` should be set as well on the COMPUTE node!
102+
82103
## Install packages
83104

84105
Our install process will actually have (at least) three steps. This is because of the `crtm` package
85106
which requires internet access at build time.
86107

87-
### Install crtm dependencies (COMPUTE NODE)
108+
### Install Step 1: Dependencies of Rust codes and ecflow (COMPUTE NODE)
109+
110+
We currently have some codes that use rust/cargo for dependencies. And, for some reason,
111+
even doing the "cargo dependencies" as above, they still need internet
112+
access to build/install.
113+
114+
As for ecflow, we built Qt on a login node (as it was the only complete node), so we
115+
then have to build ecflow on a login node as well.
116+
117+
So we first install all the dependencies of these codes.
88118

89119
```
90-
spack install -j 10 --verbose --fail-fast --show-log-on-error --no-check-signature --only dependencies crtm 2>&1 | tee log.install.crtm_dependencies
120+
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
121+
spack install -j 16 --verbose --fail-fast --show-log-on-error --no-check-signature --only dependencies py-cryptography py-maturin py-rpds-py ecflow 2>&1 | tee log.install.deps-for-rust-and-ecflow
91122
```
92123

93-
### Install crtm (LOGIN NODE)
124+
### Install Step 2: Rust Codes and ecflow (AFE LOGIN NODE)
125+
126+
NOTE: You *MUST* run this on an afe login node. The reason is the pfe login nodes are Sandy
127+
Bridge but we are building Spack with `x86_64_v3` and these are too old (`_v2`). So
128+
you will get an illegal instruction error when the install below calls python3.
129+
130+
So go back to an afe login node and run:
94131

95132
```
96-
spack install -j 2 --verbose --fail-fast --show-log-on-error --no-check-signature crtm 2>&1 | tee log.install.crtm
133+
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
134+
spack install -j 2 -p 1 --verbose --fail-fast --show-log-on-error --no-check-signature py-cryptography py-maturin py-rpds-py ecflow 2>&1 | tee log.install.rust-and-ecflow
97135
```
98136

99137
Note we are only using 2 processes here because NAS limits you to 2 processes on a login node.
100138

101-
### Install rest of packages (COMPUTE NODE)
139+
### Install Step 3: The rest (COMPUTE NODE)
102140

103141
```
104-
spack install -j 10 --verbose --fail-fast --show-log-on-error --no-check-signature 2>&1 | tee log.install.after_crtm
142+
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
143+
spack install -j 16 --verbose --fail-fast --show-log-on-error --no-check-signature 2>&1 | tee log.install.after-cargo
105144
```
106145

107146
NOTE: You might need to run the `spack install` command multiple times because sometimes
@@ -120,7 +159,7 @@ Then, once that package is built, you can go back to the compute node and run th
120159
## Update module files and setup meta-modules
121160

122161
```
123-
spack module tcl refresh -y
162+
spack module tcl refresh -y --delete-tree
124163
spack stack setup-meta-modules
125164
```
126165

@@ -129,3 +168,13 @@ spack stack setup-meta-modules
129168
```
130169
spack env deactivate
131170
```
171+
172+
## Debugging a package
173+
174+
When things go wrong, a good way to debug a failure is:
175+
176+
```
177+
spack clean
178+
spack stage <package>
179+
spack build-env <package> -- bash --norc --noprofile
180+
```

configs/sites/tier1/nas/compilers.yaml

Lines changed: 0 additions & 57 deletions
This file was deleted.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
config:
22
build_jobs: 6
3+
4+
# Overrides for spack build and staging areas to speed up builds
5+
# and avoid errors with Lustre file locking and xattr issues
6+
build_stage: /swbuild/gmao_SIteam/spack-stack/cache/build_stage
7+
test_stage: /swbuild/gmao_SIteam/spack-stack/cache/test_stage
8+
source_cache: /swbuild/gmao_SIteam/spack-stack/cache/source_cache
9+
misc_cache: /swbuild/gmao_SIteam/spack-stack/cache/misc_cache
Lines changed: 2 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,3 @@
11
mirrors:
2-
local-source:
3-
fetch:
4-
url: file:///nobackup/gmao_SIteam/spack-stack/source-cache
5-
access_pair:
6-
- null
7-
- null
8-
access_token: null
9-
profile: null
10-
endpoint_url: null
11-
push:
12-
url: file:///nobackup/gmao_SIteam/spack-stack/source-cache
13-
access_pair:
14-
- null
15-
- null
16-
access_token: null
17-
profile: null
18-
endpoint_url: null
2+
local-source: file:///swbuild/gmao_SIteam/spack-stack/source-cache
3+
local-binary: file:///swbuild/gmao_SIteam/spack-stack/binary-cache-spack-v1

0 commit comments

Comments
 (0)