Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ubuntu-ci-x86_64-gnu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add and update source cache
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
Expand Down Expand Up @@ -190,7 +190,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add binary cache back in
spack mirror add local-binary file://${BUILD_CACHE_PATH}/
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ubuntu-ci-x86_64-oneapi-ifx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add and update source cache
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
Expand Down Expand Up @@ -209,7 +209,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add binary cache back in
spack mirror add local-binary file://${BUILD_CACHE_PATH}/
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/ubuntu-ci-x86_64-oneapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force --fresh 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add and update source cache
spack mirror add local-source file://${SOURCE_CACHE_PATH}/
Expand Down Expand Up @@ -209,7 +209,7 @@ jobs:

# Concretize and check for duplicates
spack concretize --force 2>&1 | tee log.concretize.${ENVNAME}
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env
${SPACK_STACK_DIR}/util/show_duplicate_packages.py -i fms -i crtm -i crtm-fix -i esmf -i mapl -i py-cython -i neptune-env -i fms

# Add binary cache back in
spack mirror add local-binary file://${BUILD_CACHE_PATH}/
Expand Down
4 changes: 4 additions & 0 deletions configs/common/modules_lmod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ modules:
^[email protected]+debug snapshot=none: 'esmf-8.8.0-debug'
^[email protected]~debug snapshot=none: 'esmf-8.9.0'
^[email protected]+debug snapshot=none: 'esmf-8.9.0-debug'
fms:
suffixes:
constants=GFS: 'gfs-constants'
constants=GEOS: 'geos-constants'
openmpi:
environment:
set:
Expand Down
4 changes: 4 additions & 0 deletions configs/common/modules_tcl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,10 @@ modules:
^[email protected]+debug snapshot=none: 'esmf-8.8.0-debug'
^[email protected]~debug snapshot=none: 'esmf-8.9.0'
^[email protected]+debug snapshot=none: 'esmf-8.9.0-debug'
fms:
suffixes:
constants=GFS: 'gfs-constants'
constants=GEOS: 'geos-constants'
openmpi:
environment:
set:
Expand Down
7 changes: 4 additions & 3 deletions configs/common/packages.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ packages:
fms:
require:
- '@2024.02'
- precision=32,64 +quad_precision +gfs_phys +openmp +pic constants=GFS build_type=Release +deprecated_io
- precision=32,64 +quad_precision +openmp +pic build_type=Release +deprecated_io
- any_of:
- +gfs_phys constants=GFS
- ~gfs_phys constants=GEOS
fontconfig:
require:
- +pic
Expand Down Expand Up @@ -206,8 +209,6 @@ packages:
require:
- '@2.53'
- ~shared ~f2py +pflogger
# Note: Please check the sites/tier1/nas/packages_gcc.yaml if
# this is changed, as it has a custom variant combination
met:
require:
- '@12.0.1'
Expand Down
93 changes: 80 additions & 13 deletions configs/sites/tier1/nas/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,33 @@
In the commands below some will be run on login nodes (with internet access) and some
on compute nodes as, at NAS, you aren't allowed more than 2 processes on a login node.

## Machines

For the steps below, you will need to log in to an `afe01` node for one step. You'll
also want to get a Rome compute node for the rest of the steps.

## Clone spack-stack

```
git clone --recursive https://github.com/JCSDA/spack-stack.git -b release/1.9.0 spack-stack-1.9.3
git clone --recurse-submodules https://github.com/mathomp4/spack-stack.git -b feature/nas_install_spack_v1 spack-stack-2.0.0-test
```

## Update packages (until merged into JCSDA spack-packages)

Some packages need updates that are not yet in
the JCSDA spack-packages repo, so we grab them from the upstream spack-packages repo directly.

```
cd spack-stack-2.0.0-test/repos/builtin
git remote add upstream https://github.com/spack/spack-packages.git
git fetch upstream
git checkout upstream/develop -- packages/gftl
git checkout upstream/develop -- packages/gftl_shared
git checkout upstream/develop -- packages/fargparse
git checkout upstream/develop -- packages/pfunit
git checkout upstream/develop -- packages/yafyaml
git checkout upstream/develop -- packages/pflogger
git checkout upstream/develop -- packages/mpt
```

## Grab interactive node
Expand All @@ -23,7 +46,7 @@ We will start on a login node with internet access. This is mainly needed for th
`spack mirror create` command which downloads all the source code for the packages.

```
cd spack-stack-1.9.3
cd spack-stack-2.0.0-test
. setup.sh
```

Expand All @@ -37,7 +60,7 @@ are used to create the environments. You only need to do this once.
To create the oneAPI environment, do:

```
spack stack create env --name ue-oneapi-2024.2.0 --template unified-dev --site nas --compiler oneapi
spack stack create env --name ue-oneapi-2024.2.0 --template unified-dev --site nas --compiler=oneapi-2024.2.0
cd envs/ue-oneapi-2024.2.0
```

Expand All @@ -46,8 +69,8 @@ cd envs/ue-oneapi-2024.2.0
To create the GCC environment, do:

```
spack stack create env --name ue-gcc-12.3.0 --template unified-dev --site nas --compiler gcc
cd envs/ue-gcc-12.3.0
spack stack create env --name ue-gcc-13.2.0 --template unified-dev --site nas --compiler gcc-13.2.0
cd envs/ue-gcc-13.2.0
```

## Activate environment
Expand All @@ -67,41 +90,75 @@ below with this environment.
spack concretize 2>&1 | tee log.concretize
```

NOTE: The first time you do this on a new build, you should do it on a *LOGIN* node. This is because
it might need to bootstrap things and so it will reach out to the internet.

## Create source cache (LOGIN NODE ONLY)

Because this step downloads all the source code for all packages and all versions, it
should be done on a login node with internet access.

```
spack mirror create -a -d /nobackup/gmao_SIteam/spack-stack/source-cache
spack mirror create -a -d /swbuild/gmao_SIteam/spack-stack/source-cache
```

NOTE: Make sure you are in an environment when you run that `spack mirror create` command. Otherwise,
you will download *EVERY* package and *EVERY* version in spack!

## Pre-fetch cargo packages (LOGIN NODE ONLY)

Some packages use Rust/Cargo for dependencies. These need internet access to build. So we pre-fetch them here.

We need to set `CARGO_HOME` to the location where the Cargo dependencies will be downloaded:

```
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
../../util/fetch_cargo_deps.py
```

NOTE: `CARGO_HOME` should be set as well on the COMPUTE node!

## Install packages

Our install process will actually have (at least) three steps. This is because of the `crtm` package
which requires internet access at build time.

### Install crtm dependencies (COMPUTE NODE)
### Install Step 1: Dependencies of Rust codes and ecflow (COMPUTE NODE)

We currently have some codes that use Rust/Cargo for dependencies. For some reason,
even after pre-fetching the Cargo dependencies as above, they still need internet
access to build/install.

As for ecflow, we built QT on a login node (as it was the only complete node), so we
then have to build ecflow on a login node as well.

So we first install all the dependencies of these codes.

```
spack install -j 10 --verbose --fail-fast --show-log-on-error --no-check-signature --only dependencies crtm 2>&1 | tee log.install.crtm_dependencies
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
spack install -j 16 --verbose --fail-fast --show-log-on-error --no-check-signature --only dependencies py-cryptography py-maturin py-rpds-py ecflow 2>&1 | tee log.install.deps-for-rust-and-ecflow
```

### Install crtm (LOGIN NODE)
### Install Step 2: Rust Codes and ecflow (AFE LOGIN NODE)

NOTE: You *MUST* run this on an afe login node. The pfe login nodes are Sandy
Bridge (`x86_64_v2`), which is too old for the `x86_64_v3` target we are building Spack with,
so you will get an illegal instruction error when the install below calls python3.

So go back to an afe login node and run:

```
spack install -j 2 --verbose --fail-fast --show-log-on-error --no-check-signature crtm 2>&1 | tee log.install.crtm
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
spack install -j 2 -p 1 --verbose --fail-fast --show-log-on-error --no-check-signature py-cryptography py-maturin py-rpds-py ecflow 2>&1 | tee log.install.rust-and-ecflow
```

Note we are only using 2 processes here because NAS limits you to 2 processes on a login node.

### Install rest of packages (COMPUTE NODE)
### Install Step 3: The rest (COMPUTE NODE)

```
spack install -j 10 --verbose --fail-fast --show-log-on-error --no-check-signature 2>&1 | tee log.install.after_crtm
export CARGO_HOME=/swbuild/gmao_SIteam/spack-stack/cargo-cache
spack install -j 16 --verbose --fail-fast --show-log-on-error --no-check-signature 2>&1 | tee log.install.after-cargo
```

NOTE: You might need to run the `spack install` command multiple times because sometimes
Expand All @@ -120,7 +177,7 @@ Then, once that package is built, you can go back to the compute node and run th
## Update module files and setup meta-modules

```
spack module tcl refresh -y
spack module tcl refresh -y --delete-tree
spack stack setup-meta-modules
```

Expand All @@ -129,3 +186,13 @@ spack stack setup-meta-modules
```
spack env deactivate
```

# Debugging a package

When things go wrong, a good way to debug a failure is:

```
spack clean
spack stage <package>
spack build-env <package> -- bash --norc --noprofile
```
57 changes: 0 additions & 57 deletions configs/sites/tier1/nas/compilers.yaml

This file was deleted.

7 changes: 7 additions & 0 deletions configs/sites/tier1/nas/config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,9 @@
config:
build_jobs: 6

# Overrides for spack build and staging areas to speed up builds
# and avoid errors with Lustre file locking and xattr issues
build_stage: /swbuild/gmao_SIteam/spack-stack/cache/build_stage
test_stage: /swbuild/gmao_SIteam/spack-stack/cache/test_stage
source_cache: /swbuild/gmao_SIteam/spack-stack/cache/source_cache
misc_cache: /swbuild/gmao_SIteam/spack-stack/cache/misc_cache
19 changes: 2 additions & 17 deletions configs/sites/tier1/nas/mirrors.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,3 @@
mirrors:
local-source:
fetch:
url: file:///nobackup/gmao_SIteam/spack-stack/source-cache
access_pair:
- null
- null
access_token: null
profile: null
endpoint_url: null
push:
url: file:///nobackup/gmao_SIteam/spack-stack/source-cache
access_pair:
- null
- null
access_token: null
profile: null
endpoint_url: null
local-source: file:///swbuild/gmao_SIteam/spack-stack/source-cache
local-binary: file:///swbuild/gmao_SIteam/spack-stack/binary-cache-spack-v1
Loading
Loading