-
Notifications
You must be signed in to change notification settings - Fork 3
110 lines (108 loc) · 4.12 KB
/
create-source-data.yaml
File metadata and controls
110 lines (108 loc) · 4.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# Builds fresh dumps/exports from the production databases.
name: Create Dumps from DBs
# Manually triggerable, and reusable as a sub-workflow from other workflows.
on:
  workflow_dispatch:
  workflow_call:
jobs:
  # Provisions an ephemeral GCE VM that registers itself as a GitHub Actions
  # runner; the export job below runs on it (needs the large disk and
  # network proximity to the GCS buckets).
  create-runner:
    permissions:
      contents: read
      # Needed for keyless auth to GCP via Workload Identity Federation.
      id-token: write
    runs-on: ubuntu-latest
    outputs:
      # Runner label that the export job targets via `runs-on`.
      label: ${{ steps.create-runner.outputs.label }}
    steps:
      # GitHub App token used by the runner-creation action to register
      # the ephemeral runner with the repository.
      - name: Create GitHub App installation access token
        uses: actions/create-github-app-token@v2
        id: app-token
        with:
          app-id: ${{ vars.GA_APP_ID }}
          private-key: ${{ secrets.GA_PRIVATE_KEY }}
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v3
        with:
          workload_identity_provider: ${{ vars.GCP_WORKLOAD_IDENTITY_PROVIDER }}
          service_account: ${{ vars.GCP_SERVICE_ACCOUNT }}
      # Third-party action pinned to a full commit SHA (good practice for
      # actions that receive credentials).
      - name: Create Runner on GCP
        id: create-runner
        uses: related-sciences/gce-github-runner@ab5ba6e5baaf05b344863802d7e6d57ee69f2fd3
        with:
          token: ${{ steps.app-token.outputs.token }}
          vm_name_prefix: gh-create-source
          image_project: ubuntu-os-cloud
          image_family: ubuntu-2404-lts-amd64
          machine_zone: europe-west4-b
          machine_type: e2-standard-4
          runner_service_account: ${{ vars.RUNNER_GCP_SERVICE_ACCOUNT }}
          # Cost optimization: VM may be reclaimed by GCP; the workflow can
          # simply be re-run if that happens.
          preemptible: true
          # Runner deregisters and the VM is deleted after one job.
          ephemeral: true
          boot_disk_type: pd-ssd
          # Sized to hold both compressed backups plus the restored DBs.
          disk_size: 150GB
export-pgdumps:
needs: create-runner
runs-on: ${{ needs.create-runner.outputs.label }}
steps:
- name: Install Ops Agent
run: |
curl -sSO https://dl.google.com/cloudagents/add-google-cloud-ops-agent-repo.sh
sudo bash add-google-cloud-ops-agent-repo.sh --also-install
# We are running on barebones VM, so there is more scripting involved
# then needed if we were running on standard GitHub Actions runner.
- name: Checkout source
run: |
mkdir src
cd src
git init
git remote add origin $GITHUB_SERVER_URL/$GITHUB_REPOSITORY.git
git fetch origin $GITHUB_REF
git reset --hard FETCH_HEAD
cd ..
- name: Set up PostgreSQL
run: |
sudo apt-get --yes install podman
podman run --rm --name pg -p 127.0.0.1:5432:5432 -e POSTGRES_PASSWORD=12345 -d docker.io/library/postgres:17
while ! podman exec pg pg_isready; do
echo "waiting for postgres..."
sleep 1
done
- name: Setup DuckDB
run: |
sudo apt-get install --yes unzip
curl -L https://install.duckdb.org/v1.4.4/duckdb_cli-linux-amd64.zip > duckdb.zip
unzip duckdb.zip duckdb
sudo mv duckdb /usr/local/bin
export HOME=$(pwd)
duckdb :memory: 'INSTALL postgres;'
- name: Restore databases
run: |
function restore {
local BACKUP="$(gcloud storage ls gs://$1 | sort -r | head -n 1)"
gcloud storage cp "$BACKUP" .
podman exec pg psql -U postgres -c "CREATE DATABASE $2;"
time zstdcat "$(basename "$BACKUP")" \
| podman exec -i pg pg_restore -U postgres -d postgres --clean --create --no-owner --no-privileges
}
restore "$REPLAY_BACKUPS_GCS_BUCKET" bar &
restore "$TEISERVER_BACKUPS_GCS_BUCKET" teiserver_prod &
wait %1 %2
env:
REPLAY_BACKUPS_GCS_BUCKET: ${{ vars.REPLAY_BACKUPS_GCS_BUCKET }}
TEISERVER_BACKUPS_GCS_BUCKET: ${{ vars.TEISERVER_BACKUPS_GCS_BUCKET }}
PGPASSWORD: 12345
PGHOST: 127.0.0.1
PGUSER: postgres
- name: Export parquet files
run: |
mkdir data_export
export HOME=$(pwd)
duckdb -f src/scripts/export_prod_data_source.sql
env:
PGPASSWORD: 12345
PGHOST: 127.0.0.1
PGUSER: postgres
- name: Save data export in GCS bucket
run: |
gcloud config set storage/parallel_composite_upload_compatibility_check False
gcloud storage rsync data_export/ gs://$DATA_MART_GCS_BUCKET/pgdumps --recursive --delete-unmatched-destination-objects
env:
DATA_MART_GCS_BUCKET: ${{ vars.DATA_MART_GCS_BUCKET }}