diff --git a/cloudbuild.yaml b/cloudbuild.yaml index b8f7a20..7a921e2 100644 --- a/cloudbuild.yaml +++ b/cloudbuild.yaml @@ -30,6 +30,7 @@ substitutions: _ENABLE_COS_GPU: "true" _ENABLE_UBUNTU_GPU: "false" _WAIT_FOR_IAP: "true" + _TIER3: "true" tags: - selkies-deploy steps: @@ -243,3 +244,35 @@ steps: timeout 1200 bash ./setup/scripts/wait_for_iap.sh broker@${PROJECT_ID}.iam.gserviceaccount.com $${CLIENT_ID} $${ENDPOINT} waitFor: - fix-autoneg + + ### + # Create image streaming + ### + - name: "gcr.io/cloud-builders/gcloud" + id: "deploy-img-streaming" + dir: "setup/infra/image-streaming" + args: + - "builds" + - "submit" + - "--substitutions=_NAME=${_NAME},_ACTION=${_ACTION},_REGION=${_REGION},_TIER3=${_TIER3}" + waitFor: + - "wait-for-iap-2" + + ### + # Enable image-streaming feature on existing cluster + # Note: Changing the Image streaming setting on a cluster causes GKE to recreate nodes in node pools that inherit the cluster-level setting. Although GKE respects maintenance windows when recreating nodes, + # your workloads might still experience disruptions. 
+ ### + - name: "gcr.io/cloud-builders/kubectl" + id: enable-image-streaming + entrypoint: "bash" + args: + - "-exec" + - | + gcloud container clusters update "$${CLOUDSDK_CONTAINER_CLUSTER}" --enable-image-streaming --project="$${PROJECT_ID}" --region="$${CLOUDSDK_COMPUTE_REGION}" + env: + - "PROJECT_ID=${PROJECT_ID}" + - "CLOUDSDK_CORE_PROJECT=${PROJECT_ID}" + - "CLOUDSDK_COMPUTE_REGION=${_REGION}" + - "CLOUDSDK_CONTAINER_CLUSTER=${_NAME}-${_REGION}" + waitFor: ["deploy-img-streaming"] \ No newline at end of file diff --git a/images/cloudbuild.yaml b/images/cloudbuild.yaml index 458cf96..2b2807a 100644 --- a/images/cloudbuild.yaml +++ b/images/cloudbuild.yaml @@ -18,9 +18,11 @@ substitutions: _USE_CACHE: "false" _IMAGE_PREFIX: kube-pod-broker _IMAGE_TAG: latest - + _TARGET_REPO: us-docker.pkg.dev/${PROJECT_ID}/selkies-images +# options: machineType: N1_HIGHCPU_8 + dynamic_substitutions: true tags: - selkies-images @@ -31,15 +33,23 @@ images: [ # installer 'gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG}', 'gcr.io/${PROJECT_ID}/broker-installer:latest', + '${_TARGET_REPO}/broker-installer:${_IMAGE_TAG}', + '${_TARGET_REPO}/broker-installer:latest', # controller 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:latest', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:latest', # broker gce proxy 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:latest', # autoneg controller 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:latest', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}', + '${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:latest' ] steps: @@ 
-53,14 +63,22 @@ steps: waitFor: ["-"] - name: 'gcr.io/cloud-builders/docker' id: installer - args: [ - 'build', - '-t', 'gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG}', - '--cache-from', 'gcr.io/${PROJECT_ID}/broker-installer:latest', - 'installer' - ] + entrypoint: 'bash' + args: + - '-c' + - | + docker build -t gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/broker-installer:latest installer || exit 1 + docker tag gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} ${_TARGET_REPO}/broker-installer:${_IMAGE_TAG} || exit 1 + docker tag gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} ${_TARGET_REPO}/broker-installer:latest || exit 1 + + apt-get update && apt-get install -y jq + export DOCKER_CLI_EXPERIMENTAL=enabled + chmod +x setup/scripts/check_docker_image.sh + ./setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/broker-installer:${_IMAGE_TAG} || true waitFor: - - installer-pull + - installer-pull + + - name: 'gcr.io/cloud-builders/docker' id: installer-tags args: [ @@ -81,14 +99,21 @@ steps: waitFor: ["-"] - name: 'gcr.io/cloud-builders/docker' id: controller - args: [ - 'build', - '-t', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG}', - '--cache-from', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:latest', - 'controller' - ] + entrypoint: 'bash' + args: + - '-c' + - | + docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:latest controller || exit 1 + docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} || exit 1 + docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-controller:latest || exit 1 + + apt-get update && apt-get install -y jq + export DOCKER_CLI_EXPERIMENTAL=enabled + chmod +x setup/scripts/check_docker_image.sh + ./setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-controller:${_IMAGE_TAG} || true waitFor: - - controller-pull + - 
controller-pull + - name: 'gcr.io/cloud-builders/docker' id: controller-tags args: [ @@ -106,17 +131,23 @@ steps: id: gce-proxy-pull entrypoint: 'bash' args: ["-c", "if [[ '${_USE_CACHE}' == 'true' ]]; then (docker pull gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest || exit 0); fi"] - waitFor: ["-"] + waitFor: ["-"] - name: 'gcr.io/cloud-builders/docker' id: gce-proxy - args: [ - 'build', - '-t', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG}', - '--cache-from', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest', - 'gce-proxy' - ] + entrypoint: 'bash' + args: + - '-c' + - | + docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:latest gce-proxy || exit 1 + docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} || exit 1 + docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-gce-proxy:latest || exit 1 + apt-get update && apt-get install -y jq + export DOCKER_CLI_EXPERIMENTAL=enabled + chmod +x setup/scripts/check_docker_image.sh + ./setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-gce-proxy:${_IMAGE_TAG} || true waitFor: - - gce-proxy-pull + - gce-proxy-pull + - name: 'gcr.io/cloud-builders/docker' id: gce-proxy-tags args: [ @@ -160,14 +191,19 @@ steps: - name: 'gcr.io/cloud-builders/docker' id: autoneg-controller-docker dir: gke-autoneg-controller - args: [ - 'build', - '-t', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG}', - '--cache-from', 'gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:latest', - '.' - ] + entrypoint: 'bash' + args: + - '-c' + - | + docker build -t gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} --cache-from gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:latest . || exit 1 
+ docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} || exit 1 + docker tag gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} ${_TARGET_REPO}/${_IMAGE_PREFIX}-autoneg-controller:latest || exit 1 + apt-get update && apt-get install -y jq + export DOCKER_CLI_EXPERIMENTAL=enabled + chmod +x ../setup/scripts/check_docker_image.sh + ../setup/scripts/check_docker_image.sh -i gcr.io/${PROJECT_ID}/${_IMAGE_PREFIX}-autoneg-controller:${_IMAGE_TAG} || true waitFor: - - autoneg-controller-build + - autoneg-controller-build - name: 'gcr.io/cloud-builders/docker' id: autoneg-controller-tags args: [ diff --git a/images/setup/scripts/check_docker_image.sh b/images/setup/scripts/check_docker_image.sh new file mode 100644 index 0000000..98905b5 --- /dev/null +++ b/images/setup/scripts/check_docker_image.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Limitations of GKE image streaming +# 1. You can't use a Secret to pull container images on GKE versions prior to 1.23.5-gke.1900. +# 2. Container images that use the V2 Image Manifest, schema version 1 are not eligible. +# 3. Container images encrypted with customer-managed encryption keys (CMEK) are not eligible for Image streaming. GKE downloads these images without streaming the data. You can still use CMEK to protect attached persistent disks and custom boot disks in clusters that use Image streaming. +# 4. Container images with empty layers or duplicate layers are not eligible for Image streaming. GKE downloads these images without streaming the data. Check your container image for empty layers or duplicate layers. +# 5. The Artifact Registry repository must be in the same region as your GKE nodes, or in a multi-region that corresponds with the region where your nodes are running. 
For example: +# If your nodes are in us-east1, Image streaming is available for repositories in the us-east1 region or the us multi-region since both GKE and Artifact Registry are running in data center locations within the United States. +# If your nodes are in the northamerica-northeast1 region, the nodes are running in Canada. In this situation, Image streaming is only available for repositories in the same region. +# 6. If your workloads read many files in an image during initialization, you might notice increased initialization times because of the latency added by the remote file reads. +# 7. You might not notice the benefits of Image streaming during the first pull of an eligible image. However, after Image streaming caches the image, future image pulls on any cluster benefit from Image streaming. +# 8. GKE uses the cluster-level configuration to determine whether to enable Image streaming on new node pools created using node auto-provisioning. However, you cannot use workload separation to create node pools with Image streaming enabled when Image streaming is disabled at the cluster level. +# 9. Linux file capabilities such as CAP_NET_RAW are supported with Image streaming in GKE version 1.22.6-gke.300 and later. For previous GKE versions, these capabilities are not available when the image file is streamed, or when the image is saved to the local disk. To avoid potential disruptions, do not use Image streaming for containers with these capabilities in GKE versions prior to 1.22.6-gke.300. If your container relies on Linux file capabilities, it might fail to start with permission denied errors when running with Image streaming enabled. 
+set -ex +display_usage() { + + echo -e "\nUsage: $0 -i \n" + echo -e "Argument: \n" + echo -e "\t -i: IMAGE_NAME" +} +if [ $# -le 1 ] + then + display_usage + exit 1 +fi + +while getopts i:h flag +do + case "${flag}" in + i) IMAGE="${OPTARG}";; + *) display_usage + exit 1 ;; + esac +done + +# docker pull $IMAGE +DOCKER_SCHEMA_VERSION=$(docker manifest inspect --verbose "${IMAGE}" | grep '"schemaVersion": 2,' | wc -l) +LAYERS=$(docker inspect "${IMAGE}" | jq -r '.[].RootFS.Layers[]' | sort | wc -l) +UNIQUE_LAYERS=$(docker inspect "${IMAGE}" | jq -r '.[].RootFS.Layers[]' | sort | uniq | wc -l ) +EMPTY_LAYER=$(docker inspect "${IMAGE}" | jq -r '.[].RootFS.Layers[]' | grep -i "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" | wc -l) + +if [[ $DOCKER_SCHEMA_VERSION -eq 0 ]]; then + echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Docker schema version mismatch, requires schemaVersion: 2" + echo "[ ERROR ] schemaVersion : $(docker manifest inspect --verbose "${IMAGE}" | grep '"schemaVersion"')" + exit 1 +fi + +if [[ $LAYERS -ne $UNIQUE_LAYERS ]]; then + echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Duplicate docker layers." + echo "[ ERROR ] Duplicate layers: $(docker inspect "${IMAGE}" | jq -r '.[].RootFS.Layers[]' | sort | uniq -d)" + exit 1 +fi + +if [[ $EMPTY_LAYER -gt 0 ]]; then + echo "[ ERROR ] Image ${IMAGE} failed to match image streaming criteria. Reason: Empty docker layers." + echo "[ ERROR ] Image contains empty layers with sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4" + exit 1 +fi + +echo "[ INFO ] Success!!! Image ${IMAGE} matching criteria for image streaming." 
\ No newline at end of file diff --git a/setup/infra/image-streaming/artifact-registry.tf b/setup/infra/image-streaming/artifact-registry.tf new file mode 100644 index 0000000..6aacbca --- /dev/null +++ b/setup/infra/image-streaming/artifact-registry.tf @@ -0,0 +1,20 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +resource "google_artifact_registry_repository" "selkies-repo" { + provider = google-beta + location = var.region + repository_id = "selkies-images" + description = "selkies image artifact registry" + format = "DOCKER" +} \ No newline at end of file diff --git a/setup/infra/image-streaming/cloudbuild.yaml b/setup/infra/image-streaming/cloudbuild.yaml new file mode 100644 index 0000000..44aab4b --- /dev/null +++ b/setup/infra/image-streaming/cloudbuild.yaml @@ -0,0 +1,50 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +timeout: 10800s +substitutions: + _ACTION: apply + _NAME: broker + _REGION: us-west1 + _TIER3: "true" +tags: + - selkies-node-pool-image-streaming + - selkies-image-streaming +steps: + ### + # Create GCS bucket to store terraform state + ### + - name: "gcr.io/cloud-builders/gsutil" + id: "create-tf-state-bucket" + entrypoint: "/bin/bash" + args: + - "-xe" + - "-c" + - | + gsutil mb gs://${PROJECT_ID}-${_NAME}-tf-state || true + waitFor: ["-"] + + ### + # Apply terraform to create the image-streaming infrastructure + ### + - name: "gcr.io/${PROJECT_ID}/${_NAME}-installer" + id: "terraform-apply" + entrypoint: "/workspace/deploy.sh" + env: + - TF_VAR_project_id=${PROJECT_ID} + - TF_VAR_name=${_NAME} + - TF_VAR_region=${_REGION} + - TF_VAR_tier3_pool_enabled=${_TIER3} + - TERRAFORM_WORKSPACE_NAME=image-streaming-${_REGION} + - ACTION=${_ACTION} \ No newline at end of file diff --git a/setup/infra/image-streaming/deploy.sh b/setup/infra/image-streaming/deploy.sh new file mode 100755 index 0000000..b5d90c5 --- /dev/null +++ b/setup/infra/image-streaming/deploy.sh @@ -0,0 +1,86 @@ +#!/bin/bash + +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +set -e + +export RED='\033[1;31m' +export CYAN='\033[1;36m' +export GREEN='\033[1;32m' +export NC='\033[0m' # No Color +function log_red() { echo -e "${RED}$@${NC}"; } +function log_cyan() { echo -e "${CYAN}$@${NC}"; } +function log_green() { echo -e "${GREEN}$@${NC}"; } + +SCRIPT_DIR=$(dirname $(readlink -f $0 2>/dev/null) 2>/dev/null || echo "${PWD}/$(dirname $0)") + +cd "${SCRIPT_DIR}" + +# Fetch any Secret Manager secrets named broker-tfvars* and save them to .auto.tfvars files. +for secret in $(gcloud -q secrets list --filter=name~broker-tfvars- --format="value(name)"); do + latest=$(gcloud secrets versions list ${secret} --sort-by=created --format='value(name)' --filter='STATE=enabled' --limit=1) + [[ -z "${latest}" ]] && log_red "WARN: no enabled versions found for secret ${secret}" && continue + dest="${secret/broker-tfvars-/}.auto.tfvars" + log_cyan "Creating ${dest} from secret: ${secret}" + gcloud -q secrets versions access ${latest} --secret ${secret} > ${dest} +done + +# Fetch any Secret Manager secrets named broker-${TF_VAR_region}-tfvars* and save them to .auto.tfvars files. +for secret in $(gcloud -q secrets list --filter=name~broker-${TF_VAR_region}-tfvars- --format="value(name)"); do + latest=$(gcloud secrets versions list ${secret} --sort-by=created --format='value(name)' --filter='STATE=enabled' --limit=1) + [[ -z "${latest}" ]] && log_red "WARN: no enabled versions found for secret ${secret}" && continue + dest="${secret/broker-${TF_VAR_region}-tfvars-/}.auto.tfvars" + log_cyan "Creating ${dest} from secret: ${secret}" + gcloud -q secrets versions access ${latest} --secret ${secret} > ${dest} +done + +# Fetch any Secret Manager secrets named broker-${TF_VAR_region}-node-pool-apps-override-* and save them to *_override.tf files. 
+for secret in $(gcloud -q secrets list --filter=name~broker-${TF_VAR_region}-node-pool-apps-override- --format="value(name)"); do + latest=$(gcloud secrets versions list ${secret} --sort-by=created --format='value(name)' --filter='STATE=enabled' --limit=1) + [[ -z "${latest}" ]] && log_red "WARN: no enabled versions found for secret ${secret}" && continue + dest="${secret/broker-${TF_VAR_region}-node-pool-apps-override-/}_override.tf" + log_cyan "Creating ${dest} from secret: ${secret}" + gcloud -q secrets versions access ${latest} --secret ${secret} > ${dest} +done + +export TF_IN_AUTOMATION=1 + +# Set default project for google provider. +export GOOGLE_PROJECT=${TF_VAR_project_id?} + +# Initialize backend and select workspace +terraform init -upgrade=true -input=false \ + -backend-config="bucket=${TF_VAR_project_id?}-${TF_VAR_name?}-tf-state" \ + -backend-config="prefix=${TF_VAR_name?}" || true +terraform workspace select ${TERRAFORM_WORKSPACE_NAME?} || terraform workspace new ${TERRAFORM_WORKSPACE_NAME?} +terraform init -input=false \ + -backend-config="bucket=${TF_VAR_project_id?}-${TF_VAR_name?}-tf-state" \ + -backend-config="prefix=${TF_VAR_name?}" || true + +if [[ "${ACTION?}" == "destroy" ]]; then + log_cyan "Running terraform destroy..." + terraform destroy -auto-approve -input=false +elif [[ "${ACTION?}" == "plan" ]]; then + log_cyan "Running terraform plan..." + terraform plan -out terraform.tfplan -input=false +elif [[ "${ACTION?}" == "apply" ]]; then + log_cyan "Running terraform plan..." + terraform plan -out terraform.tfplan -input=false + + log_cyan "Running terraform apply..." 
+ terraform apply -input=false terraform.tfplan +fi + +log_green "Done" \ No newline at end of file diff --git a/setup/infra/image-streaming/main.tf b/setup/infra/image-streaming/main.tf new file mode 100644 index 0000000..a66536b --- /dev/null +++ b/setup/infra/image-streaming/main.tf @@ -0,0 +1,23 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +data "google_container_cluster" "broker" { + name = "${var.name}-${var.region}" + location = var.region + project = var.project_id +} + +data "google_service_account" "broker_cluster" { + account_id = var.name + project = var.project_id +} \ No newline at end of file diff --git a/setup/infra/image-streaming/services.tf b/setup/infra/image-streaming/services.tf new file mode 100644 index 0000000..0e006b7 --- /dev/null +++ b/setup/infra/image-streaming/services.tf @@ -0,0 +1,27 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+resource "google_project_service" "containerfilesystem" { + for_each = toset(var.gcp_service_list) + + project = var.project_id + service = each.key + + timeouts { + create = "30m" + update = "40m" + } + + disable_dependent_services = true + disable_on_destroy = false +} \ No newline at end of file diff --git a/setup/infra/image-streaming/tier3.tf b/setup/infra/image-streaming/tier3.tf new file mode 100644 index 0000000..04c28dc --- /dev/null +++ b/setup/infra/image-streaming/tier3.tf @@ -0,0 +1,107 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +resource "google_container_node_pool" "tier3" { + provider = google-beta + count = var.tier3_pool_enabled ? 1 : 0 + name = "tier3" + location = var.region + cluster = data.google_container_cluster.broker.name + initial_node_count = var.tier3_pool_initial_node_count + + node_config { + preemptible = var.tier3_pool_preemptive_nodes + machine_type = var.tier3_pool_machine_type + + service_account = data.google_service_account.broker_cluster.email + + disk_size_gb = var.tier3_pool_disk_size_gb + disk_type = var.tier3_pool_disk_type + + ephemeral_storage_config { + local_ssd_count = var.tier3_pool_ephemeral_storage_ssd_count + } + + image_type = "COS_CONTAINERD" + # gcfs_config - (Optional) Parameters for the Google Container Filesystem (GCFS). If unspecified, + # GCFS will not be enabled on the node pool. 
+ # When enabling this feature you must specify + # image_type = "COS_CONTAINERD" and node_version from + # GKE versions 1.19 or later to use it. + # For GKE versions 1.19, 1.20, and 1.21, + # the recommended minimum node_version would be + # 1.19.15-gke.1300, 1.20.11-gke.1300, and 1.21.5-gke.1300 respectively. + # A machine_type that has more than 16 GiB of memory is also recommended. + # GCFS must be enabled in order to use image streaming. + # Open ISSUE + # https://github.com/hashicorp/terraform-provider-google/issues/10509 + gcfs_config { + enabled = true + } + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform", + ] + + metadata = { + cluster_name = data.google_container_cluster.broker.name + node_pool = "tier3" + disable-legacy-endpoints = "true" + } + + labels = { + cluster_name = data.google_container_cluster.broker.name + node_pool = "tier3" + + # updated by node init daemonset when finished. + "app.broker/initialized" = "false" + + # Used to set pod affinity + "app.broker/tier" = "tier3" + } + + taint = [ + { + # Taint to be removed when node init daemonset completes. + key = "app.broker/node-init" + value = true + effect = "NO_SCHEDULE" + }, + { + # Repel pods without the tier toleration. + key = "app.broker/tier" + value = "tier3" + effect = "NO_SCHEDULE" + }, + ] + } + + management { + auto_repair = true + auto_upgrade = true + } + + autoscaling { + min_node_count = var.tier3_pool_min_node_count + max_node_count = var.tier3_pool_max_node_count + } + + // node labels and taints are modified dynamically by the node init containers + // ignore changes so that Terraform doesn't try to undo their modifications. 
+ lifecycle { + ignore_changes = [ + node_config[0].labels, + node_config[0].taint + ] + } +} diff --git a/setup/infra/image-streaming/variables.tf b/setup/infra/image-streaming/variables.tf new file mode 100644 index 0000000..fcd7cd9 --- /dev/null +++ b/setup/infra/image-streaming/variables.tf @@ -0,0 +1,91 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +variable "gcp_service_list" { + description = "The list of apis necessary for the project" + type = list(string) + default = [ + "containerfilesystem.googleapis.com", + "artifactregistry.googleapis.com" + ] +} + + +variable "project_id" {} +variable "region" {} + +variable "name" { + default = "broker" +} + +# Tier 3 COS node pool parameters +variable "tier3_pool_enabled" { + default = true +} +variable "tier3_pool_machine_type" { + default = "e2-standard-8" +} +variable "tier3_pool_initial_node_count" { + default = 1 +} +variable "tier3_pool_min_node_count" { + default = 0 +} +variable "tier3_pool_max_node_count" { + default = 10 +} +variable "tier3_pool_preemptive_nodes" { + default = false +} +variable "tier3_pool_disk_size_gb" { + default = 100 +} +variable "tier3_pool_disk_type" { + default = "pd-balanced" +} +variable "tier3_pool_ephemeral_storage_ssd_count" { + description = "use local-ssd for ephemeral container storage. NOTE: requires either n1, n2 or n2d instance types." 
+ default = 0 +} + +# Tier 3 Ubuntu node pool parameters +variable "tier3_ubuntu_pool_enabled" { + default = false +} +variable "tier3_ubuntu_pool_machine_type" { + default = "e2-standard-8" +} +variable "tier3_ubuntu_pool_initial_node_count" { + default = 1 +} +variable "tier3_ubuntu_pool_min_node_count" { + default = 0 +} +variable "tier3_ubuntu_pool_max_node_count" { + default = 10 +} +variable "tier3_ubuntu_pool_preemptive_nodes" { + default = false +} +variable "tier3_ubuntu_pool_disk_size_gb" { + default = 100 +} +variable "tier3_ubuntu_pool_disk_type" { + default = "pd-balanced" +} +variable "tier3_ubuntu_pool_ephemeral_storage_ssd_count" { + description = "use local-ssd for ephemeral container storage. NOTE: requires either n1, n2 or n2d instance types." + default = 0 +} + + diff --git a/setup/infra/image-streaming/versions.tf b/setup/infra/image-streaming/versions.tf new file mode 100644 index 0000000..307fbd5 --- /dev/null +++ b/setup/infra/image-streaming/versions.tf @@ -0,0 +1,27 @@ +# Copyright 2022 The Selkies Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +terraform { + backend "gcs" {} + required_version = ">= 1.2.0" + required_providers { + external = "~> 1.2.0" + google = "~> 4.25.0, <4.25.6" + google-beta = "~> 4.25.0" + kubernetes = "~> 2.11.0" + template = "~> 2.1" + null = "~> 2.1" + random = "~> 2.2" + } +} diff --git a/setup/infra/sa.tf b/setup/infra/sa.tf index d22fee1..3c9703b 100644 --- a/setup/infra/sa.tf +++ b/setup/infra/sa.tf @@ -143,3 +143,9 @@ resource "google_project_iam_member" "user_pod_service_account-iap-user" { role = "roles/iap.httpsResourceAccessor" member = "serviceAccount:${google_service_account.user_pod_service_account.email}" } +# Grant the cluster service account read access to Artifact Registry. +resource "google_project_iam_member" "cluster_service_account-artifact-registry" { + project = var.project_id + role = "roles/artifactregistry.reader" + member = "serviceAccount:${google_service_account.cluster_service_account.email}" +}