From b5a18a5a68bc3671c58466791b327895b3f06847 Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 27 Oct 2025 18:33:45 +0530 Subject: [PATCH 1/7] initial commit --- .github/workflows/maven.yml | 180 +- helm/atlas-read/Chart.yaml | 28 + helm/atlas-read/README.md | 2 + .../charts/cassandra-online-dc/.helmignore | 17 + .../charts/cassandra-online-dc/Chart.yaml | 19 + .../charts/cassandra-online-dc/README.md | 215 +++ .../sample/create-storage-gce.yaml | 7 + .../cassandra-online-dc/templates/NOTES.txt | 35 + .../templates/_helpers.tpl | 43 + .../templates/backup/cronjob.yaml | 117 ++ .../templates/backup/rbac.yaml | 55 + .../templates/cassandra-online-dc-config.yaml | 1619 +++++++++++++++++ .../templates/configmap.yaml | 16 + .../cassandra-online-dc/templates/pdb.yaml | 24 + .../templates/reaper/cronjob.yaml | 95 + .../templates/service.yaml | 48 + .../templates/servicemonitor.yaml | 31 + .../templates/statefulset.yaml | 442 +++++ .../charts/cassandra-online-dc/values.yaml | 400 ++++ .../elasticsearch-exporter-read/.helmignore | 24 + .../elasticsearch-exporter-read/Chart.yaml | 20 + .../charts/elasticsearch-exporter-read/OWNERS | 8 + .../elasticsearch-exporter-read/README.md | 146 ++ .../ci/default-values.yaml | 1 + .../ci/security-context.yaml | 5 + .../templates/NOTES.txt | 15 + .../templates/_helpers.tpl | 33 + .../templates/cert-secret.yaml | 19 + .../templates/deployment.yaml | 199 ++ .../templates/podsecuritypolicies.yaml | 42 + .../templates/prometheusrule.yaml | 26 + .../templates/role.yaml | 20 + .../templates/rolebinding.yaml | 26 + .../templates/service.yaml | 28 + .../templates/serviceaccount.yaml | 14 + .../templates/servicemonitor.yaml | 42 + .../elasticsearch-exporter-read/values.yaml | 300 +++ .../charts/elasticsearch-read/.helmignore | 2 + .../charts/elasticsearch-read/Chart.yaml | 12 + .../charts/elasticsearch-read/Makefile | 1 + .../charts/elasticsearch-read/README.md | 367 ++++ .../elasticsearch-read/templates/NOTES.txt | 4 + .../elasticsearch-read/templates/_helpers.tpl | 87 + .../templates/backup-cronjob.yaml | 43 + .../templates/configmap.yaml | 19 + .../templates/es-regional-configmap.yaml | 39 + .../elasticsearch-read/templates/ingress.yaml | 57 + .../templates/poddisruptionbudget.yaml | 19 + .../templates/podsecuritypolicy.yaml | 17 + .../elasticsearch-read/templates/role.yaml | 28 + .../templates/rolebinding.yaml | 27 + .../elasticsearch-read/templates/service.yaml | 76 + .../templates/serviceaccount.yaml | 20 + .../templates/statefulset.yaml | 439 +++++ .../templates/synonym-configmap.yaml | 30 + .../charts/elasticsearch-read/values.yaml | 326 ++++ helm/atlas-read/templates/NOTES.txt | 19 + helm/atlas-read/templates/_helpers.tpl | 47 + .../atlas-audit-index-configmap.yaml | 168 ++ .../atlas-logback-config-configmap.yaml | 251 +++ .../configmap-init-container-script.yaml | 122 ++ .../templates/configmap-init-script.yaml | 268 +++ helm/atlas-read/templates/configmap.yaml | 514 ++++++ .../create-atlas-keycloak-config-cm.yaml | 34 + helm/atlas-read/templates/deployment.yaml | 242 +++ .../templates/frontend-service.yaml | 22 + helm/atlas-read/templates/hpa.yaml | 33 + .../templates/keycloak-config-secret.yaml | 18 + .../atlas-read/templates/primary-ingress.yaml | 83 + helm/atlas-read/templates/service.yaml | 41 + helm/atlas-read/templates/statefulset.yaml | 246 +++ helm/atlas-read/templates/statsd-cronjob.yaml | 65 + .../atlas-read/templates/telegraf-config.yaml | 133 ++ helm/atlas-read/values.yaml | 462 +++++ helm/atlas/Chart.yaml | 28 + helm/atlas/README.md | 2 + 
helm/atlas/charts/cassandra/.helmignore | 17 + helm/atlas/charts/cassandra/Chart.yaml | 19 + helm/atlas/charts/cassandra/README.md | 215 +++ .../cassandra/sample/create-storage-gce.yaml | 7 + .../charts/cassandra/templates/NOTES.txt | 35 + .../charts/cassandra/templates/_helpers.tpl | 43 + .../cassandra/templates/backup/cronjob.yaml | 102 ++ .../cassandra/templates/backup/rbac.yaml | 53 + .../cassandra/templates/cassandra-config.yaml | 1617 ++++++++++++++++ .../charts/cassandra/templates/configmap.yaml | 14 + .../atlas/charts/cassandra/templates/pdb.yaml | 22 + .../cassandra/templates/reaper/cronjob.yaml | 80 + .../charts/cassandra/templates/service.yaml | 46 + .../cassandra/templates/servicemonitor.yaml | 29 + .../cassandra/templates/statefulset.yaml | 434 +++++ helm/atlas/charts/cassandra/values.yaml | 400 ++++ helm/atlas/charts/elasticsearch/.helmignore | 2 + helm/atlas/charts/elasticsearch/Chart.yaml | 12 + helm/atlas/charts/elasticsearch/Makefile | 1 + helm/atlas/charts/elasticsearch/README.md | 367 ++++ .../charts/elasticsearch/templates/NOTES.txt | 4 + .../elasticsearch/templates/_helpers.tpl | 87 + .../templates/backup-cronjob.yaml | 41 + .../elasticsearch/templates/configmap.yaml | 17 + .../templates/es-regional-configmap.yaml | 37 + .../elasticsearch/templates/ingress.yaml | 55 + .../templates/poddisruptionbudget.yaml | 17 + .../templates/podsecuritypolicy.yaml | 15 + .../charts/elasticsearch/templates/role.yaml | 26 + .../elasticsearch/templates/rolebinding.yaml | 25 + .../elasticsearch/templates/service.yaml | 74 + .../templates/serviceaccount.yaml | 18 + .../elasticsearch/templates/statefulset.yaml | 449 +++++ .../templates/synonym-configmap.yaml | 28 + helm/atlas/charts/elasticsearch/values.yaml | 319 ++++ helm/atlas/charts/logstash/.helmignore | 2 + helm/atlas/charts/logstash/Chart.yaml | 12 + helm/atlas/charts/logstash/Makefile | 1 + helm/atlas/charts/logstash/README.md | 244 +++ .../atlas/charts/logstash/templates/NOTES.txt | 2 + .../charts/logstash/templates/_helpers.tpl | 45 + .../logstash/templates/configmap-config.yaml | 20 + .../templates/configmap-metricbeat.yaml | 69 + .../logstash/templates/configmap-pattern.yaml | 20 + .../templates/configmap-pipeline.yaml | 20 + .../charts/logstash/templates/ingress.yaml | 71 + .../templates/poddisruptionbudget.yaml | 23 + .../logstash/templates/podsecuritypolicy.yaml | 16 + .../atlas/charts/logstash/templates/role.yaml | 28 + .../logstash/templates/rolebinding.yaml | 23 + .../charts/logstash/templates/secret.yaml | 30 + .../logstash/templates/service-headless.yaml | 23 + .../charts/logstash/templates/service.yaml | 35 + .../logstash/templates/serviceaccount.yaml | 25 + .../logstash/templates/servicemonitor.yaml | 71 + .../logstash/templates/statefulset.yaml | 240 +++ helm/atlas/charts/logstash/values.yaml | 1015 +++++++++++ helm/atlas/templates/NOTES.txt | 19 + helm/atlas/templates/_helpers.tpl | 47 + .../atlas-audit-index-configmap.yaml | 162 ++ .../atlas-logback-config-configmap.yaml | 249 +++ .../configmap-init-container-script.yaml | 120 ++ .../templates/configmap-init-script.yaml | 259 +++ helm/atlas/templates/configmap.yaml | 503 +++++ .../create-atlas-keycloak-config-cm.yaml | 32 + helm/atlas/templates/deployment.yaml | 270 +++ helm/atlas/templates/frontend-service.yaml | 20 + helm/atlas/templates/healthcheck-ingress.yaml | 77 + helm/atlas/templates/hpa.yaml | 33 + .../templates/keycloak-config-secret.yaml | 16 + helm/atlas/templates/nginx-cm.yaml | 324 ++++ helm/atlas/templates/pdb.yaml | 23 + 
helm/atlas/templates/podmonitor.yaml | 37 + helm/atlas/templates/primary-ingress.yaml | 81 + helm/atlas/templates/ratelimited-service.yaml | 32 + helm/atlas/templates/secondary-ingress.yaml | 172 ++ helm/atlas/templates/service.yaml | 47 + helm/atlas/templates/statefulset.yaml | 253 +++ helm/atlas/templates/statsd-cronjob.yaml | 63 + helm/atlas/templates/telegraf-config.yaml | 150 ++ helm/atlas/templates/tls_secrets.yaml | 10 + helm/atlas/values.yaml | 502 +++++ 158 files changed, 19033 insertions(+), 2 deletions(-) create mode 100644 helm/atlas-read/Chart.yaml create mode 100644 helm/atlas-read/README.md create mode 100755 helm/atlas-read/charts/cassandra-online-dc/.helmignore create mode 100755 helm/atlas-read/charts/cassandra-online-dc/Chart.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/README.md create mode 100755 helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/NOTES.txt create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/_helpers.tpl create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/backup/cronjob.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/backup/rbac.yaml create mode 100644 helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/configmap.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/pdb.yaml create mode 100644 helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/service.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/servicemonitor.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml create mode 100755 helm/atlas-read/charts/cassandra-online-dc/values.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/.helmignore create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/Chart.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/OWNERS create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/README.md create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/ci/default-values.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/ci/security-context.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/NOTES.txt create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/_helpers.tpl create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/cert-secret.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/deployment.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/podsecuritypolicies.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/prometheusrule.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/role.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/rolebinding.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/service.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/serviceaccount.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/templates/servicemonitor.yaml create 
mode 100644 helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/.helmignore create mode 100755 helm/atlas-read/charts/elasticsearch-read/Chart.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/Makefile create mode 100755 helm/atlas-read/charts/elasticsearch-read/README.md create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/NOTES.txt create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/_helpers.tpl create mode 100644 helm/atlas-read/charts/elasticsearch-read/templates/backup-cronjob.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/configmap.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-read/templates/es-regional-configmap.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/ingress.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/poddisruptionbudget.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/podsecuritypolicy.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/role.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-read/templates/service.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/serviceaccount.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-read/templates/synonym-configmap.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/values.yaml create mode 100644 helm/atlas-read/templates/NOTES.txt create mode 100644 helm/atlas-read/templates/_helpers.tpl create mode 100644 helm/atlas-read/templates/atlas-audit-index-configmap.yaml create mode 100644 helm/atlas-read/templates/atlas-logback-config-configmap.yaml create mode 100644 helm/atlas-read/templates/configmap-init-container-script.yaml create mode 100644 helm/atlas-read/templates/configmap-init-script.yaml create mode 100644 helm/atlas-read/templates/configmap.yaml create mode 100644 helm/atlas-read/templates/create-atlas-keycloak-config-cm.yaml create mode 100644 helm/atlas-read/templates/deployment.yaml create mode 100644 helm/atlas-read/templates/frontend-service.yaml create mode 100644 helm/atlas-read/templates/hpa.yaml create mode 100644 helm/atlas-read/templates/keycloak-config-secret.yaml create mode 100644 helm/atlas-read/templates/primary-ingress.yaml create mode 100644 helm/atlas-read/templates/service.yaml create mode 100644 helm/atlas-read/templates/statefulset.yaml create mode 100644 helm/atlas-read/templates/statsd-cronjob.yaml create mode 100644 helm/atlas-read/templates/telegraf-config.yaml create mode 100644 helm/atlas-read/values.yaml create mode 100644 helm/atlas/Chart.yaml create mode 100644 helm/atlas/README.md create mode 100755 helm/atlas/charts/cassandra/.helmignore create mode 100755 helm/atlas/charts/cassandra/Chart.yaml create mode 100755 helm/atlas/charts/cassandra/README.md create mode 100755 helm/atlas/charts/cassandra/sample/create-storage-gce.yaml create mode 100755 helm/atlas/charts/cassandra/templates/NOTES.txt create mode 100755 helm/atlas/charts/cassandra/templates/_helpers.tpl create mode 100755 helm/atlas/charts/cassandra/templates/backup/cronjob.yaml create mode 100755 helm/atlas/charts/cassandra/templates/backup/rbac.yaml create mode 100644 
helm/atlas/charts/cassandra/templates/cassandra-config.yaml create mode 100755 helm/atlas/charts/cassandra/templates/configmap.yaml create mode 100755 helm/atlas/charts/cassandra/templates/pdb.yaml create mode 100644 helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml create mode 100755 helm/atlas/charts/cassandra/templates/service.yaml create mode 100755 helm/atlas/charts/cassandra/templates/servicemonitor.yaml create mode 100755 helm/atlas/charts/cassandra/templates/statefulset.yaml create mode 100755 helm/atlas/charts/cassandra/values.yaml create mode 100755 helm/atlas/charts/elasticsearch/.helmignore create mode 100755 helm/atlas/charts/elasticsearch/Chart.yaml create mode 100755 helm/atlas/charts/elasticsearch/Makefile create mode 100755 helm/atlas/charts/elasticsearch/README.md create mode 100755 helm/atlas/charts/elasticsearch/templates/NOTES.txt create mode 100755 helm/atlas/charts/elasticsearch/templates/_helpers.tpl create mode 100644 helm/atlas/charts/elasticsearch/templates/backup-cronjob.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/configmap.yaml create mode 100644 helm/atlas/charts/elasticsearch/templates/es-regional-configmap.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/ingress.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/podsecuritypolicy.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/role.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/rolebinding.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/service.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/serviceaccount.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/statefulset.yaml create mode 100644 helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml create mode 100755 helm/atlas/charts/elasticsearch/values.yaml create mode 100644 helm/atlas/charts/logstash/.helmignore create mode 100644 helm/atlas/charts/logstash/Chart.yaml create mode 100644 helm/atlas/charts/logstash/Makefile create mode 100644 helm/atlas/charts/logstash/README.md create mode 100755 helm/atlas/charts/logstash/templates/NOTES.txt create mode 100755 helm/atlas/charts/logstash/templates/_helpers.tpl create mode 100644 helm/atlas/charts/logstash/templates/configmap-config.yaml create mode 100644 helm/atlas/charts/logstash/templates/configmap-metricbeat.yaml create mode 100644 helm/atlas/charts/logstash/templates/configmap-pattern.yaml create mode 100644 helm/atlas/charts/logstash/templates/configmap-pipeline.yaml create mode 100644 helm/atlas/charts/logstash/templates/ingress.yaml create mode 100644 helm/atlas/charts/logstash/templates/poddisruptionbudget.yaml create mode 100644 helm/atlas/charts/logstash/templates/podsecuritypolicy.yaml create mode 100644 helm/atlas/charts/logstash/templates/role.yaml create mode 100644 helm/atlas/charts/logstash/templates/rolebinding.yaml create mode 100644 helm/atlas/charts/logstash/templates/secret.yaml create mode 100644 helm/atlas/charts/logstash/templates/service-headless.yaml create mode 100644 helm/atlas/charts/logstash/templates/service.yaml create mode 100644 helm/atlas/charts/logstash/templates/serviceaccount.yaml create mode 100644 helm/atlas/charts/logstash/templates/servicemonitor.yaml create mode 100644 helm/atlas/charts/logstash/templates/statefulset.yaml create mode 100644 helm/atlas/charts/logstash/values.yaml create mode 100644 
helm/atlas/templates/NOTES.txt create mode 100644 helm/atlas/templates/_helpers.tpl create mode 100644 helm/atlas/templates/atlas-audit-index-configmap.yaml create mode 100644 helm/atlas/templates/atlas-logback-config-configmap.yaml create mode 100644 helm/atlas/templates/configmap-init-container-script.yaml create mode 100644 helm/atlas/templates/configmap-init-script.yaml create mode 100644 helm/atlas/templates/configmap.yaml create mode 100644 helm/atlas/templates/create-atlas-keycloak-config-cm.yaml create mode 100644 helm/atlas/templates/deployment.yaml create mode 100644 helm/atlas/templates/frontend-service.yaml create mode 100644 helm/atlas/templates/healthcheck-ingress.yaml create mode 100644 helm/atlas/templates/hpa.yaml create mode 100644 helm/atlas/templates/keycloak-config-secret.yaml create mode 100644 helm/atlas/templates/nginx-cm.yaml create mode 100644 helm/atlas/templates/pdb.yaml create mode 100644 helm/atlas/templates/podmonitor.yaml create mode 100644 helm/atlas/templates/primary-ingress.yaml create mode 100644 helm/atlas/templates/ratelimited-service.yaml create mode 100644 helm/atlas/templates/secondary-ingress.yaml create mode 100644 helm/atlas/templates/service.yaml create mode 100644 helm/atlas/templates/statefulset.yaml create mode 100644 helm/atlas/templates/statsd-cronjob.yaml create mode 100644 helm/atlas/templates/telegraf-config.yaml create mode 100644 helm/atlas/templates/tls_secrets.yaml create mode 100644 helm/atlas/values.yaml diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index c576ca87839..9fddbd99731 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,8 +34,49 @@ on: - mlh-1240-improve-cm-refresh-master jobs: - build: + helm-lint: + runs-on: ubuntu-latest + strategy: + matrix: + chart: ['atlas', 'atlas-read'] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Install Helm + uses: azure/setup-helm@v3 + with: + version: '3.12.0' + + - name: Update helm dependencies + run: | + cd helm/${{ matrix.chart }} + helm dependency update + + echo "Chart dependencies:" + ls -la charts/ + + - name: Lint helm chart + run: | + helm lint helm/${{ matrix.chart }}/ + echo "✅ ${{ matrix.chart }} chart lint passed!" + + - name: Validate Chart.yaml + run: | + # Check for required fields + if ! grep -q "^version:" helm/${{ matrix.chart }}/Chart.yaml; then + echo "❌ Error: version field missing in Chart.yaml" + exit 1 + fi + if ! grep -q "^appVersion:" helm/${{ matrix.chart }}/Chart.yaml; then + echo "❌ Error: appVersion field missing in Chart.yaml" + exit 1 + fi + echo "✅ Chart.yaml validation passed!" 
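For contributors iterating on the charts, the lint job above can be approximated locally before pushing a branch. The sketch below is not part of the workflow itself; it simply mirrors the job's steps and assumes Helm 3.12+ plus the `helm/atlas` and `helm/atlas-read` layout introduced by this patch.

```bash
#!/usr/bin/env bash
# Hedged local sketch of the helm-lint job above (assumes Helm >= 3.12).
set -euo pipefail

for chart in atlas atlas-read; do
  # Same as the workflow's "Update helm dependencies" and "Lint helm chart" steps.
  (cd "helm/${chart}" && helm dependency update)
  helm lint "helm/${chart}/"

  # Mirrors the workflow's "Validate Chart.yaml" checks.
  grep -q "^version:" "helm/${chart}/Chart.yaml" || { echo "version missing in ${chart}"; exit 1; }
  grep -q "^appVersion:" "helm/${chart}/Chart.yaml" || { echo "appVersion missing in ${chart}"; exit 1; }
done
echo "local chart checks passed"
```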
+ build: + needs: helm-lint runs-on: ubuntu-latest steps: @@ -430,4 +471,139 @@ jobs: uses: actions/upload-artifact@v4 with: name: smoke-test-logs-${{ github.run_id }} - path: smoke-test-logs/ \ No newline at end of file + path: smoke-test-logs/ + + helm-publish: + needs: smoke-test # Only publish if smoke tests pass in all clouds + runs-on: ubuntu-latest + strategy: + matrix: + chart: ['atlas', 'atlas-read'] + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Get branch name + id: branch + run: | + echo "name=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT + + - name: Get commit ID + id: commit + run: | + echo "id=$(echo ${GITHUB_SHA} | cut -c1-7)abcd" >> $GITHUB_OUTPUT + + - name: Generate chart version + id: version + run: | + # Semantic version: 1.0.0-branch.commitid + # Replace underscores with hyphens for semver compliance + BRANCH_NAME_NORMALIZED=$(echo "${{ steps.branch.outputs.name }}" | tr '_' '-') + CHART_VERSION="1.0.0-${BRANCH_NAME_NORMALIZED}.${{ steps.commit.outputs.id }}" + echo "chart=${CHART_VERSION}" >> $GITHUB_OUTPUT + echo "Generated chart version: ${CHART_VERSION}" + + - name: Install Helm + uses: azure/setup-helm@v3 + with: + version: '3.12.0' + + - name: Update Chart.yaml with version + run: | + sed -i "s/^version: .*/version: ${{ steps.version.outputs.chart }}/" helm/${{ matrix.chart }}/Chart.yaml + sed -i "s/^appVersion: .*/appVersion: \"${{ steps.commit.outputs.id }}\"/" helm/${{ matrix.chart }}/Chart.yaml + + echo "Updated ${{ matrix.chart }}/Chart.yaml:" + cat helm/${{ matrix.chart }}/Chart.yaml | grep -E "^(version|appVersion):" + + - name: Update values.yaml with image tags + run: | + # Replace placeholders with actual values + sed -i "s/ATLAS_LATEST_IMAGE_TAG/${{ steps.commit.outputs.id }}/g" helm/${{ matrix.chart }}/values.yaml + sed -i "s/ATLAS_BRANCH_NAME/${{ steps.branch.outputs.name }}/g" helm/${{ matrix.chart }}/values.yaml + + echo "Image configuration in ${{ matrix.chart }}/values.yaml:" + grep -A 3 "image:" helm/${{ matrix.chart }}/values.yaml | head -10 + + - name: Update helm dependencies + run: | + cd helm/${{ matrix.chart }} + helm dependency update + + echo "Chart dependencies:" + ls -la charts/ + + - name: Package helm chart + run: | + mkdir -p helm-packages + helm package helm/${{ matrix.chart }}/ --destination ./helm-packages/ + + echo "Packaged charts:" + ls -lh helm-packages/ + + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: $GITHUB_ACTOR + password: ${{ secrets.ORG_PAT_GITHUB }} + + - name: Push chart to GHCR (OCI Registry) + run: | + CHART_FILE=$(ls helm-packages/${{ matrix.chart }}-*.tgz) + echo "Pushing chart: ${CHART_FILE}" + + helm push ${CHART_FILE} oci://ghcr.io/atlanhq/helm-charts + + echo "✅ Chart published successfully!" 
+ echo "📦 Chart: ${{ matrix.chart }}" + echo "📌 Version: ${{ steps.version.outputs.chart }}" + echo "🏷️ Registry: oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }}" + + - name: Create GitHub Release + uses: ncipollo/release-action@v1 + with: + tag: helm-${{ matrix.chart }}-v${{ steps.version.outputs.chart }} + name: "${{ matrix.chart }} Helm Chart v${{ steps.version.outputs.chart }}" + body: | + ## 📦 ${{ matrix.chart }} Helm Chart Release + + **Chart**: `${{ matrix.chart }}` + **Chart Version**: `${{ steps.version.outputs.chart }}` + **App Version**: `${{ steps.commit.outputs.id }}` + **Branch**: `${{ steps.branch.outputs.name }}` + + ### 🐳 Docker Image + ``` + ghcr.io/atlanhq/atlas-metastore-${{ steps.branch.outputs.name }}:${{ steps.commit.outputs.id }} + ``` + + ### 📥 Installation + + **Via OCI Registry (Recommended):** + ```bash + helm install ${{ matrix.chart }} oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }} \ + --version ${{ steps.version.outputs.chart }} + ``` + + **Via Downloaded Chart:** + ```bash + helm install ${{ matrix.chart }} ./${{ matrix.chart }}-${{ steps.version.outputs.chart }}.tgz + ``` + artifacts: "./helm-packages/${{ matrix.chart }}-*.tgz" + token: ${{ secrets.GITHUB_TOKEN }} + makeLatest: false + + - name: Chart publish summary + run: | + echo "## 🎉 Helm Chart Published Successfully!" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Chart**: ${{ matrix.chart }}" >> $GITHUB_STEP_SUMMARY + echo "**Version**: ${{ steps.version.outputs.chart }}" >> $GITHUB_STEP_SUMMARY + echo "**Registry**: oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### Installation Command" >> $GITHUB_STEP_SUMMARY + echo '```bash' >> $GITHUB_STEP_SUMMARY + echo "helm install ${{ matrix.chart }} oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }} --version ${{ steps.version.outputs.chart }}" >> $GITHUB_STEP_SUMMARY + echo '```' >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/helm/atlas-read/Chart.yaml b/helm/atlas-read/Chart.yaml new file mode 100644 index 00000000000..82124dece3e --- /dev/null +++ b/helm/atlas-read/Chart.yaml @@ -0,0 +1,28 @@ +apiVersion: v2 +name: atlas-read +description: Apache Atlas Read Replica for Metadata Management +type: application +version: 1.0.0 +appVersion: "3.0.0" # Will be updated by CI with commit ID +maintainers: + - name: Atlan Engineering + email: engineering@atlan.com +keywords: + - atlas + - atlas-read + - metadata + - read-replica + - apache-atlas +sources: + - https://github.com/atlanhq/atlas-metastore +home: https://github.com/atlanhq/atlas-metastore +dependencies: + - name: cassandra-online-dc + repository: file://./charts/cassandra-online-dc + version: 0.x.x + - name: elasticsearch-read + repository: file://./charts/elasticsearch-read + version: 7.x.x + - name: elasticsearch-exporter-read + repository: file://./charts/elasticsearch-exporter-read + version: 3.3.0 diff --git a/helm/atlas-read/README.md b/helm/atlas-read/README.md new file mode 100644 index 00000000000..d8be6656da1 --- /dev/null +++ b/helm/atlas-read/README.md @@ -0,0 +1,2 @@ +# atlas +This chart will install the apache atlas which use elasticsearch and cassandra. diff --git a/helm/atlas-read/charts/cassandra-online-dc/.helmignore b/helm/atlas-read/charts/cassandra-online-dc/.helmignore new file mode 100755 index 00000000000..5e03def0cfb --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/.helmignore @@ -0,0 +1,17 @@ +# Patterns to ignore when building packages. 
+# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +OWNERS diff --git a/helm/atlas-read/charts/cassandra-online-dc/Chart.yaml b/helm/atlas-read/charts/cassandra-online-dc/Chart.yaml new file mode 100755 index 00000000000..8f5ed5235d7 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/Chart.yaml @@ -0,0 +1,19 @@ +apiVersion: v2 +appVersion: 3.11.5 +description: Apache Cassandra is a free and open-source distributed database management + system designed to handle large amounts of data across many commodity servers, providing + high availability with no single point of failure. +engine: gotpl +home: http://cassandra.apache.org +icon: https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Cassandra_logo.svg/330px-Cassandra_logo.svg.png +keywords: +- cassandra +- database +- nosql +maintainers: +- email: goonohc@gmail.com + name: KongZ +- email: maor.friedman@redhat.com + name: maorfr +name: cassandra-online-dc +version: 0.14.4 diff --git a/helm/atlas-read/charts/cassandra-online-dc/README.md b/helm/atlas-read/charts/cassandra-online-dc/README.md new file mode 100755 index 00000000000..796fe331d2d --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/README.md @@ -0,0 +1,215 @@ +# Cassandra +A Cassandra Chart for Kubernetes + +## Install Chart +To install the Cassandra Chart into your Kubernetes cluster (This Chart requires persistent volume by default, you may need to create a storage class before install chart. To create storage class, see [Persist data](#persist_data) section) + +```bash +helm install --namespace "cassandra" -n "cassandra" incubator/cassandra +``` + +After installation succeeds, you can get a status of Chart + +```bash +helm status "cassandra" +``` + +If you want to delete your Chart, use this command +```bash +helm delete --purge "cassandra" +``` + +## Upgrading + +To upgrade your Cassandra release, simply run + +```bash +helm upgrade "cassandra" incubator/cassandra +``` + +### 0.12.0 + +This version fixes https://github.com/helm/charts/issues/7803 by removing mutable labels in `spec.VolumeClaimTemplate.metadata.labels` so that it is upgradable. + +Until this version, in order to upgrade, you have to delete the Cassandra StatefulSet before upgrading: +```bash +$ kubectl delete statefulset --cascade=false my-cassandra-release +``` + + +## Persist data +You need to create `StorageClass` before able to persist data in persistent volume. +To create a `StorageClass` on Google Cloud, run the following + +```bash +kubectl create -f sample/create-storage-gce.yaml +``` + +And set the following values in `values.yaml` + +```yaml +persistence: + enabled: true +``` + +If you want to create a `StorageClass` on other platform, please see documentation here [https://kubernetes.io/docs/user-guide/persistent-volumes/](https://kubernetes.io/docs/user-guide/persistent-volumes/) + +When running a cluster without persistence, the termination of a pod will first initiate a decommissioning of that pod. +Depending on the amount of data stored inside the cluster this may take a while. In order to complete a graceful +termination, pods need to get more time for it. 
Set the following values in `values.yaml`: + +```yaml +podSettings: + terminationGracePeriodSeconds: 1800 +``` + +## Install Chart with specific cluster size +By default, this Chart will create a cassandra with 3 nodes. If you want to change the cluster size during installation, you can use `--set config.cluster_size={value}` argument. Or edit `values.yaml` + +For example: +Set cluster size to 5 + +```bash +helm install --namespace "cassandra" -n "cassandra" --set config.cluster_size=5 incubator/cassandra/ +``` + +## Install Chart with specific resource size +By default, this Chart will create a cassandra with CPU 2 vCPU and 4Gi of memory which is suitable for development environment. +If you want to use this Chart for production, I would recommend to update the CPU to 4 vCPU and 16Gi. Also increase size of `max_heap_size` and `heap_new_size`. +To update the settings, edit `values.yaml` + +## Install Chart with specific node +Sometime you may need to deploy your cassandra to specific nodes to allocate resources. You can use node selector by edit `nodes.enabled=true` in `values.yaml` +For example, you have 6 vms in node pools and you want to deploy cassandra to node which labeled as `cloud.google.com/gke-nodepool: pool-db` + +Set the following values in `values.yaml` + +```yaml +nodes: + enabled: true + selector: + nodeSelector: + cloud.google.com/gke-nodepool: pool-db +``` + +## Configuration + +The following table lists the configurable parameters of the Cassandra chart and their default values. + +| Parameter | Description | Default | +| ----------------------- | --------------------------------------------- | ---------------------------------------------------------- | +| `image.repo` | `cassandra` image repository | `cassandra` | +| `image.tag` | `cassandra` image tag | `3.11.5` | +| `image.pullPolicy` | Image pull policy | `Always` if `imageTag` is `latest`, else `IfNotPresent` | +| `image.pullSecrets` | Image pull secrets | `nil` | +| `config.cluster_domain` | The name of the cluster domain. | `cluster.local` | +| `config.cluster_name` | The name of the cluster. | `cassandra` | +| `config.cluster_size` | The number of nodes in the cluster. | `3` | +| `config.seed_size` | The number of seed nodes used to bootstrap new clients joining the cluster. | `2` | +| `config.seeds` | The comma-separated list of seed nodes. 
| Automatically generated according to `.Release.Name` and `config.seed_size` | +| `config.num_tokens` | Initdb Arguments | `256` | +| `config.dc_name` | Initdb Arguments | `DC1` | +| `config.rack_name` | Initdb Arguments | `RAC1` | +| `config.endpoint_snitch` | Initdb Arguments | `SimpleSnitch` | +| `config.max_heap_size` | Initdb Arguments | `2048M` | +| `config.heap_new_size` | Initdb Arguments | `512M` | +| `config.ports.cql` | Initdb Arguments | `9042` | +| `config.ports.thrift` | Initdb Arguments | `9160` | +| `config.ports.agent` | The port of the JVM Agent (if any) | `nil` | +| `config.start_rpc` | Initdb Arguments | `false` | +| `configOverrides` | Overrides config files in /etc/cassandra dir | `{}` | +| `commandOverrides` | Overrides default docker command | `[]` | +| `argsOverrides` | Overrides default docker args | `[]` | +| `env` | Custom env variables | `{}` | +| `schedulerName` | Name of k8s scheduler (other than the default) | `nil` | +| `persistence.enabled` | Use a PVC to persist data | `true` | +| `persistence.storageClass` | Storage class of backing PVC | `nil` (uses alpha storage class annotation) | +| `persistence.accessMode` | Use volume as ReadOnly or ReadWrite | `ReadWriteOnce` | +| `persistence.size` | Size of data volume | `10Gi` | +| `resources` | CPU/Memory resource requests/limits | Memory: `4Gi`, CPU: `2` | +| `service.type` | k8s service type exposing ports, e.g. `NodePort`| `ClusterIP` | +| `service.annotations` | Annotations to apply to cassandra service | `""` | +| `podManagementPolicy` | podManagementPolicy of the StatefulSet | `OrderedReady` | +| `podDisruptionBudget` | Pod distruption budget | `{}` | +| `podAnnotations` | pod annotations for the StatefulSet | `{}` | +| `updateStrategy.type` | UpdateStrategy of the StatefulSet | `OnDelete` | +| `livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `90` | +| `livenessProbe.periodSeconds` | How often to perform the probe | `30` | +| `livenessProbe.timeoutSeconds` | When the probe times out | `5` | +| `livenessProbe.successThreshold` | Minimum consecutive successes for the probe to be considered successful after having failed. | `1` | +| `livenessProbe.failureThreshold` | Minimum consecutive failures for the probe to be considered failed after having succeeded. | `3` | +| `readinessProbe.initialDelaySeconds` | Delay before readiness probe is initiated | `90` | +| `readinessProbe.periodSeconds` | How often to perform the probe | `30` | +| `readinessProbe.timeoutSeconds` | When the probe times out | `5` | +| `readinessProbe.successThreshold` | Minimum consecutive successes for the probe to be considered successful after having failed. | `1` | +| `readinessProbe.failureThreshold` | Minimum consecutive failures for the probe to be considered failed after having succeeded. | `3` | +| `readinessProbe.address` | Address to use for checking node has joined the cluster and is ready. 
| `${POD_IP}` | +| `rbac.create` | Specifies whether RBAC resources should be created | `true` | +| `serviceAccount.create` | Specifies whether a ServiceAccount should be created | `true` | +| `serviceAccount.name` | The name of the ServiceAccount to use | | +| `backup.enabled` | Enable backup on chart installation | `false` | +| `backup.schedule` | Keyspaces to backup, each with cron time | | +| `backup.annotations` | Backup pod annotations | iam.amazonaws.com/role: `cain` | +| `backup.image.repository` | Backup image repository | `maorfr/cain` | +| `backup.image.tag` | Backup image tag | `0.6.0` | +| `backup.extraArgs` | Additional arguments for cain | `[]` | +| `backup.env` | Backup environment variables | AWS_REGION: `us-east-1` | +| `backup.resources` | Backup CPU/Memory resource requests/limits | Memory: `1Gi`, CPU: `1` | +| `backup.destination` | Destination to store backup artifacts | `s3://bucket/cassandra` | +| `backup.google.serviceAccountSecret` | Secret containing credentials if GCS is used as destination | | +| `exporter.enabled` | Enable Cassandra exporter | `false` | +| `exporter.servicemonitor.enabled` | Enable ServiceMonitor for exporter | `true` | +| `exporter.servicemonitor.additionalLabels`| Additional labels for Service Monitor | `{}` | +| `exporter.image.repo` | Exporter image repository | `criteord/cassandra_exporter` | +| `exporter.image.tag` | Exporter image tag | `2.0.2` | +| `exporter.port` | Exporter port | `5556` | +| `exporter.jvmOpts` | Exporter additional JVM options | | +| `exporter.resources` | Exporter CPU/Memory resource requests/limits | `{}` | +| `affinity` | Kubernetes node affinity | `{}` | +| `tolerations` | Kubernetes node tolerations | `[]` | + + +## Scale cassandra +When you want to change the cluster size of your cassandra, you can use the helm upgrade command. 
+ +```bash +helm upgrade --set config.cluster_size=5 cassandra incubator/cassandra +``` + +## Get cassandra status +You can get your cassandra cluster status by running the command + +```bash +kubectl exec -it --namespace cassandra $(kubectl get pods --namespace cassandra -l app=cassandra-cassandra -o jsonpath='{.items[0].metadata.name}') nodetool status +``` + +Output +```bash +Datacenter: asia-east1 +====================== +Status=Up/Down +|/ State=Normal/Leaving/Joining/Moving +-- Address Load Tokens Owns (effective) Host ID Rack +UN 10.8.1.11 108.45 KiB 256 66.1% 410cc9da-8993-4dc2-9026-1dd381874c54 a +UN 10.8.4.12 84.08 KiB 256 68.7% 96e159e1-ef94-406e-a0be-e58fbd32a830 c +UN 10.8.3.6 103.07 KiB 256 65.2% 1a42b953-8728-4139-b070-b855b8fff326 b +``` + +## Benchmark +You can use [cassandra-stress](https://docs.datastax.com/en/cassandra/3.0/cassandra/tools/toolsCStress.html) tool to run the benchmark on the cluster by the following command + +```bash +kubectl exec -it --namespace cassandra $(kubectl get pods --namespace cassandra -l app=cassandra-cassandra -o jsonpath='{.items[0].metadata.name}') cassandra-stress +``` + +Example of `cassandra-stress` argument + - Run both read and write with ration 9:1 + - Operator total 1 million keys with uniform distribution + - Use QUORUM for read/write + - Generate 50 threads + - Generate result in graph + - Use NetworkTopologyStrategy with replica factor 2 + +```bash +cassandra-stress mixed ratio\(write=1,read=9\) n=1000000 cl=QUORUM -pop dist=UNIFORM\(1..1000000\) -mode native cql3 -rate threads=50 -log file=~/mixed_autorate_r9w1_1M.log -graph file=test2.html title=test revision=test2 -schema "replication(strategy=NetworkTopologyStrategy, factor=2)" +``` diff --git a/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml b/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml new file mode 100755 index 00000000000..2467b95227e --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml @@ -0,0 +1,7 @@ +kind: StorageClass +apiVersion: storage.k8s.io/v1 +metadata: + name: generic +provisioner: kubernetes.io/gce-pd +parameters: + type: pd-ssd diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/NOTES.txt b/helm/atlas-read/charts/cassandra-online-dc/templates/NOTES.txt new file mode 100755 index 00000000000..9ecb0045ddd --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/NOTES.txt @@ -0,0 +1,35 @@ +Cassandra CQL can be accessed via port {{ .Values.config.ports.cql }} on the following DNS name from within your cluster: +Cassandra Thrift can be accessed via port {{ .Values.config.ports.thrift }} on the following DNS name from within your cluster: + +If you want to connect to the remote instance with your local Cassandra CQL cli. To forward the API port to localhost:9042 run the following: +- kubectl port-forward --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{ .items[0].metadata.name }') 9042:{{ .Values.config.ports.cql }} + +If you want to connect to the Cassandra CQL run the following: +{{- if contains "NodePort" .Values.service.type }} +- export CQL_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "cassandra.fullname" . 
}}) +- export CQL_HOST=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") +- cqlsh $CQL_HOST $CQL_PORT + +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + Watch the status with: 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "cassandra.fullname" . }}' +- export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "cassandra.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +- echo cqlsh $SERVICE_IP +{{- else if contains "ClusterIP" .Values.service.type }} +- kubectl port-forward --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "cassandra.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 9042:{{ .Values.config.ports.cql }} + echo cqlsh 127.0.0.1 9042 +{{- end }} + +You can also see the cluster status by run the following: +- kubectl exec -it --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{.items[0].metadata.name}') nodetool status + +To tail the logs for the Cassandra pod run the following: +- kubectl logs -f --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{ .items[0].metadata.name }') + +{{- if not .Values.persistence.enabled }} + +Note that the cluster is running with node-local storage instead of PersistentVolumes. In order to prevent data loss, +pods will be decommissioned upon termination. Decommissioning may take some time, so you might also want to adjust the +pod termination gace period, which is currently set to {{ .Values.podSettings.terminationGracePeriodSeconds }} seconds. + +{{- end}} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/_helpers.tpl b/helm/atlas-read/charts/cassandra-online-dc/templates/_helpers.tpl new file mode 100755 index 00000000000..b8704209bf4 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/_helpers.tpl @@ -0,0 +1,43 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "cassandra.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cassandra.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "cassandra.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "cassandra.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "cassandra.fullname" .) .Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/backup/cronjob.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/backup/cronjob.yaml new file mode 100755 index 00000000000..87246e83690 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/backup/cronjob.yaml @@ -0,0 +1,117 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.backup.enabled }} +{{- $release := .Release }} +{{- $values := .Values }} +{{- $backup := $values.backup }} +{{- range $index, $schedule := $backup.schedule }} +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: {{ template "cassandra.fullname" $ }}-backup + namespace: {{ $.Values.Namespace }} + labels: + app: {{ template "cassandra.name" $ }}-cain + chart: {{ template "cassandra.chart" $ }} + release: "{{ $release.Name }}" + heritage: "{{ $release.Service }}" +spec: + schedule: {{ $schedule.cron | quote }} + concurrencyPolicy: Forbid + startingDeadlineSeconds: 120 + jobTemplate: + spec: + template: + metadata: + annotations: + {{ toYaml $backup.annotations }} + spec: + restartPolicy: OnFailure + serviceAccountName: {{ template "cassandra.serviceAccountName" $ }} + {{- with $.Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- $multiarchEnabled := and $.Values.multiarch (eq $.Values.multiarch.enabled true) }} + {{- if or $.Values.tolerations $multiarchEnabled }} + tolerations: + {{- if $.Values.tolerations }} + {{ toYaml $.Values.tolerations | nindent 12 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} + containers: + - name: cassandra-backup + {{- if and $.Values.multiarch.enabled $.Values.multiarch.image.cain }} + image: {{ $.Values.multiarch.image.cain }} + {{- else }} + image: "{{ $backup.image.repository }}:{{ $backup.image.tag }}" + {{- end }} + command: ["cain"] + args: + - backup + - --namespace + - {{ $release.Namespace }} + - --selector + - release={{ $release.Name }},app={{ template "cassandra.name" $ }} + - --keyspace + - {{ $schedule.keyspace }} + - --dst + - {{ $backup.destination }} + {{- with $backup.extraArgs }} +{{ toYaml . | indent 12 }} + {{- end }} + env: +{{- if $backup.google.serviceAccountSecret }} + - name: GOOGLE_APPLICATION_CREDENTIALS + value: "/etc/secrets/google/credentials.json" +{{- end }} + {{- with $backup.env }} +{{ toYaml . | indent 12 }} + {{- end }} +{{- $tierType := $.Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- with $backup.resources }} + resources: +{{ toYaml . 
| indent 14 }} + {{- end }} +{{- end }} +{{- if $backup.google.serviceAccountSecret }} + volumeMounts: + - name: google-service-account + mountPath: /etc/secrets/google/ +{{- end }} +{{- if $backup.google.serviceAccountSecret }} + volumes: + - name: google-service-account + secret: + secretName: {{ $backup.google.serviceAccountSecret | quote }} +{{- end }} + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ template "cassandra.fullname" $ }} + - key: release + operator: In + values: + - {{ $release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- with $values.tolerations }} + tolerations: +{{ toYaml . | indent 12 }} + {{- end }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/backup/rbac.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/backup/rbac.yaml new file mode 100755 index 00000000000..73742f49dbb --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/backup/rbac.yaml @@ -0,0 +1,55 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.backup.enabled }} +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "cassandra.serviceAccountName" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +--- +{{- end }} +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "cassandra.fullname" . }}-backup + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +rules: +- apiGroups: [""] + resources: ["pods", "pods/log"] + verbs: ["get", "list"] +- apiGroups: [""] + resources: ["pods/exec"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "cassandra.fullname" . }}-backup + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "cassandra.fullname" . }}-backup +subjects: +- kind: ServiceAccount + name: {{ template "cassandra.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml new file mode 100644 index 00000000000..4e7ed59b9e1 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml @@ -0,0 +1,1619 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: cassandra-online-dc-config + namespace: atlas +data: + cassandra.yaml: |- + # Cassandra storage config YAML + + # NOTE: + # See http://wiki.apache.org/cassandra/StorageConfiguration for + # full explanations of configuration directives + # /NOTE + + # The name of the cluster. 
This is mainly used to prevent machines in + # one logical cluster from joining another. + cluster_name: cassandra + + # This defines the number of tokens randomly assigned to this node on the ring + # The more tokens, relative to other nodes, the larger the proportion of data + # that this node will store. You probably want all nodes to have the same number + # of tokens assuming they have equal hardware capability. + # + # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, + # and will use the initial_token as described below. + # + # Specifying initial_token will override this setting on the node's initial start, + # on subsequent starts, this setting will apply even if initial token is set. + # + # If you already have a cluster with 1 token per node, and wish to migrate to + # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations + num_tokens: 256 + + # Triggers automatic allocation of num_tokens tokens for this node. The allocation + # algorithm attempts to choose tokens in a way that optimizes replicated load over + # the nodes in the datacenter for the replication strategy used by the specified + # keyspace. + # + # The load assigned to each node will be close to proportional to its number of + # vnodes. + # + # Only supported with the Murmur3Partitioner. + # allocate_tokens_for_keyspace: KEYSPACE + + # initial_token allows you to specify tokens manually. While you can use it with + # vnodes (num_tokens > 1, above) -- in which case you should provide a + # comma-separated list -- it's primarily used when adding nodes to legacy clusters + # that do not have vnodes enabled. + # initial_token: + + # See http://wiki.apache.org/cassandra/HintedHandoff + # May either be "true" or "false" to enable globally + hinted_handoff_enabled: true + + # When hinted_handoff_enabled is true, a black list of data centers that will not + # perform hinted handoff + # hinted_handoff_disabled_datacenters: + # - DC1 + # - DC2 + + # this defines the maximum amount of time a dead host will have hints + # generated. After it has been dead this long, new hints for it will not be + # created until it has been seen alive and gone down again. + max_hint_window_in_ms: 10800000 # 3 hours + + # Maximum throttle in KBs per second, per delivery thread. This will be + # reduced proportionally to the number of nodes in the cluster. (If there + # are two nodes in the cluster, each delivery thread will use the maximum + # rate; if there are three, each will throttle to half of the maximum, + # since we expect two nodes to be delivering hints simultaneously.) + hinted_handoff_throttle_in_kb: 1024 + + # Number of threads with which to deliver hints; + # Consider increasing this number when you have multi-dc deployments, since + # cross-dc handoff tends to be slower + max_hints_delivery_threads: 2 + + # Directory where Cassandra should store hints. + # If not set, the default directory is $CASSANDRA_HOME/data/hints. + # hints_directory: /var/lib/cassandra/hints + + # How often hints should be flushed from the internal buffers to disk. + # Will *not* trigger fsync. + hints_flush_period_in_ms: 10000 + + # Maximum size for a single hints file, in megabytes. + max_hints_file_size_in_mb: 128 + + # Compression to apply to the hint files. If omitted, hints files + # will be written uncompressed. LZ4, Snappy, and Deflate compressors + # are supported. 
+ #hints_compression: + # - class_name: LZ4Compressor + # parameters: + # - + + # Maximum throttle in KBs per second, total. This will be + # reduced proportionally to the number of nodes in the cluster. + batchlog_replay_throttle_in_kb: 1024 + + # Authentication backend, implementing IAuthenticator; used to identify users + # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, + # PasswordAuthenticator}. + # + # - AllowAllAuthenticator performs no checks - set it to disable authentication. + # - PasswordAuthenticator relies on username/password pairs to authenticate + # users. It keeps usernames and hashed passwords in system_auth.roles table. + # Please increase system_auth keyspace replication factor if you use this authenticator. + # If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) + authenticator: AllowAllAuthenticator + + # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions + # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, + # CassandraAuthorizer}. + # + # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. + # - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please + # increase system_auth keyspace replication factor if you use this authorizer. + authorizer: AllowAllAuthorizer + + # Part of the Authentication & Authorization backend, implementing IRoleManager; used + # to maintain grants and memberships between roles. + # Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, + # which stores role information in the system_auth keyspace. Most functions of the + # IRoleManager require an authenticated login, so unless the configured IAuthenticator + # actually implements authentication, most of this functionality will be unavailable. + # + # - CassandraRoleManager stores role data in the system_auth keyspace. Please + # increase system_auth keyspace replication factor if you use this role manager. + role_manager: CassandraRoleManager + + # Validity period for roles cache (fetching granted roles can be an expensive + # operation depending on the role manager, CassandraRoleManager is one example) + # Granted roles are cached for authenticated sessions in AuthenticatedUser and + # after the period specified here, become eligible for (async) reload. + # Defaults to 2000, set to 0 to disable caching entirely. + # Will be disabled automatically for AllowAllAuthenticator. + roles_validity_in_ms: 2000 + + # Refresh interval for roles cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. If roles_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as roles_validity_in_ms. + # roles_update_interval_in_ms: 2000 + + # Validity period for permissions cache (fetching permissions can be an + # expensive operation depending on the authorizer, CassandraAuthorizer is + # one example). Defaults to 2000, set to 0 to disable. + # Will be disabled automatically for AllowAllAuthorizer. + permissions_validity_in_ms: 2000 + + # Refresh interval for permissions cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. 
If permissions_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as permissions_validity_in_ms. + # permissions_update_interval_in_ms: 2000 + + # Validity period for credentials cache. This cache is tightly coupled to + # the provided PasswordAuthenticator implementation of IAuthenticator. If + # another IAuthenticator implementation is configured, this cache will not + # be automatically used and so the following settings will have no effect. + # Please note, credentials are cached in their encrypted form, so while + # activating this cache may reduce the number of queries made to the + # underlying table, it may not bring a significant reduction in the + # latency of individual authentication attempts. + # Defaults to 2000, set to 0 to disable credentials caching. + credentials_validity_in_ms: 2000 + + # Refresh interval for credentials cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. If credentials_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as credentials_validity_in_ms. + # credentials_update_interval_in_ms: 2000 + + # The partitioner is responsible for distributing groups of rows (by + # partition key) across nodes in the cluster. You should leave this + # alone for new clusters. The partitioner can NOT be changed without + # reloading all data, so when upgrading you should set this to the + # same partitioner you were already using. + # + # Besides Murmur3Partitioner, partitioners included for backwards + # compatibility include RandomPartitioner, ByteOrderedPartitioner, and + # OrderPreservingPartitioner. + # + partitioner: org.apache.cassandra.dht.Murmur3Partitioner + + # Directories where Cassandra should store data on disk. Cassandra + # will spread data evenly across them, subject to the granularity of + # the configured compaction strategy. + # If not set, the default directory is $CASSANDRA_HOME/data/data. + # data_file_directories: + # - /var/lib/cassandra/data + + # commit log. when running on magnetic HDD, this should be a + # separate spindle than the data directories. + # If not set, the default directory is $CASSANDRA_HOME/data/commitlog. + # commitlog_directory: /var/lib/cassandra/commitlog + + # Enable / disable CDC functionality on a per-node basis. This modifies the logic used + # for write path allocation rejection (standard: never reject. cdc: reject Mutation + # containing a CDC-enabled table if at space limit in cdc_raw_directory). + cdc_enabled: false + + # CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the + # segment contains mutations for a CDC-enabled table. This should be placed on a + # separate spindle than the data directories. If not set, the default directory is + # $CASSANDRA_HOME/data/cdc_raw. + # cdc_raw_directory: /var/lib/cassandra/cdc_raw + + # Policy for data disk failures: + # + # die + # shut down gossip and client transports and kill the JVM for any fs errors or + # single-sstable errors, so the node can be replaced. + # + # stop_paranoid + # shut down gossip and client transports even for single-sstable errors, + # kill the JVM for errors during startup. + # + # stop + # shut down gossip and client transports, leaving the node effectively dead, but + # can still be inspected via JMX, kill the JVM for errors during startup. 
+ # + # best_effort + # stop using the failed disk and respond to requests based on + # remaining available sstables. This means you WILL see obsolete + # data at CL.ONE! + # + # ignore + # ignore fatal errors and let requests fail, as in pre-1.2 Cassandra + disk_failure_policy: stop + + # Policy for commit disk failures: + # + # die + # shut down gossip and Thrift and kill the JVM, so the node can be replaced. + # + # stop + # shut down gossip and Thrift, leaving the node effectively dead, but + # can still be inspected via JMX. + # + # stop_commit + # shutdown the commit log, letting writes collect but + # continuing to service reads, as in pre-2.0.5 Cassandra + # + # ignore + # ignore fatal errors and let the batches fail + commit_failure_policy: stop + + # Maximum size of the native protocol prepared statement cache + # + # Valid values are either "auto" (omitting the value) or a value greater 0. + # + # Note that specifying a too large value will result in long running GCs and possbily + # out-of-memory errors. Keep the value at a small fraction of the heap. + # + # If you constantly see "prepared statements discarded in the last minute because + # cache limit reached" messages, the first step is to investigate the root cause + # of these messages and check whether prepared statements are used correctly - + # i.e. use bind markers for variable parts. + # + # Do only change the default value, if you really have more prepared statements than + # fit in the cache. In most cases it is not neccessary to change this value. + # Constantly re-preparing statements is a performance penalty. + # + # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater + prepared_statements_cache_size_mb: + + # Maximum size of the Thrift prepared statement cache + # + # If you do not use Thrift at all, it is safe to leave this value at "auto". + # + # See description of 'prepared_statements_cache_size_mb' above for more information. + # + # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater + thrift_prepared_statements_cache_size_mb: + + # Maximum size of the key cache in memory. + # + # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the + # minimum, sometimes more. The key cache is fairly tiny for the amount of + # time it saves, so it's worthwhile to use it at large numbers. + # The row cache saves even more time, but must contain the entire row, + # so it is extremely space-intensive. It's best to only use the + # row cache if you have hot rows or static rows. + # + # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. + # + # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. + key_cache_size_in_mb: + + # Duration in seconds after which Cassandra should + # save the key cache. Caches are saved to saved_caches_directory as + # specified in this configuration file. + # + # Saved caches greatly improve cold-start speeds, and is relatively cheap in + # terms of I/O for the key cache. Row cache saving is much more expensive and + # has limited use. + # + # Default is 14400 or 4 hours. + key_cache_save_period: 14400 + + # Number of keys from the key cache to save + # Disabled by default, meaning all keys are going to be saved + # key_cache_keys_to_save: 100 + + # Row cache implementation class name. Available implementations: + # + # org.apache.cassandra.cache.OHCProvider + # Fully off-heap row cache implementation (default). 
+ # + # org.apache.cassandra.cache.SerializingCacheProvider + # This is the row cache implementation availabile + # in previous releases of Cassandra. + # row_cache_class_name: org.apache.cassandra.cache.OHCProvider + + # Maximum size of the row cache in memory. + # Please note that OHC cache implementation requires some additional off-heap memory to manage + # the map structures and some in-flight memory during operations before/after cache entries can be + # accounted against the cache capacity. This overhead is usually small compared to the whole capacity. + # Do not specify more memory that the system can afford in the worst usual situation and leave some + # headroom for OS block level cache. Do never allow your system to swap. + # + # Default value is 0, to disable row caching. + row_cache_size_in_mb: 0 + + # Duration in seconds after which Cassandra should save the row cache. + # Caches are saved to saved_caches_directory as specified in this configuration file. + # + # Saved caches greatly improve cold-start speeds, and is relatively cheap in + # terms of I/O for the key cache. Row cache saving is much more expensive and + # has limited use. + # + # Default is 0 to disable saving the row cache. + row_cache_save_period: 0 + + # Number of keys from the row cache to save. + # Specify 0 (which is the default), meaning all keys are going to be saved + # row_cache_keys_to_save: 100 + + # Maximum size of the counter cache in memory. + # + # Counter cache helps to reduce counter locks' contention for hot counter cells. + # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before + # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration + # of the lock hold, helping with hot counter cell updates, but will not allow skipping + # the read entirely. Only the local (clock, count) tuple of a counter cell is kept + # in memory, not the whole counter, so it's relatively cheap. + # + # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. + # + # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. + # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. + counter_cache_size_in_mb: + + # Duration in seconds after which Cassandra should + # save the counter cache (keys only). Caches are saved to saved_caches_directory as + # specified in this configuration file. + # + # Default is 7200 or 2 hours. + counter_cache_save_period: 7200 + + # Number of keys from the counter cache to save + # Disabled by default, meaning all keys are going to be saved + # counter_cache_keys_to_save: 100 + + # saved caches + # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. + # saved_caches_directory: /var/lib/cassandra/saved_caches + + # Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting + # the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup + # while still having the cache during runtime. + # cache_load_timeout_seconds: 30 + + # commitlog_sync may be either "periodic" or "batch." + # + # When in batch mode, Cassandra won't ack writes until the commit log + # has been fsynced to disk. It will wait + # commitlog_sync_batch_window_in_ms milliseconds between fsyncs. + # This window should be kept short because the writer threads will + # be unable to do extra work while waiting. 
(You may need to increase + # concurrent_writes for the same reason.) + # + # commitlog_sync: batch + # commitlog_sync_batch_window_in_ms: 2 + # + # the other option is "periodic" where writes may be acked immediately + # and the CommitLog is simply synced every commitlog_sync_period_in_ms + # milliseconds. + commitlog_sync: periodic + commitlog_sync_period_in_ms: 10000 + + # The size of the individual commitlog file segments. A commitlog + # segment may be archived, deleted, or recycled once all the data + # in it (potentially from each columnfamily in the system) has been + # flushed to sstables. + # + # The default size is 32, which is almost always fine, but if you are + # archiving commitlog segments (see commitlog_archiving.properties), + # then you probably want a finer granularity of archiving; 8 or 16 MB + # is reasonable. + # Max mutation size is also configurable via max_mutation_size_in_kb setting in + # cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. + # This should be positive and less than 2048. + # + # NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must + # be set to at least twice the size of max_mutation_size_in_kb / 1024 + # + {{- if eq .Values.janusgraph.atomic_mutation false}} + commitlog_segment_size_in_mb: 32 + {{- else }} + commitlog_segment_size_in_mb: {{ .Values.janusgraph.commitlog_segment_size_in_mb }} + {{- end }} + + # Compression to apply to the commit log. If omitted, the commit log + # will be written uncompressed. LZ4, Snappy, and Deflate compressors + # are supported. + # commitlog_compression: + # - class_name: LZ4Compressor + # parameters: + # - + + # any class that implements the SeedProvider interface and has a + # constructor that takes a Map of parameters will do. + seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local" + + # For workloads with more data than can fit in memory, Cassandra's + # bottleneck will be reads that need to fetch data from + # disk. "concurrent_reads" should be set to (16 * number_of_drives) in + # order to allow the operations to enqueue low enough in the stack + # that the OS and drives can reorder them. Same applies to + # "concurrent_counter_writes", since counter writes read the current + # values before incrementing and writing them back. + # + # On the other hand, since writes are almost never IO bound, the ideal + # number of "concurrent_writes" is dependent on the number of cores in + # your system; (8 * number_of_cores) is a good rule of thumb. + concurrent_reads: 32 + concurrent_writes: 32 + concurrent_counter_writes: 32 + + # For materialized view writes, as there is a read involved, so this should + # be limited by the less of concurrent reads or concurrent writes. + concurrent_materialized_view_writes: 32 + + # Maximum memory to use for sstable chunk cache and buffer pooling. + # 32MB of this are reserved for pooling buffers, the rest is used as an + # cache that holds uncompressed sstable chunks. + # Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, + # so is in addition to the memory allocated for heap. 
The cache also has on-heap + # overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size + # if the default 64k chunk size is used). + # Memory is only allocated when needed. + # file_cache_size_in_mb: 512 + + # Flag indicating whether to allocate on or off heap when the sstable buffer + # pool is exhausted, that is when it has exceeded the maximum memory + # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + + # buffer_pool_use_heap_if_exhausted: true + + # The strategy for optimizing disk read + # Possible values are: + # ssd (for solid state disks, the default) + # spinning (for spinning disks) + # disk_optimization_strategy: ssd + + # Total permitted memory to use for memtables. Cassandra will stop + # accepting writes when the limit is exceeded until a flush completes, + # and will trigger a flush based on memtable_cleanup_threshold + # If omitted, Cassandra will set both to 1/4 the size of the heap. + # memtable_heap_space_in_mb: 2048 + # memtable_offheap_space_in_mb: 2048 + + # memtable_cleanup_threshold is deprecated. The default calculation + # is the only reasonable choice. See the comments on memtable_flush_writers + # for more information. + # + # Ratio of occupied non-flushing memtable size to total permitted size + # that will trigger a flush of the largest memtable. Larger mct will + # mean larger flushes and hence less compaction, but also less concurrent + # flush activity which can make it difficult to keep your disks fed + # under heavy write load. + # + # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) + # memtable_cleanup_threshold: 0.11 + + # Specify the way Cassandra allocates and manages memtable memory. + # Options are: + # + # heap_buffers + # on heap nio buffers + # + # offheap_buffers + # off heap (direct) nio buffers + # + # offheap_objects + # off heap objects + memtable_allocation_type: heap_buffers + + # Limits the maximum Merkle tree depth to avoid consuming too much + # memory during repairs. + # + # The default setting of 18 generates trees of maximum size around + # 50 MiB / tree. If you are running out of memory during repairs consider + # lowering this to 15 (~6 MiB / tree) or lower, but try not to lower it + # too much past that or you will lose too much resolution and stream + # too much redundant data during repair. Cannot be set lower than 10. + # + # For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. + # + # repair_session_max_tree_depth: 18 + + # Total space to use for commit logs on disk. + # + # If space gets above this value, Cassandra will flush every dirty CF + # in the oldest segment and remove it. So a small total commitlog space + # will tend to cause more flush activity on less-active columnfamilies. + # + # The default value is the smaller of 8192, and 1/4 of the total space + # of the commitlog volume. + # + # commitlog_total_space_in_mb: 8192 + + # This sets the number of memtable flush writer threads per disk + # as well as the total number of memtables that can be flushed concurrently. + # These are generally a combination of compute and IO bound. + # + # Memtable flushing is more CPU efficient than memtable ingest and a single thread + # can keep up with the ingest rate of a whole server on a single fast disk + # until it temporarily becomes IO bound under contention typically with compaction. + # At that point you need multiple flush threads. At some point in the future + # it may become CPU bound all the time. 
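+
+ # Illustrative aside (not part of the upstream defaults): with the default of
+ # two flush writers for a single data directory (see below), the deprecated
+ # memtable_cleanup_threshold calculation above works out to 1 / (2 + 1) ~= 0.33,
+ # i.e. a flush of the largest memtable is triggered once non-flushing memtables
+ # occupy about a third of the permitted memtable space.
+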
+ # + # You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation + # metric which should be 0, but will be non-zero if threads are blocked waiting on flushing + # to free memory. + # + # memtable_flush_writers defaults to two for a single data directory. + # This means that two memtables can be flushed concurrently to the single data directory. + # If you have multiple data directories the default is one memtable flushing at a time + # but the flush will use a thread per data directory so you will get two or more writers. + # + # Two is generally enough to flush on a fast disk [array] mounted as a single data directory. + # Adding more flush writers will result in smaller more frequent flushes that introduce more + # compaction overhead. + # + # There is a direct tradeoff between number of memtables that can be flushed concurrently + # and flush size and frequency. More is not better you just need enough flush writers + # to never stall waiting for flushing to free memory. + # + #memtable_flush_writers: 2 + + # Total space to use for change-data-capture logs on disk. + # + # If space gets above this value, Cassandra will throw WriteTimeoutException + # on Mutations including tables with CDC enabled. A CDCCompactor is responsible + # for parsing the raw CDC logs and deleting them when parsing is completed. + # + # The default value is the min of 4096 mb and 1/8th of the total space + # of the drive where cdc_raw_directory resides. + # cdc_total_space_in_mb: 4096 + + # When we hit our cdc_raw limit and the CDCCompactor is either running behind + # or experiencing backpressure, we check at the following interval to see if any + # new space for cdc-tracked tables has been made available. Default to 250ms + # cdc_free_space_check_interval_ms: 250 + + # A fixed memory pool size in MB for for SSTable index summaries. If left + # empty, this will default to 5% of the heap size. If the memory usage of + # all index summaries exceeds this limit, SSTables with low read rates will + # shrink their index summaries in order to meet this limit. However, this + # is a best-effort process. In extreme conditions Cassandra may need to use + # more than this amount of memory. + index_summary_capacity_in_mb: + + # How frequently index summaries should be resampled. This is done + # periodically to redistribute memory from the fixed-size pool to sstables + # proportional their recent read rates. Setting to -1 will disable this + # process, leaving existing index summaries at their current sampling level. + index_summary_resize_interval_in_minutes: 60 + + # Whether to, when doing sequential writing, fsync() at intervals in + # order to force the operating system to flush the dirty + # buffers. Enable this to avoid sudden dirty buffer flushing from + # impacting read latencies. Almost always a good idea on SSDs; not + # necessarily on platters. + trickle_fsync: false + trickle_fsync_interval_in_kb: 10240 + + # TCP port, for commands and data + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + storage_port: 7000 + + # SSL port, for encrypted communication. Unused unless enabled in + # encryption_options + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + ssl_storage_port: 7001 + + # Address or interface to bind to and tell other Cassandra nodes to connect to. + # You _must_ change this if you want multiple nodes to be able to communicate! 
+ # + # Set listen_address OR listen_interface, not both. + # + # Leaving it blank leaves it up to InetAddress.getLocalHost(). This + # will always do the Right Thing _if_ the node is properly configured + # (hostname, name resolution, etc), and the Right Thing is to use the + # address associated with the hostname (it might not be). + # + # Setting listen_address to 0.0.0.0 is always wrong. + # + listen_address: 172.28.60.207 + + # Set listen_address OR listen_interface, not both. Interfaces must correspond + # to a single address, IP aliasing is not supported. + # listen_interface: eth0 + + # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address + # you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 + # address will be used. If true the first ipv6 address will be used. Defaults to false preferring + # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. + # listen_interface_prefer_ipv6: false + + # Address to broadcast to other Cassandra nodes + # Leaving this blank will set it to the same value as listen_address + broadcast_address: 172.28.60.207 + + # When using multiple physical network interfaces, set this + # to true to listen on broadcast_address in addition to + # the listen_address, allowing nodes to communicate in both + # interfaces. + # Ignore this property if the network configuration automatically + # routes between the public and private networks such as EC2. + # listen_on_broadcast_address: false + + # Internode authentication backend, implementing IInternodeAuthenticator; + # used to allow/disallow connections from peer nodes. + # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + + # Whether to start the native transport server. + # Please note that the address on which the native transport is bound is the + # same as the rpc_address. The port however is different and specified below. + start_native_transport: true + # port for the CQL native transport to listen for clients on + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + native_transport_port: 9042 + # Enabling native transport encryption in client_encryption_options allows you to either use + # encryption for the standard port or to use a dedicated, additional port along with the unencrypted + # standard native_transport_port. + # Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption + # for native_transport_port. Setting native_transport_port_ssl to a different value + # from native_transport_port will use encryption for native_transport_port_ssl while + # keeping native_transport_port unencrypted. + # native_transport_port_ssl: 9142 + # The maximum threads for handling requests when the native transport is used. + # This is similar to rpc_max_threads though the default differs slightly (and + # there is no native_transport_min_threads, idle threads will always be stopped + # after 30 seconds). + # native_transport_max_threads: 128 + # + # The maximum size of allowed frame. Frame (requests) larger than this will + # be rejected as invalid. The default is 256MB. If you're changing this parameter, + # you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. + # native_transport_max_frame_size_in_mb: 256 + + # The maximum number of concurrent client connections. + # The default is -1, which means unlimited. 
+ # native_transport_max_concurrent_connections: -1 + + # The maximum number of concurrent client connections per source ip. + # The default is -1, which means unlimited. + # native_transport_max_concurrent_connections_per_ip: -1 + + # Whether to start the thrift rpc server. + start_rpc: true + + # The address or interface to bind the Thrift RPC service and native transport + # server to. + # + # Set rpc_address OR rpc_interface, not both. + # + # Leaving rpc_address blank has the same effect as on listen_address + # (i.e. it will be based on the configured hostname of the node). + # + # Note that unlike listen_address, you can specify 0.0.0.0, but you must also + # set broadcast_rpc_address to a value other than 0.0.0.0. + # + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + rpc_address: 0.0.0.0 + + # Set rpc_address OR rpc_interface, not both. Interfaces must correspond + # to a single address, IP aliasing is not supported. + # rpc_interface: eth1 + + # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address + # you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 + # address will be used. If true the first ipv6 address will be used. Defaults to false preferring + # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. + # rpc_interface_prefer_ipv6: false + + # port for Thrift to listen for clients on + rpc_port: 9160 + + # RPC address to broadcast to drivers and other Cassandra nodes. This cannot + # be set to 0.0.0.0. If left blank, this will be set to the value of + # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must + # be set. + broadcast_rpc_address: 172.28.60.207 + + # enable or disable keepalive on rpc/native connections + rpc_keepalive: true + + # Cassandra provides two out-of-the-box options for the RPC Server: + # + # sync + # One thread per thrift connection. For a very large number of clients, memory + # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size + # per thread, and that will correspond to your use of virtual memory (but physical memory + # may be limited depending on use of stack space). + # + # hsha + # Stands for "half synchronous, half asynchronous." All thrift clients are handled + # asynchronously using a small number of threads that does not vary with the amount + # of thrift clients (and thus scales well to many clients). The rpc requests are still + # synchronous (one thread per active request). If hsha is selected then it is essential + # that rpc_max_threads is changed from the default value of unlimited. + # + # The default is sync because on Windows hsha is about 30% slower. On Linux, + # sync/hsha performance is about the same, with hsha of course using less memory. + # + # Alternatively, can provide your own RPC server by providing the fully-qualified class name + # of an o.a.c.t.TServerFactory that can create an instance of it. + rpc_server_type: sync + + # Uncomment rpc_min|max_thread to set request pool size limits. + # + # Regardless of your choice of RPC server (see above), the number of maximum requests in the + # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync + # RPC server, it also dictates the number of clients that can be connected at all). + # + # The default is unlimited and thus provides no protection against clients overwhelming the server. 
You are + # encouraged to set a maximum that makes sense for you in production, but do keep in mind that + # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. + # + # rpc_min_threads: 16 + # rpc_max_threads: 2048 + + # uncomment to set socket buffer sizes on rpc connections + # rpc_send_buff_size_in_bytes: + # rpc_recv_buff_size_in_bytes: + + # Uncomment to set socket buffer size for internode communication + # Note that when setting this, the buffer size is limited by net.core.wmem_max + # and when not setting it it is defined by net.ipv4.tcp_wmem + # See also: + # /proc/sys/net/core/wmem_max + # /proc/sys/net/core/rmem_max + # /proc/sys/net/ipv4/tcp_wmem + # /proc/sys/net/ipv4/tcp_wmem + # and 'man tcp' + # internode_send_buff_size_in_bytes: + + # Uncomment to set socket buffer size for internode communication + # Note that when setting this, the buffer size is limited by net.core.wmem_max + # and when not setting it it is defined by net.ipv4.tcp_wmem + # internode_recv_buff_size_in_bytes: + + # Frame size for thrift (maximum message length). + thrift_framed_transport_size_in_mb: 15 + + # Set to true to have Cassandra create a hard link to each sstable + # flushed or streamed locally in a backups/ subdirectory of the + # keyspace data. Removing these links is the operator's + # responsibility. + incremental_backups: false + + # Whether or not to take a snapshot before each compaction. Be + # careful using this option, since Cassandra won't clean up the + # snapshots for you. Mostly useful if you're paranoid when there + # is a data format change. + snapshot_before_compaction: false + + # Whether or not a snapshot is taken of the data before keyspace truncation + # or dropping of column families. The STRONGLY advised default of true + # should be used to provide data safety. If you set this flag to false, you will + # lose data on truncation or drop. + auto_snapshot: true + + # Granularity of the collation index of rows within a partition. + # Increase if your rows are large, or if you have a very large + # number of rows per partition. The competing goals are these: + # + # - a smaller granularity means more index entries are generated + # and looking up rows withing the partition by collation column + # is faster + # - but, Cassandra will keep the collation index in memory for hot + # rows (as part of the key cache), so a larger granularity means + # you can cache more hot rows + column_index_size_in_kb: 64 + + # Per sstable indexed key cache entries (the collation index in memory + # mentioned above) exceeding this size will not be held on heap. + # This means that only partition information is held on heap and the + # index entries are read from disk. + # + # Note that this size refers to the size of the + # serialized index information and not the size of the partition. + column_index_cache_size_in_kb: 2 + + # Number of simultaneous compactions to allow, NOT including + # validation "compactions" for anti-entropy repair. Simultaneous + # compactions can help preserve read performance in a mixed read/write + # workload, by mitigating the tendency of small sstables to accumulate + # during a single long running compactions. The default is usually + # fine and if you experience problems with compaction running too + # slowly or too fast, you should look at + # compaction_throughput_mb_per_sec first. + # + # concurrent_compactors defaults to the smaller of (number of disks, + # number of cores), with a minimum of 2 and a maximum of 8. 
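+
+ # Illustrative aside (not part of the upstream defaults): with that default, a
+ # node with one data directory and 16 cores gets min(1, 16) = 1, clamped up to
+ # the minimum of 2 concurrent compactors, while a node with 4 disks and 16
+ # cores gets min(4, 16) = 4.
+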
+ # + # If your data directories are backed by SSD, you should increase this + # to the number of cores. + #concurrent_compactors: 1 + + # Throttles compaction to the given total throughput across the entire + # system. The faster you insert data, the faster you need to compact in + # order to keep the sstable count down, but in general, setting this to + # 16 to 32 times the rate you are inserting data is more than sufficient. + # Setting this to 0 disables throttling. Note that this account for all types + # of compaction, including validation compaction. + compaction_throughput_mb_per_sec: 16 + + # When compacting, the replacement sstable(s) can be opened before they + # are completely written, and used in place of the prior sstables for + # any range that has been written. This helps to smoothly transfer reads + # between the sstables, reducing page cache churn and keeping hot rows hot + sstable_preemptive_open_interval_in_mb: 50 + + # Throttles all outbound streaming file transfers on this node to the + # given total throughput in Mbps. This is necessary because Cassandra does + # mostly sequential IO when streaming data during bootstrap or repair, which + # can lead to saturating the network connection and degrading rpc performance. + # When unset, the default is 200 Mbps or 25 MB/s. + # stream_throughput_outbound_megabits_per_sec: 200 + + # Throttles all streaming file transfer between the datacenters, + # this setting allows users to throttle inter dc stream throughput in addition + # to throttling all network stream traffic as configured with + # stream_throughput_outbound_megabits_per_sec + # When unset, the default is 200 Mbps or 25 MB/s + # inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + + # Server side timeouts for requests. The server will return a timeout exception + # to the client if it can't complete an operation within the corresponding + # timeout. Those settings are a protection against: + # 1) having client wait on an operation that might never terminate due to some + # failures. + # 2) operations that use too much CPU/read too much data (leading to memory build + # up) by putting a limit to how long an operation will execute. + # For this reason, you should avoid putting these settings too high. In other words, + # if you are timing out requests because of underlying resource constraints then + # increasing the timeout will just cause more problems. Of course putting them too + # low is equally ill-advised since clients could get timeouts even for successful + # operations just because the timeout setting is too tight. + + # How long the coordinator should wait for read operations to complete + read_request_timeout_in_ms: 5000 + # How long the coordinator should wait for seq or index scans to complete + range_request_timeout_in_ms: 10000 + # How long the coordinator should wait for writes to complete + write_request_timeout_in_ms: 2000 + # How long the coordinator should wait for counter writes to complete + counter_write_request_timeout_in_ms: 5000 + # How long a coordinator should continue to retry a CAS operation + # that contends with other proposals for the same row + cas_contention_timeout_in_ms: 1000 + # How long the coordinator should wait for truncates to complete + # (This can be much longer, because unless auto_snapshot is disabled + # we need to flush first so we can snapshot before removing the data.) 
+ truncate_request_timeout_in_ms: 60000 + # The default timeout for other, miscellaneous operations + request_timeout_in_ms: 10000 + + # How long before a node logs slow queries. Select queries that take longer than + # this timeout to execute, will generate an aggregated log message, so that slow queries + # can be identified. Set this value to zero to disable slow query logging. + slow_query_log_timeout_in_ms: 500 + + # Enable operation timeout information exchange between nodes to accurately + # measure request timeouts. If disabled, replicas will assume that requests + # were forwarded to them instantly by the coordinator, which means that + # under overload conditions we will waste that much extra time processing + # already-timed-out requests. + # + # Warning: before enabling this property make sure to ntp is installed + # and the times are synchronized between the nodes. + cross_node_timeout: false + + # Set keep-alive period for streaming + # This node will send a keep-alive message periodically with this period. + # If the node does not receive a keep-alive message from the peer for + # 2 keep-alive cycles the stream session times out and fail + # Default value is 300s (5 minutes), which means stalled stream + # times out in 10 minutes by default + # streaming_keep_alive_period_in_secs: 300 + + # phi value that must be reached for a host to be marked down. + # most users should never need to adjust this. + # phi_convict_threshold: 8 + + # endpoint_snitch -- Set this to a class that implements + # IEndpointSnitch. The snitch has two functions: + # + # - it teaches Cassandra enough about your network topology to route + # requests efficiently + # - it allows Cassandra to spread replicas around your cluster to avoid + # correlated failures. It does this by grouping machines into + # "datacenters" and "racks." Cassandra will do its best not to have + # more than one replica on the same "rack" (which may not actually + # be a physical location) + # + # CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH + # ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. + # This means that if you start with the default SimpleSnitch, which + # locates every node on "rack1" in "datacenter1", your only options + # if you need to add another datacenter are GossipingPropertyFileSnitch + # (and the older PFS). From there, if you want to migrate to an + # incompatible snitch like Ec2Snitch you can do it by adding new nodes + # under Ec2Snitch (which will locate them in a new "datacenter") and + # decommissioning the old ones. + # + # Out of the box, Cassandra provides: + # + # SimpleSnitch: + # Treats Strategy order as proximity. This can improve cache + # locality when disabling read repair. Only appropriate for + # single-datacenter deployments. + # + # GossipingPropertyFileSnitch + # This should be your go-to snitch for production use. The rack + # and datacenter for the local node are defined in + # cassandra-rackdc.properties and propagated to other nodes via + # gossip. If cassandra-topology.properties exists, it is used as a + # fallback, allowing migration from the PropertyFileSnitch. + # + # PropertyFileSnitch: + # Proximity is determined by rack and data center, which are + # explicitly configured in cassandra-topology.properties. + # + # Ec2Snitch: + # Appropriate for EC2 deployments in a single Region. Loads Region + # and Availability Zone information from the EC2 API. The Region is + # treated as the datacenter, and the Availability Zone as the rack. 
+ # Only private IPs are used, so this will not work across multiple + # Regions. + # + # Ec2MultiRegionSnitch: + # Uses public IPs as broadcast_address to allow cross-region + # connectivity. (Thus, you should set seed addresses to the public + # IP as well.) You will need to open the storage_port or + # ssl_storage_port on the public IP firewall. (For intra-Region + # traffic, Cassandra will switch to the private IP after + # establishing a connection.) + # + # RackInferringSnitch: + # Proximity is determined by rack and data center, which are + # assumed to correspond to the 3rd and 2nd octet of each node's IP + # address, respectively. Unless this happens to match your + # deployment conventions, this is best used as an example of + # writing a custom Snitch class and is provided in that spirit. + # + # You can use a custom Snitch by setting this to the full class name + # of the snitch, which will be assumed to be on your classpath. + endpoint_snitch: SimpleSnitch + + # controls how often to perform the more expensive part of host score + # calculation + dynamic_snitch_update_interval_in_ms: 100 + # controls how often to reset all host scores, allowing a bad host to + # possibly recover + dynamic_snitch_reset_interval_in_ms: 600000 + # if set greater than zero and read_repair_chance is < 1.0, this will allow + # 'pinning' of replicas to hosts in order to increase cache capacity. + # The badness threshold will control how much worse the pinned host has to be + # before the dynamic snitch will prefer other replicas over it. This is + # expressed as a double which represents a percentage. Thus, a value of + # 0.2 means Cassandra would continue to prefer the static snitch values + # until the pinned host was 20% worse than the fastest. + dynamic_snitch_badness_threshold: 0.1 + + # request_scheduler -- Set this to a class that implements + # RequestScheduler, which will schedule incoming client requests + # according to the specific policy. This is useful for multi-tenancy + # with a single Cassandra cluster. + # NOTE: This is specifically for requests from the client and does + # not affect inter node communication. + # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place + # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of + # client requests to a node with a separate queue for each + # request_scheduler_id. The scheduler is further customized by + # request_scheduler_options as described below. + request_scheduler: org.apache.cassandra.scheduler.NoScheduler + + # Scheduler Options vary based on the type of scheduler + # + # NoScheduler + # Has no options + # + # RoundRobin + # throttle_limit + # The throttle_limit is the number of in-flight + # requests per client. Requests beyond + # that limit are queued up until + # running requests can complete. + # The value of 80 here is twice the number of + # concurrent_reads + concurrent_writes. + # default_weight + # default_weight is optional and allows for + # overriding the default which is 1. + # weights + # Weights are optional and will default to 1 or the + # overridden default_weight. The weight translates into how + # many requests are handled during each turn of the + # RoundRobin, based on the scheduler id. + # + # request_scheduler_options: + # throttle_limit: 80 + # default_weight: 5 + # weights: + # Keyspace1: 1 + # Keyspace2: 5 + + # request_scheduler_id -- An identifier based on which to perform + # the request scheduling. Currently the only valid option is keyspace. 
+ # request_scheduler_id: keyspace + + # Enable or disable inter-node encryption + # JVM defaults for supported SSL socket protocols and cipher suites can + # be replaced using custom encryption options. This is not recommended + # unless you have policies in place that dictate certain settings, or + # need to disable vulnerable ciphers or protocols in case the JVM cannot + # be updated. + # FIPS compliant settings can be configured at JVM level and should not + # involve changing encryption settings here: + # https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html + # *NOTE* No custom encryption options are enabled at the moment + # The available internode options are : all, none, dc, rack + # + # If set to dc cassandra will encrypt the traffic between the DCs + # If set to rack cassandra will encrypt the traffic between the racks + # + # The passwords used in these options must match the passwords used when generating + # the keystore and truststore. For instructions on generating these files, see: + # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore + # + server_encryption_options: + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + # require_client_auth: false + # require_endpoint_verification: false + + # enable or disable client/server encryption. + client_encryption_options: + enabled: false + # If enabled and optional is set to true encrypted and unencrypted connections are handled. + optional: false + keystore: conf/.keystore + keystore_password: cassandra + # require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + + # internode_compression controls whether traffic between nodes is + # compressed. + # Can be: + # + # all + # all traffic is compressed + # + # dc + # traffic between different datacenters is compressed + # + # none + # nothing is compressed. + internode_compression: dc + + # Enable or disable tcp_nodelay for inter-dc communication. + # Disabling it will result in larger (but fewer) network packets being sent, + # reducing overhead from the TCP protocol itself, at the cost of increasing + # latency if you block for cross-datacenter responses. + inter_dc_tcp_nodelay: false + + # TTL for different trace types used during logging of the repair process. + tracetype_query_ttl: 86400 + tracetype_repair_ttl: 604800 + + # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level + # This threshold can be adjusted to minimize logging if necessary + # gc_log_threshold_in_ms: 200 + + # If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at + # INFO level + # UDFs (user defined functions) are disabled by default. 
+ # As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. + enable_user_defined_functions: false + + # Enables scripted UDFs (JavaScript UDFs). + # Java UDFs are always enabled, if enable_user_defined_functions is true. + # Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. + # This option has no effect, if enable_user_defined_functions is false. + enable_scripted_user_defined_functions: false + + # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. + # Lowering this value on Windows can provide much tighter latency and better throughput, however + # some virtualized environments may see a negative performance impact from changing this setting + # below their system default. The sysinternals 'clockres' tool can confirm your system's default + # setting. + windows_timer_interval: 1 + + + # Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from + # a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by + # the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys + # can still (and should!) be in the keystore and will be used on decrypt operations + # (to handle the case of key rotation). + # + # It is strongly recommended to download and install Java Cryptography Extension (JCE) + # Unlimited Strength Jurisdiction Policy Files for your version of the JDK. + # (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) + # + # Currently, only the following file types are supported for transparent data encryption, although + # more are coming in future cassandra releases: commitlog, hints + transparent_data_encryption_options: + enabled: false + chunk_length_kb: 64 + cipher: AES/CBC/PKCS5Padding + key_alias: testing:1 + # CBC IV length for AES needs to be 16 bytes (which is also the default size) + # iv_length: 16 + key_provider: + - class_name: org.apache.cassandra.security.JKSKeyProvider + parameters: + - keystore: conf/.keystore + keystore_password: cassandra + store_type: JCEKS + key_password: cassandra + + + ##################### + # SAFETY THRESHOLDS # + ##################### + + # When executing a scan, within or across a partition, we need to keep the + # tombstones seen in memory so we can return them to the coordinator, which + # will use them to make sure other replicas also know about the deleted rows. + # With workloads that generate a lot of tombstones, this can cause performance + # problems and even exaust the server heap. + # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) + # Adjust the thresholds here if you understand the dangers and want to + # scan more tombstones anyway. These thresholds may also be adjusted at runtime + # using the StorageService mbean. + tombstone_warn_threshold: 10000 + tombstone_failure_threshold: 1000000 + + # Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a + # mechanism called replica filtering protection to ensure that results from stale replicas do + # not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This + # mechanism materializes replica results by partition on-heap at the coordinator. The more possibly + # stale results returned by the replicas, the more rows materialized during the query. 
+ replica_filtering_protection: + # These thresholds exist to limit the damage severely out-of-date replicas can cause during these + # queries. They limit the number of rows from all replicas individual index and filtering queries + # can materialize on-heap to return correct results at the desired read consistency level. + # + # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged. + # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail. + # + # These thresholds may also be adjusted at runtime using the StorageService mbean. + # + # If the failure threshold is breached, it is likely that either the current page/fetch size + # is too large or one or more replicas is severely out-of-sync and in need of repair. + cached_rows_warn_threshold: 2000 + cached_rows_fail_threshold: 32000 + + {{- if eq .Values.janusgraph.atomic_mutation false}} + # Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. + # Caution should be taken on increasing the size of this threshold as it can lead to node instability. + batch_size_warn_threshold_in_kb: 5 + + # Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. + batch_size_fail_threshold_in_kb: 50 + {{- else }} + batch_size_warn_threshold_in_kb: {{ .Values.janusgraph.cassandra_batch_size_warn_threshold_in_kb }} + + batch_size_fail_threshold_in_kb: {{ .Values.janusgraph.cassandra_batch_size_failure_threshold_in_kb }} + {{- end }} + + # Log WARN on any batches not of type LOGGED than span across more partitions than this limit + unlogged_batch_across_partitions_warn_threshold: 10 + + # Log a warning when compacting partitions larger than this value + compaction_large_partition_warning_threshold_mb: 100 + + # GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level + # Adjust the threshold based on your application throughput requirement + # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level + gc_warn_threshold_in_ms: 1000 + + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption + # early. Any value size larger than this threshold will result into marking an SSTable + # as corrupted. This should be positive and less than 2048. + # max_value_size_in_mb: 256 + + # Back-pressure settings # + # If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation + # sent to replicas, with the aim of reducing pressure on overloaded replicas. + back_pressure_enabled: false + # The back-pressure strategy applied. + # The default implementation, RateBasedBackPressure, takes three arguments: + # high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. + # If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor; + # if above high ratio, the rate limiting is increased by the given factor; + # such factor is usually best configured between 1 and 10, use larger values for a faster recovery + # at the expense of potentially more dropped mutations; + # the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica, + # if SLOW at the speed of the slowest one. + # New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and + # provide a public constructor accepting a Map. 
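+
+ # Illustrative aside (not part of the upstream defaults): the batch thresholds
+ # above and commitlog_segment_size_in_mb earlier are both driven by this
+ # chart's values.yaml, along the lines of (values shown here are only an
+ # example, not the chart defaults):
+ #
+ # janusgraph:
+ #   atomic_mutation: true
+ #   commitlog_segment_size_in_mb: 64
+ #   cassandra_batch_size_warn_threshold_in_kb: 1024
+ #   cassandra_batch_size_failure_threshold_in_kb: 16384
+ #
+ # When raising the failure threshold, note that any single mutation is still
+ # capped by max_mutation_size_in_kb, which defaults to half the commitlog
+ # segment size (e.g. a 64 MB segment allows mutations up to 32768 KB).
+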
+ back_pressure_strategy: + - class_name: org.apache.cassandra.net.RateBasedBackPressure + parameters: + - high_ratio: 0.90 + factor: 5 + flow: FAST + + # Coalescing Strategies # + # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). + # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in + # virtualized environments, the point at which an application can be bound by network packet processing can be + # surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal + # doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process + # is sufficient for many applications such that no load starvation is experienced even without coalescing. + # There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages + # per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one + # trip to read from a socket, and all the task submission work can be done at the same time reducing context switching + # and increasing cache friendliness of network message processing. + # See CASSANDRA-8692 for details. + + # Strategy to use for coalescing messages in OutboundTcpConnection. + # Can be fixed, movingaverage, timehorizon, disabled (default). + # You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. + # otc_coalescing_strategy: DISABLED + + # How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first + # message is received before it will be sent with any accompanying messages. For moving average this is the + # maximum amount of time that will be waited as well as the interval at which messages must arrive on average + # for coalescing to be enabled. + # otc_coalescing_window_us: 200 + + # Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128. + # otc_coalescing_enough_coalesced_messages: 8 + + # How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection. + # Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory + # taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value + # will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU + # time and queue contention while iterating the backlog of messages. + # An interval of 0 disables any wait time, which is the behavior of former Cassandra versions. + # + # otc_backlog_expiration_interval_ms: 200 + + + ######################### + # EXPERIMENTAL FEATURES # + ######################### + + # Enables materialized view creation on this node. + # Materialized views are considered experimental and are not recommended for production use. + enable_materialized_views: true + + # Enables SASI index creation on this node. + # SASI indexes are considered experimental and are not recommended for production use. + enable_sasi_indexes: true + + # Enables the used of 'ALTER ... DROP COMPACT STORAGE' statements on this node. + # 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use. 
+ enable_drop_compact_storage: false + cassandra-env.sh: | + # Licensed to the Apache Software Foundation (ASF) under one + # or more contributor license agreements. See the NOTICE file + # distributed with this work for additional information + # regarding copyright ownership. The ASF licenses this file + # to you under the Apache License, Version 2.0 (the + # "License"); you may not use this file except in compliance + # with the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + + calculate_heap_sizes() + { + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi + } + + # Determine the sort of JVM we'll be running on. + java_ver_output=`"${JAVA:-java}" -version 2>&1` + jvmver=`echo "$java_ver_output" | grep '[openjdk|java] version' | awk -F'"' 'NR==1 {print $2}' | cut -d\- -f1` + JVM_VERSION=${jvmver%_*} + JVM_PATCH_VERSION=${jvmver#*_} + + if [ "$JVM_VERSION" \< "1.8" ] ; then + echo "Cassandra 3.0 and later require Java 8u40 or later." 
+ exit 1; + fi + + if [ "$JVM_VERSION" \< "1.8" ] && [ "$JVM_PATCH_VERSION" -lt 40 ] ; then + echo "Cassandra 3.0 and later require Java 8u40 or later." + exit 1; + fi + + jvm=`echo "$java_ver_output" | grep -A 1 '[openjdk|java] version' | awk 'NR==2 {print $1}'` + case "$jvm" in + OpenJDK) + JVM_VENDOR=OpenJDK + # this will be "64-Bit" or "32-Bit" + JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $2}'` + ;; + "Java(TM)") + JVM_VENDOR=Oracle + # this will be "64-Bit" or "32-Bit" + JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $3}'` + ;; + *) + # Help fill in other JVM values + JVM_VENDOR=other + JVM_ARCH=unknown + ;; + esac + + # Sets the path where logback and GC logs are written. + if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" + fi + + #GC log path has to be defined here because it needs to access CASSANDRA_HOME + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + + # Here we create the arguments that will get passed to the jvm when + # starting cassandra. + + # Read user-defined JVM options from jvm.options file + JVM_OPTS_FILE=$CASSANDRA_CONF/jvm.options + for opt in `grep "^-" $JVM_OPTS_FILE` + do + JVM_OPTS="$JVM_OPTS $opt" + done + + # Check what parameters were defined on jvm.options file to avoid conflicts + echo $JVM_OPTS | grep -q Xmn + DEFINED_XMN=$? + echo $JVM_OPTS | grep -q Xmx + DEFINED_XMX=$? + echo $JVM_OPTS | grep -q Xms + DEFINED_XMS=$? + echo $JVM_OPTS | grep -q UseConcMarkSweepGC + USING_CMS=$? + echo $JVM_OPTS | grep -q UseG1GC + USING_G1=$? + + # Override these to set the amount of memory to allocate to the JVM at + # start-up. For production use you may wish to adjust this for your + # environment. MAX_HEAP_SIZE is the total amount of memory dedicated + # to the Java heap. HEAP_NEWSIZE refers to the size of the young + # generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set + # or not (if you set one, set the other). + # + # The main trade-off for the young generation is that the larger it + # is, the longer GC pause times will be. The shorter it is, the more + # expensive GC will be (usually). + # + # The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause + # times. If in doubt, and if you do not particularly want to tweak, go with + # 100 MB per physical CPU core. + + #MAX_HEAP_SIZE="4G" + #HEAP_NEWSIZE="800M" + + # Set this to control the amount of arenas per-thread in glibc + #export MALLOC_ARENA_MAX=4 + + # only calculate the size if it's not set manually + if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes + elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 + fi + + if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 + fi + + # We only set -Xms and -Xmx if they were not defined on jvm.options file + # If defined, both Xmx and Xms should be defined together. + if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" + elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm.options file." + exit 1 + fi + + # We only set -Xmn flag if it was not defined in jvm.options file + # and if the CMS GC is being used + # If defined, both Xmn and Xmx should be defined together. 
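+    # Note: the DEFINED_*/USING_* variables above hold grep exit codes, so a value
+    # of 0 means the corresponding flag WAS found in jvm.options and a non-zero
+    # value means it was not.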
+ if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm.options file." + exit 1 + elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" + fi + + if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" + fi + + # provides hints to the JIT compiler + JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + + # add the jamm javaagent + JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.0.jar" + + # set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR + if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" + fi + + # stop the jvm on OutOfMemoryError as it can result in some data corruption + # uncomment the preferred option + # ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 + # For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words + # on white spaces without taking quotes into account + # JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" + # JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" + JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + + # print an heap histogram on OutOfMemoryError + # JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + + # jmx: metrics and administration interface + # + # add this if you're having trouble connecting: + # JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" + # + # see + # https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole + # for more on configuring JMX through firewalls, etc. (Short version: + # get it working with no firewall first.) + # + # Cassandra ships with JMX accessible *only* from localhost. + # To enable remote JMX connections, uncomment lines below + # with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity + # + if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes + fi + + # Specifies the default port over which Cassandra will be available for + # JMX connections. + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + JMX_PORT="7199" + + if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" + else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" + fi + + # jmx authentication and authorization options. By default, auth is only + # activated for remote connections but they can also be enabled for local only JMX + ## Basic file based authn & authz + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" + ## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. + ## JAAS login modules can be used for authentication by uncommenting these two properties. + ## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - + ## which delegates to the IAuthenticator configured in cassandra.yaml. 
See the sample JAAS configuration + ## file cassandra-jaas.config + #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" + #JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + + ## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, + ## uncomment this to use it. Requires one of the two authentication options to be enabled + #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + + # To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ + # directory. + # See http://cassandra.apache.org/doc/3.11/operating/metrics.html#jmx + # By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines + # to control its listen address and port. + #MX4J_ADDRESS="-Dmx4jaddress=127.0.0.1" + #MX4J_PORT="-Dmx4jport=8081" + + # Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 + # for SIGAR we have to set the java.library.path + # to the location of the native libraries. + JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/configmap.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/configmap.yaml new file mode 100755 index 00000000000..0001c57aa16 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/configmap.yaml @@ -0,0 +1,16 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.configOverrides }} +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ template "cassandra.name" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{ toYaml .Values.configOverrides | indent 2 }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/pdb.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/pdb.yaml new file mode 100755 index 00000000000..70083520407 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/pdb.yaml @@ -0,0 +1,24 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.podDisruptionBudget -}} +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + labels: + app: {{ template "cassandra.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} +spec: + selector: + matchLabels: + app: {{ template "cassandra.name" . 
}} + release: {{ .Release.Name }} +{{ toYaml .Values.podDisruptionBudget | indent 2 }} +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml new file mode 100644 index 00000000000..1341fad52d1 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml @@ -0,0 +1,95 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.reaper.enable -}} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "cassandra.fullname" . }}-reaper-cron + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }}-reaper + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + schedule: "00 11 * * 6" #At 11 AM Saturday, UTC. + failedJobsHistoryLimit: {{ .Values.reaper.failedJobsHistoryLimit }} + successfulJobsHistoryLimit: {{ .Values.reaper.successfulJobsHistoryLimit }} + concurrencyPolicy: {{ .Values.reaper.concurrencyPolicy }} + jobTemplate: + spec: + activeDeadlineSeconds: {{ .Values.reaper.activeDeadlineSeconds }} + backoffLimit: {{ .Values.reaper.backoffLimit }} + template: + spec: + restartPolicy: {{ .Values.reaper.restartPolicy }} + {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} + {{- if or .Values.tolerations $multiarchEnabled }} + tolerations: + {{- if .Values.tolerations }} + {{ toYaml .Values.tolerations | nindent 14 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 14 }} + {{- end }} + containers: + - name: reaper + {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper }} + image: {{ .Values.multiarch.image.reaper }} + {{- else }} + image: "{{ .Values.reaper.image.repository }}:{{ .Values.reaper.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.reaper.image.pullPolicy }} + env: + {{- range $key, $value := .Values.reaper.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.reaper.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /ping + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ping + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + resources: + requests: + cpu: {{ .Values.reaper.resources.requests.cpu }} + memory: {{ .Values.reaper.resources.requests.memory }} + limits: + cpu: {{ .Values.reaper.resources.limits.cpu }} + memory: {{ .Values.reaper.resources.limits.memory }} + - name: reaper-repair-sidecar + {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper_repair }} + image: {{ .Values.multiarch.image.reaper_repair }} + {{- else }} + image: "{{.Values.reaper.sidecar.image.repository}}:{{.Values.reaper.sidecar.image.tag}}" + {{- end }} + imagePullPolicy: {{.Values.reaper.sidecar.image.pullPolicy}} + env: + {{- range $key, $value := .Values.reaper.sidecar.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.reaper.sidecar.resources | nindent 20 }} + imagePullSecrets: + - name: {{ .Values.image.pullSecrets }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/service.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/service.yaml new file mode 100755 index 00000000000..2f4856252b7 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/service.yaml @@ -0,0 +1,48 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + clusterIP: None + type: {{ .Values.service.type }} + ports: + {{- if .Values.exporter.enabled }} + - name: metrics + port: 5556 + targetPort: {{ .Values.exporter.port }} + {{- end }} + - name: intra + port: 7000 + targetPort: 7000 + - name: tls + port: 7001 + targetPort: 7001 + - name: jmx + port: 7199 + targetPort: 7199 + - name: cql + port: {{ default 9042 .Values.config.ports.cql }} + targetPort: {{ default 9042 .Values.config.ports.cql }} + - name: thrift + port: {{ default 9160 .Values.config.ports.thrift }} + targetPort: {{ default 9160 .Values.config.ports.thrift }} + {{- if .Values.config.ports.agent }} + - name: agent + port: {{ .Values.config.ports.agent }} + targetPort: {{ .Values.config.ports.agent }} + {{- end }} + selector: + app: {{ template "cassandra.name" . 
}} + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/servicemonitor.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/servicemonitor.yaml new file mode 100755 index 00000000000..6cbe06bf2df --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/servicemonitor.yaml @@ -0,0 +1,31 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if and .Values.exporter.enabled .Values.exporter.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "cassandra.fullname" . }} + {{- if .Values.exporter.serviceMonitor.namespace }} + namespace: {{ .Values.exporter.serviceMonitor.namespace }} + {{- end }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "prometheus-operator" + heritage: {{ .Release.Service }} + {{- if .Values.exporter.serviceMonitor.additionalLabels }} +{{ toYaml .Values.exporter.serviceMonitor.additionalLabels | indent 4 }} + {{- end }} +spec: + jobLabel: {{ template "cassandra.name" . }} + endpoints: + - port: metrics + interval: 60s + path: /metrics + selector: + matchLabels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + namespaceSelector: + any: true + {{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml new file mode 100755 index 00000000000..b3cdf36d5b8 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml @@ -0,0 +1,442 @@ +{{- if or .Values.global.svcIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- $cloud := .Values.global.cloud }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "cassandra-online-dc-config" +spec: + selector: + matchLabels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + serviceName: {{ template "cassandra.fullname" . }} + replicas: {{ .Values.config.cluster_size }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy.type }} + template: + metadata: + labels: + app: {{ template "cassandra.name" . 
}} + release: {{ .Release.Name }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} +{{- if .Values.selector }} +{{ toYaml .Values.selector | indent 6 }} +{{- end }} + {{- if .Values.securityContext.enabled }} + securityContext: + fsGroup: {{ .Values.securityContext.fsGroup }} + runAsUser: {{ .Values.securityContext.runAsUser }} + {{- end }} +{{- $tierType := .Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + affinity: + nodeAffinity: + {{- if eq .Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- if and .Values.multiarch (hasKey .Values.multiarch "enabled") (eq .Values.multiarch.enabled true) }} + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + {{- end }} + {{- toYaml .Values.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + podAntiAffinity: + {{- if eq .Values.antiAffinity "soft" }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} +{{- end }} + {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} + {{- if or .Values.tolerations $multiarchEnabled }} + tolerations: + {{- if .Values.tolerations }} + {{ toYaml .Values.tolerations | nindent 8 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} +# {{- if .Values.configOverrides }} +# initContainers: +# - name: config-copier +# image: ghcr.io/atlanhq/busybox:1.31 +# command: [ 'sh', '-c', 'cp /configmap-files/* /cassandra-configs/ && chown 999:999 /cassandra-configs/*'] +# volumeMounts: +# {{- range $key, $value := .Values.configOverrides }} +# - name: cassandra-config-{{ $key | replace "." 
"-" | replace "_" "--" }} +# mountPath: /configmap-files/{{ $key }} +# subPath: {{ $key }} +# {{- end }} +# - name: cassandra-configs +# mountPath: /cassandra-configs/ +# {{- end }} + initContainers: + - name: copy-config + {{- if and .Values.multiarch.enabled .Values.multiarch.image.busybox }} + image: {{ .Values.multiarch.image.busybox }} + {{- else }} + image: ghcr.io/atlanhq/busybox:1.32 + {{- end }} + command: ['sh', '-c', 'cp /config/cassandra.yaml /config/cassandra-env.sh /mnt/'] + volumeMounts: + - name: config + mountPath: /config/cassandra.yaml + subPath: cassandra.yaml + - name: config + mountPath: /config/cassandra-env.sh + subPath: cassandra-env.sh + - name: pre-install + mountPath: /mnt + containers: +{{- if .Values.exporter.enabled }} + - name: cassandra-exporter + {{- if and .Values.multiarch.enabled .Values.multiarch.image.cassandra_exporter }} + image: {{ .Values.multiarch.image.cassandra_exporter }} + {{- else }} + image: "{{ .Values.exporter.image.repo }}:{{ .Values.exporter.image.tag }}" + {{- end }} + resources: +{{ toYaml .Values.exporter.resources | indent 10 }} + env: + - name: CASSANDRA_EXPORTER_CONFIG_listenPort + value: {{ .Values.exporter.port | quote }} + - name: JVM_OPTS + value: {{ .Values.exporter.jvmOpts | quote }} + ports: + - name: metrics + containerPort: {{ .Values.exporter.port }} + protocol: TCP + - name: jmx + containerPort: 5555 + livenessProbe: + tcpSocket: + port: {{ .Values.exporter.port }} + readinessProbe: + httpGet: + path: /metrics + port: {{ .Values.exporter.port }} + initialDelaySeconds: 40 + timeoutSeconds: 45 +{{- end }} + - name: {{ template "cassandra.fullname" . }} + {{- if and .Values.multiarch.enabled .Values.multiarch.image.cassandra }} + image: {{ .Values.multiarch.image.cassandra }} + {{- else }} + image: "{{ .Values.image.repo }}:{{ .Values.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} +{{- if .Values.commandOverrides }} + command: {{ .Values.commandOverrides }} +{{- end }} +{{- if .Values.argsOverrides }} + args: {{ .Values.argsOverrides }} +{{- end }} +{{- $tierType := .Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + resources: +{{ toYaml .Values.resources | indent 10 }} +{{- end }} + env: + {{- $seed_size := default 1 .Values.config.seed_size | int -}} + {{- $global := . 
}} + - name: CASSANDRA_SEEDS + {{- if .Values.hostNetwork }} + value: {{ required "You must fill \".Values.config.seeds\" with list of Cassandra seeds when hostNetwork is set to true" .Values.config.seeds | quote }} + {{- else }} + value: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-1.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-2.atlas-cassandra.atlas.svc.cluster.local" + {{- end }} + - name: MAX_HEAP_SIZE + value: {{ default "8192M" .Values.max_heap_size | quote }} + - name: HEAP_NEWSIZE + value: {{ default "200M" .Values.heap_new_size | quote }} + - name: CASSANDRA_ENDPOINT_SNITCH + value: {{ default "SimpleSnitch" .Values.config.endpoint_snitch | quote }} + - name: CASSANDRA_CLUSTER_NAME + value: {{ default "Cassandra" .Values.config.cluster_name | quote }} + - name: CASSANDRA_DC + value: {{ default "DC1" .Values.config.dc_name | quote }} + - name: CASSANDRA_RACK + value: {{ default "RAC1" .Values.config.rack_name | quote }} + - name: CASSANDRA_START_RPC + value: {{ default "false" .Values.config.start_rpc | quote }} + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: LOCAL_JMX + value: {{ default "no" .Values.config.local_jmx | quote }} + {{- range $key, $value := .Values.env }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} + livenessProbe: + exec: + command: [ "/bin/sh", "-c", "nodetool -h ::FFFF:127.0.0.1 status" ] + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + exec: + command: [ "/bin/sh", "-c", "nodetool -h ::FFFF:127.0.0.1 status | grep -E \"^UN\\s+{{ .Values.readinessProbe.address }}\"" ] + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + ports: + - name: intra + containerPort: 7000 + - name: tls + containerPort: 7001 + - name: jmx + containerPort: 7199 + - name: cql + containerPort: {{ default 9042 .Values.config.ports.cql }} + - name: thrift + containerPort: {{ default 9160 .Values.config.ports.thrift }} + {{- if .Values.config.ports.agent }} + - name: agent + containerPort: {{ .Values.config.ports.agent }} + {{- end }} + volumeMounts: + - name: pre-install + mountPath: /etc/cassandra/cassandra.yaml + subPath: cassandra.yaml + - name: pre-install + mountPath: /etc/cassandra/cassandra-env.sh + subPath: cassandra-env.sh + - name: data + mountPath: /var/lib/cassandra + {{- if .Values.configOverrides }} + - name: cassandra-configs + mountPath: /etc/cassandra + {{- end }} + {{- if .Values.extraVolumeMounts }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . 
| indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 8 }} + {{- end }} + {{- end }} + {{- if not .Values.persistence.enabled }} + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "exec nodetool decommission"] + {{- end }} + - name: cassandra-icarus + {{- if and .Values.multiarch.enabled .Values.multiarch.image.icarus }} + image: {{ .Values.multiarch.image.icarus }} + {{- else }} + image: "{{ .Values.icarus.image.repo }}:{{ .Values.icarus.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + securityContext: + runAsUser: {{.Values.securityContext.runAsUser}} + runAsGroup: {{ .Values.securityContext.fsGroup }} + resources: +{{ toYaml .Values.icarus.resources | indent 10 }} + env: + - name: JMX_HOST + value: localhost + - name: JMX_PORT + value: "7199" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + {{- if eq $cloud "gcp" }} + - name: GOOGLE_APPLICATION_CREDENTIALS + value: "/var/secrets/google/gcp_credentials.json" + {{- end }} + {{- if eq $cloud "azure" }} + - name: AZURE_STORAGE_KEY + valueFrom: + secretKeyRef: + name: azurestorage + key: azure.client.default.key + - name: AZURE_STORAGE_ACCOUNT + valueFrom: + secretKeyRef: + name: azurestorage + key: azure.client.default.account + {{- end}} + ports: + - containerPort: {{ .Values.icarus.port }} + name: http + # Health checks for Icarus based on the API spec + readinessProbe: + httpGet: + path: /version # API endpoint to check version + port: {{ .Values.icarus.port }} + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + + livenessProbe: + httpGet: + path: /version # Same endpoint can be used to verify service is alive + port: {{ .Values.icarus.port }} + initialDelaySeconds: 60 # Give it time to start + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + # Wait for Cassandra container to be ready before starting + startupProbe: + httpGet: + path: /version + port: {{ .Values.icarus.port }} + failureThreshold: 30 # Allow more time during startup + periodSeconds: 10 + volumeMounts: + - name: pre-install + mountPath: /etc/cassandra/cassandra.yaml + subPath: cassandra.yaml + - name: pre-install + mountPath: /etc/cassandra/cassandra-env.sh + subPath: cassandra-env.sh + - name: data + mountPath: /var/lib/cassandra + {{- if eq $cloud "gcp" }} + - name: gcp-creds + mountPath: /var/secrets/google/gcp_credentials.json + subPath: gcp_credentials.json + readOnly: true + {{- end }} + + {{- if .Values.extraContainers }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} + terminationGracePeriodSeconds: {{ default 30 .Values.podSettings.terminationGracePeriodSeconds }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + - name: {{ .Values.image.pullSecrets }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- if or .Values.configOverrides (not .Values.persistence.enabled) }} + volumes: +{{- end }} +{{- range $key, $value := .Values.configOverrides }} + - configMap: + name: cassandra + name: cassandra-config-{{ $key | replace "." 
"-" | replace "_" "--" }} +{{- end }} +{{- if .Values.configOverrides }} + - name: cassandra-configs + emptyDir: {} +{{- end }} +{{- if not .Values.persistence.enabled }} + - name: data + emptyDir: {} +{{- else }} +{{- if .Values.extraVolumes }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + volumes: + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . | indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if eq $cloud "gcp" }} + - name: gcp-creds + secret: + secretName: gcp-creds-secret-manager + items: + - key: GOOGLE_APPLICATION_CREDENTIALS + path: gcp_credentials.json + {{- end }} + + volumeClaimTemplates: + - metadata: + name: data + labels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/cassandra-online-dc/values.yaml b/helm/atlas-read/charts/cassandra-online-dc/values.yaml new file mode 100755 index 00000000000..c3cd93fc794 --- /dev/null +++ b/helm/atlas-read/charts/cassandra-online-dc/values.yaml @@ -0,0 +1,400 @@ + +multiarch: + enabled: false + image: {} + + +## Cassandra image version +## ref: https://hub.docker.com/r/library/cassandra/ +global: + Tier_Type: "" +image: + repo: ghcr.io/atlanhq/cassandra + tag: 3.11.12 + pullPolicy: IfNotPresent + ## Specify ImagePullSecrets for Pods + ## ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod + # pullSecrets: myregistrykey + +## Specify a service type +## ref: http://kubernetes.io/docs/user-guide/services/ +service: + type: ClusterIP + annotations: "" + +## Use an alternate scheduler, e.g. "stork". +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +# schedulerName: + +## Persist data to a persistent volume +# persistence: {} + # enabled: true + ## cassandra data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
(gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + # accessMode: ReadWriteOnce + # size: 10Gi + +## Configure resource requests and limits +## ref: http://kubernetes.io/docs/user-guide/compute-resources/ +## Minimum memory for development is 4GB and 2 CPU cores +## Minimum memory for production is 8GB and 4 CPU cores +## ref: http://docs.datastax.com/en/archived/cassandra/2.0/cassandra/architecture/architecturePlanningHardware_c.html +# resources: {} + # requests: + # memory: 4Gi + # cpu: 2 + # limits: + # memory: 4Gi + # cpu: 2 + +## Change cassandra configuration parameters below: +## ref: http://docs.datastax.com/en/cassandra/3.0/cassandra/configuration/configCassandra_yaml.html +## Recommended max heap size is 1/2 of system memory +## Recommended heap new size is 1/4 of max heap size +## ref: http://docs.datastax.com/en/cassandra/3.0/cassandra/operations/opsTuneJVM.html +# config: +# cluster_domain: cluster.local +# cluster_name: cassandra +# cluster_size: 3 +# seed_size: 2 +# num_tokens: 256 +# # If you want Cassandra to use this datacenter and rack name, +# # you need to set endpoint_snitch to GossipingPropertyFileSnitch. +# # Otherwise, these values are ignored and datacenter1 and rack1 +# # are used. +# dc_name: DC1 +# rack_name: RAC1 +# endpoint_snitch: SimpleSnitch +# max_heap_size: 2048M +# heap_new_size: 512M +# start_rpc: false +# ports: +# cql: 9042 +# thrift: 9160 +# # If a JVM Agent is in place +# # agent: 61621 + +# Config for cassandra + +max_heap_size: 2048M +heap_new_size: 512M + +config: + cluster_domain: cluster.local + cluster_name: cassandra + cluster_size: 3 + dc_name: online-dc + rack_name: rack1 + endpoint_snitch: GossipingPropertyFileSnitch + seed_size: 3 + start_rpc: true + ports: + cql: 9042 + + +## Cassandra config files overrides +configOverrides: {} + +## Cassandra docker command overrides +commandOverrides: [] + +## Cassandra docker args overrides +argsOverrides: [] + +## Custom env variables. +## ref: https://hub.docker.com/_/cassandra/ +env: {} + +## Liveness and Readiness probe values. +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ +livenessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 40 + successThreshold: 1 + failureThreshold: 3 +readinessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 40 + successThreshold: 1 + failureThreshold: 3 + address: "${POD_IP}" + +## Configure node selector. Edit code below for adding selector to pods +## ref: https://kubernetes.io/docs/user-guide/node-selection/ +# selector: + # nodeSelector: + # cloud.google.com/gke-nodepool: pool-db + +## Additional pod annotations +## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +# podAnnotations: {} + +## Additional pod labels +## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +## Additional pod-level settings +podSettings: + # Change this to give pods more time to properly leave the cluster when not using persistent storage. 
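+  # For example, raising this to 120 or 180 (illustrative values, not chart defaults)
+  # gives the preStop "nodetool decommission" hook more time to finish on clusters
+  # that run without persistent volumes.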
+ terminationGracePeriodSeconds: 30 + +## Pod distruption budget +podDisruptionBudget: + # maxUnavailable: 1 + minAvailable: 2 + +podManagementPolicy: OrderedReady + + +updateStrategy: + type: RollingUpdate + +resources: + requests: + memory: 4Gi + cpu: 500m + limits: + memory: 5Gi + cpu: 3000m + +# Persistence changes for cassandra +persistence: + enabled: true + accessMode: ReadWriteOnce + size: 10Gi + +nodeSelector: {} + +podAnnotations: {} + # backup.velero.io/backup-volumes: data + +## Pod Security Context +securityContext: + enabled: false + fsGroup: 999 + runAsUser: 999 +# PriorityClassName +priorityClassName: "" + + +antiAffinity: "hard" + +custom_deployment: + enabled: false + instance_type: + - m6a.2xlarge + +## Affinity for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + + +## Node tolerations for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] +rbac: + # Specifies whether RBAC resources should be created + create: true + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + # name: + +# Use host network for Cassandra pods +# You must pass seed list into config.seeds property if set to true +hostNetwork: false + +## Backup cronjob configuration +## Ref: https://github.com/maorfr/cain +# Cassandra backup configuration +backup: + enabled: false + schedule: + - keyspace: atlas + cron: "0 3 * * *" + annotations: {} + image: + repository: ghcr.io/atlanhq/cain + tag: 0.6.0 + # Name of the secret containing the credentials of the service account used by GOOGLE_APPLICATION_CREDENTIALS, as a credentials.json file + extraArgs: + - -c + - atlas-cassandra + google: + serviceAccountSecret: + env: + - name: AWS_REGION + value: "" + resources: + requests: + memory: 1Gi + limits: + memory: 1Gi + destination: "" + + +## Cassandra exported configuration +## ref: https://github.com/criteo/cassandra_exporter +exporter: + # If exporter is enabled this will create a ServiceMonitor by default as well + enabled: true + serviceMonitor: + enabled: true + namespace: monitoring + lables: + release: "prometheus-operator" + additionalLabels: {} + # prometheus: default + image: + repo: ghcr.io/atlanhq/cassandra_exporter + tag: 2.0.2 + port: 5556 + jvmOpts: "" + resources: {} + # limits: + # cpu: 1 + # memory: 1Gi + # requests: + # cpu: 1 + # memory: 1Gi + +## Sidecar for backup/restore +## ref: https://github.com/instaclustr/icarus +icarus: + image: + repo: ghcr.io/atlanhq/cassandra-icarus-atlan-v2 + tag: 1.0.4 + port: 4567 + jvmOpts: "" + resources: + limits: + cpu: 1 + memory: 8Gi + requests: + cpu: 0.5 + memory: 2Gi + +extraVolumes: + - name: varlog + emptyDir: {} + - name: config + configMap: + name: cassandra-online-dc-config + - name: pre-install + emptyDir: {} + +extraVolumeMounts: + - name: 
varlog + mountPath: /var/log/cassandra/ + +extraContainers: + - name: cassandra-gclog + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/gc.log.0.current'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ + - name: cassandra-systemlog + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/system.log'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ + - name: cassandra-debug + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/debug.log'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ +janusgraph: + atomic_mutation: true + cassandra_batch_size_warn_threshold_in_kb: 4096 + cassandra_batch_size_failure_threshold_in_kb: 16384 + commitlog_segment_size_in_mb: 64 + +reaper: + enable: true + image: + repository: ghcr.io/atlanhq/cassandra-reaper + tag: 3.4.0 + pullPolicy: IfNotPresent + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + concurrencyPolicy: Forbid + activeDeadlineSeconds: 129600 # 36 hours + backoffLimit: 0 + port: 8080 + resources: + requests: + memory: 512Mi + cpu: 100m + limits: + memory: 2Gi + cpu: 500m + env: + REAPER_AUTH_ENABLED: "false" + CASSANDRA_REAPER_STORAGE_TYPE: "cassandra" + CASSANDRA_REAPER_CASS_CONTACT_POINTS: "online-dc" + CASSANDRA_REAPER_CASS_CLUSTER_NAME: "[atlas-cassandra-online-dc.atlas.svc.cluster.local]" + REAPER_CASS_KEYSPACE: "reaper_db" + REAPER_ENABLE_DYNAMIC_SEED_LIST: "true" + REAPER_ENABLE_CROSS_ORIGIN: "false" + REAPER_HANGING_REPAIR_TIMEOUT_MINS: "10" + REAPER_REPAIR_INTENSITY: "0.9" + REAPER_REPAIR_PARALELLISM: "SEQUENTIAL" + restartPolicy: OnFailure + sidecar: + image: + repository: ghcr.io/atlanhq/cassandra-reaper-atlan-v2 + tag: 1.4.0 + pullPolicy: IfNotPresent + resources: + requests: + memory: 100Mi + limits: + memory: 512Mi + env: + REAPER_URL: "http://localhost:8080" + CLUSTER_SEED_HOST: "atlas-cassandra-online-dc-0.atlas-cassandra-online-dc.atlas.svc.cluster.local,atlas-cassandra-online-dc-1.atlas-cassandra-online-dc.atlas.svc.cluster.local,atlas-cassandra-online-dc-2.atlas-cassandra-online-dc.atlas.svc.cluster.local" + KEYSPACE_NAME: "atlas" + PROMETHEUS_PUSHGATEWAY: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + POLLING_FREQUENCY_IN_SECONDS: "900" + + \ No newline at end of file diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/.helmignore b/helm/atlas-read/charts/elasticsearch-exporter-read/.helmignore new file mode 100644 index 00000000000..9e7b0bbbcdc --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/.helmignore @@ -0,0 +1,24 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+# OWNERS file for Kubernetes +OWNERS +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/Chart.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/Chart.yaml new file mode 100644 index 00000000000..485e1e3a6b7 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/Chart.yaml @@ -0,0 +1,20 @@ +apiVersion: v2 +description: Elasticsearch stats exporter for Prometheus +name: elasticsearch-exporter-read +version: 3.3.0 +kubeVersion: ">=1.10.0-0" +appVersion: 1.1.0 +home: https://github.com/justwatchcom/elasticsearch_exporter +sources: + - https://github.com/justwatchcom/elasticsearch_exporter +keywords: + - metrics + - elasticsearch + - monitoring +maintainers: + - name: svenmueller + email: sven.mueller@commercetools.com + - name: caarlos0 + email: carlos@carlosbecker.com + - name: desaintmartin + email: cedric@desaintmartin.fr diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/OWNERS b/helm/atlas-read/charts/elasticsearch-exporter-read/OWNERS new file mode 100644 index 00000000000..64fcc6642d7 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/OWNERS @@ -0,0 +1,8 @@ +approvers: +- desaintmartin +- svenmueller +- caarlos0 +reviewers: +- desaintmartin +- svenmueller +- caarlos0 diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/README.md b/helm/atlas-read/charts/elasticsearch-exporter-read/README.md new file mode 100644 index 00000000000..90bdd4c559d --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/README.md @@ -0,0 +1,146 @@ +# Elasticsearch Exporter + +Prometheus exporter for various metrics about ElasticSearch, written in Go. + +Learn more: https://github.com/justwatchcom/elasticsearch_exporter + +## TL;DR; + +```bash +$ helm install stable/elasticsearch-exporter +``` + +## Introduction + +This chart creates an Elasticsearch-Exporter deployment on a [Kubernetes](http://kubernetes.io) +cluster using the [Helm](https://helm.sh) package manager. + +## Prerequisites + +- Kubernetes 1.10+ + +## Installing the Chart + +To install the chart with the release name `my-release`: + +```bash +$ helm install --name my-release stable/elasticsearch-exporter +``` + +The command deploys Elasticsearch-Exporter on the Kubernetes cluster using the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation. + +## Uninstalling the Chart + +To uninstall/delete the `my-release` deployment: + +```bash +$ helm delete --purge my-release +``` +The command removes all the Kubernetes components associated with the chart and deletes the release. + +## Elasticsearch-exporter +Upgrading exporter version from 1.5.0 to 1.6.0. It incorporates some major bug fixes and enhancements. + +### Highlights +* Breaking Changes : The flag --es.cluster_settings has been renamed to --collector.clustersettings. +* Bug Fixes : Fix index field counts with nested fields +* [Others changes and enhancements](https://github.com/prometheus-community/elasticsearch_exporter/releases) + + +## Upgrading an existing Release to a new major version + +A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an incompatible breaking change needing manual actions. + +### To 2.0.0 + +Some kubernetes apis used from 1.x have been deprecated. 
You need to update your claster to kubernetes 1.10+ to support new definitions used in 2.x. + +## Configuration + +The following table lists the configurable parameters of the Elasticsearch-Exporter chart and their default values. + +Parameter | Description | Default +--- | --- | --- +`replicaCount` | desired number of pods | `1` +`restartPolicy` | container restart policy | `Always` +`image.repository` | container image repository | `justwatch/elasticsearch_exporter` +`image.tag` | container image tag | `1.1.0` +`image.pullPolicy` | container image pull policy | `IfNotPresent` +`image.pullSecret` | container image pull secret | `""` +`resources` | resource requests & limits | `{}` +`priorityClassName` | priorityClassName | `nil` +`nodeSelector` | Node labels for pod assignment | `{}` +`tolerations` | Node tolerations for pod assignment | `{}` +`podAnnotations` | Pod annotations | `{}` | +`podSecurityPolicies.enabled` | Enable/disable PodSecurityPolicy and associated Role/Rolebinding creation | `false` +`serviceAccount.create` | Create a ServiceAccount for the pod | `false` +`serviceAccount.name` | Name of a ServiceAccount to use that is not handled by this chart | `default` +`service.type` | type of service to create | `ClusterIP` +`service.httpPort` | port for the http service | `9108` +`service.metricsPort.name` | name for the http service | `http` +`service.annotations` | Annotations on the http service | `{}` +`service.labels` | Additional labels for the service definition | `{}` +`env` | Extra environment variables passed to pod | `{}` +`extraEnvSecrets` | Extra environment variables passed to the pod from k8s secrets - see `values.yaml` for an example | `{}` | +`envFromSecret` | The name of an existing secret in the same kubernetes namespace which contains values to be added to the environment | `nil` +`secretMounts` | list of secrets and their paths to mount inside the pod | `[]` +`affinity` | Affinity rules | `{}` +`es.uri` | address of the Elasticsearch node to connect to | `localhost:9200` +`es.all` | if `true`, query stats for all nodes in the cluster, rather than just the node we connect to | `true` +`es.indices` | if true, query stats for all indices in the cluster | `true` +`es.indices_mappings` | if true, query stats for all indices mapping | `true` +`es.indices_settings` | if true, query settings stats for all indices in the cluster | `true` +`es.shards` | if true, query stats for shards in the cluster | `true` +`es.cluster_settings` | if true, query stats for cluster settings | `true` +`es.snapshots` | if true, query stats for snapshots in the cluster | `true` +`es.timeout` | timeout for trying to get stats from Elasticsearch | `30s` +`es.ssl.enabled` | If true, a secure connection to Elasticsearch cluster is used | `false` +`es.ssl.useExistingSecrets` | If true, certs from secretMounts will be used | `false` +`es.ssl.ca.pem` | PEM that contains trusted CAs used for setting up secure Elasticsearch connection | +`es.ssl.ca.path` | Path of ca pem file which should match a secretMount path | +`es.ssl.client.enabled` | If true, use SSL client certificates for authentication | `true` +`es.ssl.client.pem` | PEM that contains the client cert to connect to Elasticsearch | +`es.ssl.client.pemPath` | Path of client pem file which should match a secretMount path | +`es.ssl.client.key` | Private key for client auth when connecting to Elasticsearch | +`es.ssl.client.keyPath` | Path of client key file which should match a secretMount path | +`web.path` | path under which to expose 
metrics | `/metrics` +`serviceMonitor.enabled` | If true, a ServiceMonitor CRD is created for a prometheus operator | `false` +`serviceMonitor.namespace` | If set, the ServiceMonitor will be installed in a different namespace | `""` +`serviceMonitor.labels` | Labels for prometheus operator | `{}` +`serviceMonitor.interval` | Interval at which metrics should be scraped | `10s` +`serviceMonitor.scrapeTimeout` | Timeout after which the scrape is ended | `10s` +`serviceMonitor.scheme` | Scheme to use for scraping | `http` +`serviceMonitor.relabelings` | Relabel configuration for the metrics | `[]` +`prometheusRule.enabled` | If true, a PrometheusRule CRD is created for a prometheus operator | `false` +`prometheusRule.namespace` | If set, the PrometheusRule will be installed in a different namespace | `""` +`prometheusRule.labels` | Labels for prometheus operator | `{}` +`prometheusRule.rules` | List of [PrometheusRules](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) to be created, check values for an example. | `[]` + +Specify each parameter using the `--set key=value[,key=value]` argument to `helm install`. For example, + +```bash +$ helm install --name my-release \ + --set key_1=value_1,key_2=value_2 \ + stable/elasticsearch-exporter +``` + +Alternatively, a YAML file that specifies the values for the parameters can be provided while installing the chart. For example, + +```bash +# example for staging +$ helm install --name my-release -f values.yaml stable/elasticsearch-exporter +``` + +> **Tip**: You can use the default [values.yaml](values.yaml) + +## Upgrading an existing Release to a new major version + +A major chart version change (like v1.2.3 -> v2.0.0) indicates that there is an +incompatible breaking change needing manual actions. + +### To 3.0.0 + +`prometheusRule.rules` are now processed as Helm template, allowing to set variables in them. +This means that if a rule contains a {{ $value }}, Helm will try replacing it and probably fail. + +You now need to escape the rules (see `values.yaml`) for examples. diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/ci/default-values.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/ci/default-values.yaml new file mode 100644 index 00000000000..fc2ba605ada --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/ci/default-values.yaml @@ -0,0 +1 @@ +# Leave this file empty to ensure that CI runs builds against the default configuration in values.yaml. diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/ci/security-context.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/ci/security-context.yaml new file mode 100644 index 00000000000..c33dc9f21a7 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/ci/security-context.yaml @@ -0,0 +1,5 @@ +--- +# Set default security context for kubernetes + +securityContext: + disable: true diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/NOTES.txt b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/NOTES.txt new file mode 100644 index 00000000000..4311f10466c --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/NOTES.txt @@ -0,0 +1,15 @@ +1. Get the application URL by running these commands: +{{- if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "elasticsearch-exporter.fullname" . 
}}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT{{ .Values.web.path }} +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ template "elasticsearch-exporter.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "elasticsearch-exporter.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.service.httpPort }}{{ .Values.web.path }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "elasticsearch-exporter.fullname" . }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:{{ .Values.service.httpPort }}{{ .Values.web.path }} to use your application" + kubectl port-forward $POD_NAME {{ .Values.service.httpPort }}:{{ .Values.service.httpPort }} --namespace {{ .Release.Namespace }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/_helpers.tpl b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/_helpers.tpl new file mode 100644 index 00000000000..1b098d1f670 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/_helpers.tpl @@ -0,0 +1,33 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "elasticsearch-exporter.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "elasticsearch-exporter.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "elasticsearch-exporter.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/cert-secret.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/cert-secret.yaml new file mode 100644 index 00000000000..437a9035a92 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/cert-secret.yaml @@ -0,0 +1,19 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if and .Values.es.ssl.enabled (eq .Values.es.ssl.useExistingSecrets false) }} +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }}-cert + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . 
}} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +type: Opaque +data: + ca.pem: {{ .Values.es.ssl.ca.pem | b64enc }} + client.pem: {{ .Values.es.ssl.client.pem | b64enc }} + client.key: {{ .Values.es.ssl.client.key | b64enc }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/deployment.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/deployment.yaml new file mode 100644 index 00000000000..bfcb0bfdbdf --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/deployment.yaml @@ -0,0 +1,199 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + strategy: + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + type: RollingUpdate + template: + metadata: + labels: + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + {{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} + {{- end }} + spec: + {{- if .Values.serviceAccount.create }} + serviceAccountName: {{ template "elasticsearch-exporter.fullname" . }} + {{- else }} + serviceAccountName: {{ .Values.serviceAccount.name }} + {{- end }} +{{- if .Values.priorityClassName }} + priorityClassName: "{{ .Values.priorityClassName }}" +{{- end }} +{{- if .Values.image.pullSecret }} + imagePullSecrets: + - name: {{ .Values.image.pullSecret }} +{{- end }} + restartPolicy: {{ .Values.restartPolicy }} + {{- if .Values.securityContext.enabled }} + securityContext: + runAsNonRoot: true + runAsUser: {{ .Values.securityContext.runAsUser }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + env: + {{- range $key, $value := .Values.env }} + - name: {{ $key }} + value: "{{ $value }}" + {{- end }} + {{- range $key, $value := .Values.extraEnvSecrets }} + - name: {{ $key }} + valueFrom: + secretKeyRef: + name: {{ required "Must specify secret!" $value.secret }} + key: {{ required "Must specify key!" 
$value.key }} + {{- end }} + {{- if .Values.envFromSecret }} + envFrom: + - secretRef: + name: {{ .Values.envFromSecret }} + {{- end }} + {{- if and .Values.multiarch.enabled .Values.multiarch.image.exporter }} + image: {{ .Values.multiarch.image.exporter }} + {{- else }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["elasticsearch_exporter", + {{- if .Values.es.uri }} + "--es.uri={{ .Values.es.uri }}", + {{- end }} + {{- if .Values.es.all }} + "--es.all", + {{- end }} + {{- if .Values.es.indices }} + "--es.indices", + {{- end }} + {{- if .Values.es.indices_mappings }} + "--es.indices_mappings", + {{- end }} + {{- if .Values.es.indices_settings }} + "--es.indices_settings", + {{- end }} + {{- if .Values.es.shards }} + "--es.shards", + {{- end }} + {{- if .Values.es.snapshots }} + "--es.snapshots", + {{- end }} + {{- if .Values.es.cluster_settings }} + "--collector.clustersettings", + {{- end }} + "--es.timeout={{ .Values.es.timeout }}", + {{- if .Values.es.sslSkipVerify }} + "--es.ssl-skip-verify", + {{- end }} + {{- if .Values.es.ssl.enabled }} + "--es.ca={{.Values.es.ssl.ca.path }}", + {{- if .Values.es.ssl.client.enabled }} + "--es.client-cert={{ .Values.es.ssl.client.pemPath }}", + "--es.client-private-key={{ .Values.es.ssl.client.keyPath }}", + {{- end }} + {{- end }} + "--web.listen-address=:{{ .Values.service.httpPort }}", + "--web.telemetry-path={{ .Values.web.path }}"] + securityContext: + capabilities: + drop: + - SETPCAP + - MKNOD + - AUDIT_WRITE + - CHOWN + - NET_RAW + - DAC_OVERRIDE + - FOWNER + - FSETID + - KILL + - SETGID + - SETUID + - NET_BIND_SERVICE + - SYS_CHROOT + - SETFCAP + readOnlyRootFilesystem: true + resources: +{{- $tierType := .Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} +{{ toYaml .Values.resources | indent 12 }} +{{- else if eq $tierType "Basic" }} +{{ toYaml .Values.resources_basic | indent 12 }} +{{- else if eq $tierType "Standard" }} +{{ toYaml .Values.resources_standard | indent 12 }} +{{- else }} +{{ toYaml .Values.resources | indent 12 }} +{{- end }} + ports: + - containerPort: {{ .Values.service.httpPort }} + name: http + livenessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 5 + timeoutSeconds: 5 + periodSeconds: 5 + readinessProbe: + httpGet: + path: /healthz + port: http + initialDelaySeconds: 1 + timeoutSeconds: 5 + periodSeconds: 5 + lifecycle: + preStop: + exec: + command: ["/bin/bash", "-c", "sleep 20"] + volumeMounts: + {{- if and .Values.es.ssl.enabled (eq .Values.es.ssl.useExistingSecrets false) }} + - mountPath: /ssl + name: ssl + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} +{{- if .Values.nodeSelector }} + nodeSelector: +{{ toYaml .Values.nodeSelector | indent 8 }} +{{- end }} +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +{{- end }} +{{- if .Values.affinity }} + affinity: +{{ toYaml .Values.affinity | indent 8 }} +{{- end }} + volumes: + {{- if and .Values.es.ssl.enabled (eq .Values.es.ssl.useExistingSecrets false) }} + - name: ssl + secret: + secretName: {{ template "elasticsearch-exporter.fullname" . 
}}-cert + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/podsecuritypolicies.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/podsecuritypolicies.yaml new file mode 100644 index 00000000000..109f41b0494 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/podsecuritypolicies.yaml @@ -0,0 +1,42 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.podSecurityPolicies.enabled -}} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +spec: + privileged: false + allowPrivilegeEscalation: false + requiredDropCapabilities: + - ALL + volumes: + - 'secret' + hostNetwork: false + hostIPC: false + hostPID: false + runAsUser: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + seLinux: + rule: 'RunAsAny' + supplementalGroups: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + fsGroup: + rule: 'MustRunAs' + ranges: + - min: 1 + max: 65535 + readOnlyRootFilesystem: true +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/prometheusrule.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/prometheusrule.yaml new file mode 100644 index 00000000000..53a535fa367 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/prometheusrule.yaml @@ -0,0 +1,26 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.prometheusRule.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} +{{- if .Values.prometheusRule.namespace }} + namespace: {{ .Values.prometheusRule.namespace }} +{{- end }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +{{- if .Values.prometheusRule.labels }} +{{- toYaml .Values.prometheusRule.labels | nindent 4 }} +{{- end }} +spec: +{{- with .Values.prometheusRule.rules }} + groups: + - name: {{ template "elasticsearch-exporter.name" $ }} + rules: {{ tpl (toYaml .) $ | nindent 8 }} +{{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/role.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/role.yaml new file mode 100644 index 00000000000..55418f0cd77 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/role.yaml @@ -0,0 +1,20 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.podSecurityPolicies.enabled -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . 
}} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +rules: +- apiGroups: ['extensions'] + resources: ['podsecuritypolicies'] + verbs: ['use'] + resourceNames: + - {{ template "elasticsearch-exporter.fullname" . }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/rolebinding.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/rolebinding.yaml new file mode 100644 index 00000000000..849ed5525f0 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/rolebinding.yaml @@ -0,0 +1,26 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.podSecurityPolicies.enabled }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "elasticsearch-exporter.fullname" . }} +subjects: +- kind: ServiceAccount + {{- if .Values.serviceAccount.create }} + name: {{ template "elasticsearch-exporter.fullname" . }} + {{- else }} + name: {{ .Values.serviceAccount.name }} + {{- end }} + namespace: {{ .Values.Namespace }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/service.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/service.yaml new file mode 100644 index 00000000000..4c9161d5892 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/service.yaml @@ -0,0 +1,28 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +kind: Service +apiVersion: v1 +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +{{- if .Values.service.labels }} +{{ toYaml .Values.service.labels | indent 4 }} +{{- end }} +{{- if .Values.service.annotations }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +{{- end }} +spec: + type: {{ .Values.service.type }} + ports: + - name: {{ .Values.service.metricsPort.name }} + port: {{ .Values.service.httpPort }} + protocol: TCP + selector: + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/serviceaccount.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/serviceaccount.yaml new file mode 100644 index 00000000000..7432bc68867 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/serviceaccount.yaml @@ -0,0 +1,14 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . 
}} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/templates/servicemonitor.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/servicemonitor.yaml new file mode 100644 index 00000000000..691296d0594 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/templates/servicemonitor.yaml @@ -0,0 +1,42 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.serviceMonitor.enabled }} +--- +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "elasticsearch-exporter.fullname" . }} + {{- if .Values.serviceMonitor.namespace }} + namespace: {{ .Values.serviceMonitor.namespace }} + {{- end }} + labels: + chart: {{ template "elasticsearch-exporter.chart" . }} + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" + {{- if .Values.serviceMonitor.labels }} + {{- toYaml .Values.serviceMonitor.labels | nindent 4 }} + {{- end }} +spec: + endpoints: + - interval: {{ .Values.serviceMonitor.interval }} + {{- if .Values.serviceMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout }} + {{- end }} + honorLabels: true + port: {{ .Values.service.metricsPort.name }} + path: {{ .Values.web.path }} + scheme: {{ .Values.serviceMonitor.scheme }} + {{- if .Values.serviceMonitor.relabelings }} + relabelings: + {{- toYaml .Values.serviceMonitor.relabelings | nindent 4 }} + {{- end }} + jobLabel: "{{ .Release.Name }}" + selector: + matchLabels: + app: {{ template "elasticsearch-exporter.name" . }} + release: "{{ .Release.Name }}" + namespaceSelector: + matchNames: + - {{ .Values.Namespace }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml new file mode 100644 index 00000000000..cecfb9ad3b2 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml @@ -0,0 +1,300 @@ +multiarch: + enabled: false + image: {} + +## number of exporter instances +## +#replicaCount: 1 +global: + Tier_Type: "" +## restart policy for all containers +## +Namespace: monitoring +restartPolicy: Always + +image: + repository: ghcr.io/atlanhq/elasticsearch-exporter + tag: 1.6.0 + pullPolicy: IfNotPresent + pullSecret: "" + +## Set enabled to false if you don't want securityContext +## in your Deployment. +## The below values are the default for kubernetes. +## Openshift won't deploy with runAsUser: 1000 without additional permissions. 
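## Note: the override below is an illustration only, not a chart default. On
## OpenShift you would typically either disable this block
## (--set securityContext.enabled=false) or run with a UID permitted by the
## target SecurityContextConstraints (--set securityContext.runAsUser=<allowed-uid>).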
+securityContext: + enabled: true # Should be set to false when running on OpenShift + runAsUser: 1000 + +resources: + requests: + #cpu: 100m + memory: 128Mi + limits: + # cpu: 100m + memory: 128Mi + +resources_basic: + requests: + memory: 10Mi + limits: + memory: 128Mi + +resources_standard: + requests: + memory: 20Mi + limits: + memory: 128Mi + +priorityClassName: "" + +nodeSelector: {} + # nodegroup: atlan-services +tolerations: {} + +podAnnotations: {} + +affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - SPOT + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - spot + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - spot + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - elasticsearch-exporter + topologyKey: kubernetes.io/hostname +# podAntiAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# - labelSelector: +# matchExpressions: +# - key: release +# operator: In +# values: +# - atlas +# - postgresql +# - redis +# - zookeeper +# topologyKey: "kubernetes.io/hostname" +# nodeAffinity: +# requiredDuringSchedulingIgnoredDuringExecution: +# nodeSelectorTerms: +# - matchExpressions: +# - key: nodegroup +# operator: NotIn +# values: +# - atlan-spark + +service: + type: ClusterIP + httpPort: 9108 + metricsPort: + name: http + annotations: {} + labels: {} + +## Extra environment variables that will be passed into the exporter pod +## example: +## env: +## KEY_1: value1 +## KEY_2: value2 +env: {} + +## The name of a secret in the same kubernetes namespace which contain values to be added to the environment +## This can be useful for auth tokens, etc +envFromSecret: "" + +## A list of environment variables from secret refs that will be passed into the exporter pod +## example: +## This will set ${ES_PASSWORD} to the 'password' key from the 'my-secret' secret +## extraEnvSecrets: +## ES_PASSWORD: +## secret: my-secret +## key: password +extraEnvSecrets: {} + +# A list of secrets and their paths to mount inside the pod +# This is useful for mounting certificates for security +secretMounts: [] +# - name: elastic-certs +# secretName: elastic-certs +# path: /ssl + +es: + ## Address (host and port) of the Elasticsearch node we should connect to. + ## This could be a local node (localhost:9200, for instance), or the address + ## of a remote Elasticsearch server. When basic auth is needed, + ## specify as: ://:@:. e.g., http://admin:pass@localhost:9200. + ## + uri: http://atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200 + + ## If true, query stats for all nodes in the cluster, rather than just the + ## node we connect to. + ## + all: true + + ## If true, query stats for all indices in the cluster. + ## + indices: true + + ## If true, query settings stats for all indices in the cluster. + ## + indices_settings: true + + ## If true, query mapping stats for all indices in the cluster. + ## + indices_mappings: true + + + ## If true, query stats for shards in the cluster. + ## + shards: true + + ## If true, query stats for snapshots in the cluster. + ## + snapshots: true + + ## If true, query stats for cluster settings. 
+ ## + cluster_settings: false + + ## Timeout for trying to get stats from Elasticsearch. (ex: 20s) + ## + timeout: 30s + + ## Skip SSL verification when connecting to Elasticsearch + ## (only available if image.tag >= 1.0.4rc1) + ## + sslSkipVerify: false + + + ssl: + ## If true, a secure connection to ES cluster is used + ## + enabled: false + + ## If true, certs from secretMounts will be need to be referenced instead of certs below + ## + useExistingSecrets: false + + ca: + + ## PEM that contains trusted CAs used for setting up secure Elasticsearch connection + ## + # pem: + + # Path of ca pem file which should match a secretMount path + path: /ssl/ca.pem + client: + ## if true, client SSL certificate is used for authentication + ## + enabled: true + + ## PEM that contains the client cert to connect to Elasticsearch. + ## + # pem: + + # Path of client pem file which should match a secretMount path + pemPath: /ssl/client.pem + + ## Private key for client auth when connecting to Elasticsearch + ## + # key: + + # Path of client key file which should match a secretMount path + keyPath: /ssl/client.key +web: + ## Path under which to expose metrics. + ## + path: /metrics + +serviceMonitor: + ## If true, a ServiceMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + #enabled: true + namespace: monitoring + labels: + release: "prometheus-operator" + interval: 5m + scrapeTimeout: 10s + scheme: http + relabelings: [] + +prometheusRule: + ## If true, a PrometheusRule CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + ## The rules will be processed as Helm template, allowing to set variables in them. + #enabled: true + namespace: monitoring + labels: + release: prometheus-operator + rules: + - record: elasticsearch_filesystem_data_used_percent + expr: | + 100 * (elasticsearch_filesystem_data_size_bytes{service="{{ template "elasticsearch-exporter.fullname" . }}"} - elasticsearch_filesystem_data_free_bytes{service="{{ template "elasticsearch-exporter.fullname" . }}"}) + / elasticsearch_filesystem_data_size_bytes{service="{{ template "elasticsearch-exporter.fullname" . }}"} + - record: elasticsearch_filesystem_data_free_percent + expr: | + (100 - elasticsearch_filesystem_data_used_percent{service="{{ template "elasticsearch-exporter.fullname" . }}"}) + - alert: ElasticsearchTooFewNodesRunning + expr: | + elasticsearch_cluster_health_number_of_nodes{service="{{ template "elasticsearch-exporter.fullname" . }}"} < 3 + for: 5m + labels: + severity: critical + team: infra + component: ElasticSearch + annotations: + description: There are only {{ "{{ $value }}" }} < 3 ElasticSearch nodes running + summary: ElasticSearch running on less than 3 nodes + - alert: ElasticsearchHeapTooHigh + expr: | + elasticsearch_jvm_memory_used_bytes{service="{{ template "elasticsearch-exporter.fullname" . }}", area="heap"} / elasticsearch_jvm_memory_max_bytes{service="{{ template "elasticsearch-exporter.fullname" . 
}}", area="heap"} + > 0.9 + for: 15m + labels: + severity: critical + team: infra + component: ElasticSearch + annotations: + description: The heap usage is over 90% for 15m + summary: ElasticSearch node {{ "{{ $labels.node }}" }} heap usage is high + +# Create a service account +# To use a service account not handled by the chart, set the name here +# and set create to false +serviceAccount: + create: false + name: default + +# Creates a PodSecurityPolicy and the role/rolebinding +# allowing the serviceaccount to use it +podSecurityPolicies: + enabled: false diff --git a/helm/atlas-read/charts/elasticsearch-read/.helmignore b/helm/atlas-read/charts/elasticsearch-read/.helmignore new file mode 100755 index 00000000000..e12c0b4b918 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/.helmignore @@ -0,0 +1,2 @@ +tests/ +.pytest_cache/ diff --git a/helm/atlas-read/charts/elasticsearch-read/Chart.yaml b/helm/atlas-read/charts/elasticsearch-read/Chart.yaml new file mode 100755 index 00000000000..a053eaa5a76 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +appVersion: 7.6.1 +description: Official Elastic helm chart for Elasticsearch +home: https://github.com/elastic/helm-charts +icon: https://helm.elastic.co/icons/elasticsearch.png +maintainers: +- email: helm-charts@elastic.co + name: Elastic +name: elasticsearch-read +sources: +- https://github.com/elastic/elasticsearch +version: 7.6.1 diff --git a/helm/atlas-read/charts/elasticsearch-read/Makefile b/helm/atlas-read/charts/elasticsearch-read/Makefile new file mode 100755 index 00000000000..22218a1f62a --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/Makefile @@ -0,0 +1 @@ +include ../helpers/common.mk diff --git a/helm/atlas-read/charts/elasticsearch-read/README.md b/helm/atlas-read/charts/elasticsearch-read/README.md new file mode 100755 index 00000000000..cfa17a30dad --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/README.md @@ -0,0 +1,367 @@ +# Elasticsearch Helm Chart + +This functionality is in beta and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features. + +This helm chart is a lightweight way to configure and run our official [Elasticsearch docker image](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) + +## Notice + +[7.6.1](https://github.com/elastic/helm-charts/releases/tag/7.6.1) release is introducing a change for Elasticsearch users upgrading from a previous chart version. +Following our recommandations, the change tracked in [#458](https://github.com/elastic/helm-charts/pull/458) is setting CPU request to the same value as CPU limit. + +For users which don't overwrite default values for CPU requests, Elasticsearch pod will now request `1000m` CPU instead of `100m` CPU. This may impact the resources (nodes) required in your Kubernetes cluster to deploy Elasticsearch chart. + +If you wish to come back to former values, you just need to override CPU requests when deploying your Helm Chart. 
+ +- Overriding CPU requests in commandline argument: +``` +helm install --name elasticsearch --set resources.requests.cpu=100m elastic/elasticsearch +``` + +- Overriding CPU requests in your custom `values.yaml` file: +``` +resources: + requests: + cpu: "100m" +``` + +## Requirements + +* [Helm](https://helm.sh/) >=2.8.0 and <3.0.0 (see parent [README](https://github.com/elastic/helm-charts/tree/master/README.md) for more details) +* Kubernetes >=1.8 +* Minimum cluster requirements include the following to run this chart with default settings. All of these settings are configurable. + * Three Kubernetes nodes to respect the default "hard" affinity settings + * 1GB of RAM for the JVM heap + +## Usage notes and getting started + +* This repo includes a number of [example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) configurations which can be used as a reference. They are also used in the automated testing of this chart +* Automated testing of this chart is currently only run against GKE (Google Kubernetes Engine). +* The chart deploys a statefulset and by default will do an automated rolling update of your cluster. It does this by waiting for the cluster health to become green after each instance is updated. If you prefer to update manually you can set [`updateStrategy: OnDelete`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#on-delete) +* It is important to verify that the JVM heap size in `esJavaOpts` and to set the CPU/Memory `resources` to something suitable for your cluster +* To simplify chart and maintenance each set of node groups is deployed as a separate helm release. Take a look at the [multi](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/multi) example to get an idea for how this works. Without doing this it isn't possible to resize persistent volumes in a statefulset. By setting it up this way it makes it possible to add more nodes with a new storage size then drain the old ones. It also solves the problem of allowing the user to determine which node groups to update first when doing upgrades or changes. +* We have designed this chart to be very un-opinionated about how to configure Elasticsearch. It exposes ways to set environment variables and mount secrets inside of the container. Doing this makes it much easier for this chart to support multiple versions with minimal changes. + +## Migration from helm/charts stable + +If you currently have a cluster deployed with the [helm/charts stable](https://github.com/helm/charts/tree/master/stable/elasticsearch) chart you can follow the [migration guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/migration/README.md) + +## Installing + +### Using Helm repository + +* Add the elastic helm charts repo + ``` + helm repo add elastic https://helm.elastic.co + ``` +* Install it + ``` + helm install --name elasticsearch elastic/elasticsearch + ``` + +### Using master branch + +* Clone the git repo + ``` + git clone git@github.com:elastic/helm-charts.git + ``` +* Install it + ``` + helm install --name elasticsearch ./helm-charts/elasticsearch + ``` + +## Compatibility + +This chart is tested with the latest supported versions. The currently tested versions are: + +| 6.x | 7.x | +| ----- | ----- | +| 6.8.7 | 7.6.1 | + +Examples of installing older major versions can be found in the [examples](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) directory. 
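The `imageTag` override described in the next paragraph can also be kept in a values file rather than passed on the command line. A minimal sketch, assuming you want one of the tested 6.x releases (the file name is illustrative; `imageTag` and `esMajorVersion` are the chart parameters documented in the configuration table below):

```
# values-es6.yaml -- illustrative pin to a tested 6.x release
imageTag: "6.8.7"
# only needed when not running the chart's default major version
esMajorVersion: 6
```

Install it with `helm install --name elasticsearch elastic/elasticsearch -f values-es6.yaml`.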
+ +While only the latest releases are tested, it is possible to easily install old or new releases by overriding the `imageTag`. To install version `7.6.1` of Elasticsearch it would look like this: + +``` +helm install --name elasticsearch elastic/elasticsearch --set imageTag=7.6.1 +``` + +## Configuration + +| Parameter | Description | Default | +| ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | +| `clusterName` | This will be used as the Elasticsearch [cluster.name](https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster.name.html) and should be unique per cluster in the namespace | `elasticsearch` | +| `nodeGroup` | This is the name that will be used for each group of nodes in the cluster. The name will be `clusterName-nodeGroup-X`, `nameOverride-nodeGroup-X` if a nameOverride is specified, and `fullnameOverride-X` if a fullnameOverride is specified | `master` | +| `masterService` | Optional. The service name used to connect to the masters. You only need to set this if your master `nodeGroup` is set to something other than `master`. See [Clustering and Node Discovery](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#clustering-and-node-discovery) for more information | `` | +| `roles` | A hash map with the [specific roles](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html) for the node group | `master: true`
`data: true`
`ingest: true` | +| `replicas` | Kubernetes replica count for the statefulset (i.e. how many pods) | `3` | +| `minimumMasterNodes` | The value for [discovery.zen.minimum_master_nodes](https://www.elastic.co/guide/en/elasticsearch/reference/6.7/discovery-settings.html#minimum_master_nodes). Should be set to `(master_eligible_nodes / 2) + 1`. Ignored in Elasticsearch versions >= 7. | `2` | +| `esMajorVersion` | Used to set major version specific configuration. If you are using a custom image and not running the default Elasticsearch version you will need to set this to the version you are running (e.g. `esMajorVersion: 6`) | `""` | +| `esConfig` | Allows you to add any config files in `/usr/share/elasticsearch/config/` such as `elasticsearch.yml` and `log4j2.properties`. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example of the formatting. | `{}` | +| `extraEnvs` | Extra [environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#using-environment-variables-inside-of-your-config) which will be appended to the `env:` definition for the container | `[]` | +| `extraVolumes` | Templatable string of additional volumes to be passed to the `tpl` function | `""` | +| `extraVolumeMounts` | Templatable string of additional volumeMounts to be passed to the `tpl` function | `""` | +| `extraContainers` | Templatable string of additional containers to be passed to the `tpl` function | `""` | +| `extraInitContainers` | Templatable string of additional init containers to be passed to the `tpl` function | `""` | +| `secretMounts` | Allows you easily mount a secret as a file inside the statefulset. Useful for mounting certificates and other secrets. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example | `[]` | +| `image` | The Elasticsearch docker image | `docker.elastic.co/elasticsearch/elasticsearch` | +| `imageTag` | The Elasticsearch docker image tag | `7.6.1` | +| `imagePullPolicy` | The Kubernetes [imagePullPolicy](https://kubernetes.io/docs/concepts/containers/images/#updating-images) value | `IfNotPresent` | +| `podAnnotations` | Configurable [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) applied to all Elasticsearch pods | `{}` | +| `labels` | Configurable [label](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) applied to all Elasticsearch pods | `{}` | +| `esJavaOpts` | [Java options](https://www.elastic.co/guide/en/elasticsearch/reference/current/jvm-options.html) for Elasticsearch. This is where you should configure the [jvm heap size](https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html) | `-Xmx1g -Xms1g` | +| `resources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the statefulset | `requests.cpu: 1000m`
`requests.memory: 2Gi`
`limits.cpu: 1000m`
`limits.memory: 2Gi` | +| `initResources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the initContainer in the statefulset | {} | +| `sidecarResources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the sidecar containers in the statefulset | {} | +| `networkHost` | Value for the [network.host Elasticsearch setting](https://www.elastic.co/guide/en/elasticsearch/reference/current/network.host.html) | `0.0.0.0` | +| `volumeClaimTemplate` | Configuration for the [volumeClaimTemplate for statefulsets](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-storage). You will want to adjust the storage (default `30Gi`) and the `storageClassName` if you are using a different storage class | `accessModes: [ "ReadWriteOnce" ]`
`resources.requests.storage: 30Gi` | +| `persistence.annotations` | Additional persistence annotations for the `volumeClaimTemplate` | `{}` | +| `persistence.enabled` | Enables a persistent volume for Elasticsearch data. Can be disabled for nodes that only have [roles](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html) which don't require persistent data. | `true` | +| `priorityClassName` | The [name of the PriorityClass](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass). No default is supplied as the PriorityClass must be created first. | `""` | +| `antiAffinityTopologyKey` | The [anti-affinity topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity). By default this will prevent multiple Elasticsearch nodes from running on the same Kubernetes node | `kubernetes.io/hostname` | +| `antiAffinity` | Setting this to hard enforces the [anti-affinity rules](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity). If it is set to soft it will be done "best effort". Other values will be ignored. | `hard` | +| `nodeAffinity` | Value for the [node affinity settings](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature) | `{}` | +| `podManagementPolicy` | By default Kubernetes [deploys statefulsets serially](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies). This deploys them in parallel so that they can discover eachother | `Parallel` | +| `protocol` | The protocol that will be used for the readinessProbe. Change this to `https` if you have `xpack.security.http.ssl.enabled` set | `http` | +| `httpPort` | The http port that Kubernetes will use for the healthchecks and the service. If you change this you will also need to set [http.port](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#_settings) in `extraEnvs` | `9200` | +| `transportPort` | The transport port that Kubernetes will use for the service. If you change this you will also need to set [transport port configuration](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#_transport_settings) in `extraEnvs` | `9300` | +| `service.labels` | Labels to be added to non-headless service | `{}` | +| `service.labelsHeadless` | Labels to be added to headless service | `{}` | +| `service.type` | Type of elasticsearch service. [Service Types](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | `ClusterIP` | +| `service.nodePort` | Custom [nodePort](https://kubernetes.io/docs/concepts/services-networking/service/#nodeport) port that can be set if you are using `service.type: nodePort`. | `` | +| `service.annotations` | Annotations that Kubernetes will use for the service. This will configure load balancer if `service.type` is `LoadBalancer` [Annotations](https://kubernetes.io/docs/concepts/services-networking/service/#ssl-support-on-aws) | `{}` | +| `service.httpPortName` | The name of the http port within the service | `http` | +| `service.transportPortName` | The name of the transport port within the service | `transport` | +| `service.loadBalancerSourceRanges` | The IP ranges that are allowed to access | `[]` | +| `updateStrategy` | The [updateStrategy](https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#updating-statefulsets) for the statefulset. 
By default Kubernetes will wait for the cluster to be green after upgrading each pod. Setting this to `OnDelete` will allow you to manually delete each pod during upgrades | `RollingUpdate` | +| `maxUnavailable` | The [maxUnavailable](https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget) value for the pod disruption budget. By default this will prevent Kubernetes from having more than 1 unhealthy pod in the node group | `1` | +| `fsGroup (DEPRECATED)` | The Group ID (GID) for [securityContext.fsGroup](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) so that the Elasticsearch user can read from the persistent volume | `` | +| `podSecurityContext` | Allows you to set the [securityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for the pod | `fsGroup: 1000`
`runAsUser: 1000` | +| `securityContext` | Allows you to set the [securityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the container | `capabilities.drop:[ALL]`
`runAsNonRoot: true`
`runAsUser: 1000` | +| `terminationGracePeriod` | The [terminationGracePeriod](https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods) in seconds used when trying to stop the pod | `120` | +| `sysctlInitContainer.enabled` | Allows you to disable the sysctlInitContainer if you are setting vm.max_map_count with another method | `true` | +| `sysctlVmMaxMapCount` | Sets the [sysctl vm.max_map_count](https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html#vm-max-map-count) needed for Elasticsearch | `262144` | +| `readinessProbe` | Configuration fields for the [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/) | `failureThreshold: 3`
`initialDelaySeconds: 10`
`periodSeconds: 10`
`successThreshold: 3`
`timeoutSeconds: 5` | +| `clusterHealthCheckParams` | The [Elasticsearch cluster health status params](https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params) that will be used by the readinessProbe command | `wait_for_status=green&timeout=1s` | +| `imagePullSecrets` | Configuration for [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret) so that you can use a private registry for your image | `[]` | +| `nodeSelector` | Configurable [nodeSelector](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) so that you can target specific nodes for your Elasticsearch cluster | `{}` | +| `tolerations` | Configurable [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) | `[]` | +| `ingress` | Configurable [ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) to expose the Elasticsearch service. See [`values.yaml`](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example | `enabled: false` | +| `schedulerName` | Name of the [alternate scheduler](https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/#specify-schedulers-for-pods) | `nil` | +| `masterTerminationFix` | A workaround needed for Elasticsearch < 7.2 to prevent master status being lost during restarts [#63](https://github.com/elastic/helm-charts/issues/63) | `false` | +| `lifecycle` | Allows you to add lifecycle configuration. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example of the formatting. | `{}` | +| `keystore` | Allows you to map Kubernetes secrets into the keystore. See the [config example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/config/values.yaml) and [how to use the keystore](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-use-the-keystore) | `[]` | +| `rbac` | Configuration for creating a role, role binding and service account as part of this helm chart with `create: true`. Also can be used to reference an external service account with `serviceAccountName: "externalServiceAccountName"`. | `create: false`
`serviceAccountName: ""` | +| `podSecurityPolicy` | Configuration for creating a pod security policy with minimal permissions to run this Helm chart with `create: true`. Also can be used to reference an external pod security policy with `name: "externalPodSecurityPolicy"` | `create: false`
`name: ""` | +| `nameOverride` | Overrides the clusterName when used in the naming of resources | `""` | +| `fullnameOverride` | Overrides the clusterName and nodeGroup when used in the naming of resources. This should only be used when using a single nodeGroup, otherwise you will have name conflicts | `""` | + +## Try it out + +In [examples/](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) you will find some example configurations. These examples are used for the automated testing of this helm chart + +### Default + +To deploy a cluster with all default values and run the integration tests + +``` +cd examples/default +make +``` + +### Multi + +A cluster with dedicated node types + +``` +cd examples/multi +make +``` + +### Security + +A cluster with node to node security and https enabled. This example uses autogenerated certificates and password, for a production deployment you want to generate SSL certificates following the [official docs](https://www.elastic.co/guide/en/elasticsearch/reference/current/configuring-tls.html#node-certificates). + +* Generate the certificates and install Elasticsearch + ``` + cd examples/security + make + + # Run a curl command to interact with the cluster + kubectl exec -ti security-master-0 -- sh -c 'curl -u $ELASTIC_USERNAME:$ELASTIC_PASSWORD -k https://localhost:9200/_cluster/health?pretty' + ``` + +### FAQ + +#### How to install plugins? + +The [recommended](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#_c_customized_image) way to install plugins into our docker images is to create a custom docker image. + +The Dockerfile would look something like: + +``` +ARG elasticsearch_version +FROM docker.elastic.co/elasticsearch/elasticsearch:${elasticsearch_version} + +RUN bin/elasticsearch-plugin install --batch repository-gcs +``` + +And then updating the `image` in values to point to your custom image. + +There are a couple reasons we recommend this. + +1. Tying the availability of Elasticsearch to the download service to install plugins is not a great idea or something that we recommend. Especially in Kubernetes where it is normal and expected for a container to be moved to another host at random times. +2. Mutating the state of a running docker image (by installing plugins) goes against best practices of containers and immutable infrastructure. + +#### How to use the keystore? + + +##### Basic example + +Create the secret, the key name needs to be the keystore key path. In this example we will create a secret from a file and from a literal string. + +``` +kubectl create secret generic encryption_key --from-file=xpack.watcher.encryption_key=./watcher_encryption_key +kubectl create secret generic slack_hook --from-literal=xpack.notification.slack.account.monitoring.secure_url='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +To add these secrets to the keystore: +``` +keystore: + - secretName: encryption_key + - secretName: slack_hook +``` + +##### Multiple keys + +All keys in the secret will be added to the keystore. 
To create the previous example in one secret you could also do: + +``` +kubectl create secret generic keystore_secrets --from-file=xpack.watcher.encryption_key=./watcher_encryption_key --from-literal=xpack.notification.slack.account.monitoring.secure_url='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +``` +keystore: + - secretName: keystore_secrets +``` + +##### Custom paths and keys + +If you are using these secrets for other applications (besides the Elasticsearch keystore) then it is also possible to specify the keystore path and which keys you want to add. Everything specified under each `keystore` item will be passed through to the `volumeMounts` section for [mounting the secret](https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets). In this example we will only add the `slack_hook` key from a secret that also has other keys. Our secret looks like this: + +``` +kubectl create secret generic slack_secrets --from-literal=slack_channel='#general' --from-literal=slack_hook='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +We only want to add the `slack_hook` key to the keystore at path `xpack.notification.slack.account.monitoring.secure_url`. + +``` +keystore: + - secretName: slack_secrets + items: + - key: slack_hook + path: xpack.notification.slack.account.monitoring.secure_url +``` + +You can also take a look at the [config example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/config/) which is used as part of the automated testing pipeline. + +#### How to enable snapshotting? + +1. Install your [snapshot plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository.html) into a custom docker image following the [how to install plugins guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-install-plugins) +2. Add any required secrets or credentials into an Elasticsearch keystore following the [how to use the keystore guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-use-the-keystore) +3. Configure the [snapshot repository](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html) as you normally would. +4. To automate snapshots you can use a tool like [curator](https://www.elastic.co/guide/en/elasticsearch/client/curator/current/snapshot.html). In the future there are plans to have Elasticsearch manage automated snapshots with [Snapshot Lifecycle Management](https://github.com/elastic/elasticsearch/issues/38461). + +### Local development environments + +This chart is designed to run on production scale Kubernetes clusters with multiple nodes, lots of memory and persistent storage. For that reason it can be a bit tricky to run them against local Kubernetes environments such as minikube. Below are some examples of how to get this working locally. + +#### Minikube + +This chart also works successfully on [minikube](https://kubernetes.io/docs/setup/minikube/) in addition to typical hosted Kubernetes environments. +An example `values.yaml` file for minikube is provided under `examples/`. + +In order to properly support the required persistent volume claims for the Elasticsearch `StatefulSet`, the `default-storageclass` and `storage-provisioner` minikube addons must be enabled. 
+ +``` +minikube addons enable default-storageclass +minikube addons enable storage-provisioner +cd examples/minikube +make +``` + +Note that if `helm` or `kubectl` timeouts occur, you may consider creating a minikube VM with more CPU cores or memory allocated. + +#### Docker for Mac - Kubernetes + +It is also possible to run this chart with the built in Kubernetes cluster that comes with [docker-for-mac](https://docs.docker.com/docker-for-mac/kubernetes/). + +``` +cd examples/docker-for-mac +make +``` + +#### KIND - Kubernetes + +It is also possible to run this chart using a Kubernetes [KIND (Kubernetes in Docker)](https://github.com/kubernetes-sigs/kind) cluster: + +``` +cd examples/kubernetes-kind +make +``` + +#### MicroK8S + +It is also possible to run this chart using [MicroK8S](https://microk8s.io): + +``` +microk8s.enable dns +microk8s.enable helm +microk8s.enable storage +cd examples/microk8s +make +``` + +## Clustering and Node Discovery + +This chart facilitates Elasticsearch node discovery and services by creating two `Service` definitions in Kubernetes, one with the name `$clusterName-$nodeGroup` and another named `$clusterName-$nodeGroup-headless`. +Only `Ready` pods are a part of the `$clusterName-$nodeGroup` service, while all pods (`Ready` or not) are a part of `$clusterName-$nodeGroup-headless`. + +If your group of master nodes has the default `nodeGroup: master` then you can just add new groups of nodes with a different `nodeGroup` and they will automatically discover the correct master. If your master nodes have a different `nodeGroup` name then you will need to set `masterService` to `$clusterName-$masterNodeGroup`. + +The chart value for `masterService` is used to populate `discovery.zen.ping.unicast.hosts`, which Elasticsearch nodes will use to contact master nodes and form a cluster. +Therefore, to add a group of nodes to an existing cluster, setting `masterService` to the desired `Service` name of the related cluster is sufficient. + +For an example of deploying both a group master nodes and data nodes using multiple releases of this chart, see the accompanying values files in `examples/multi`. + +## Testing + +This chart uses [pytest](https://docs.pytest.org/en/latest/) to test the templating logic. The dependencies for testing can be installed from the [`requirements.txt`](https://github.com/elastic/helm-charts/tree/master/requirements.txt) in the parent directory. + +``` +pip install -r ../requirements.txt +make pytest +``` + +You can also use `helm template` to look at the YAML being generated + +``` +make template +``` + +It is possible to run all of the tests and linting inside of a docker container + +``` +make test +``` + +## Integration Testing + +Integration tests are run using [goss](https://github.com/aelsabbahy/goss/blob/master/docs/manual.md) which is a serverspec like tool written in golang. See [goss.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/default/test/goss.yaml) for an example of what the tests look like. + +To run the goss tests against the default example: + +``` +cd examples/default +make goss +``` diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/NOTES.txt b/helm/atlas-read/charts/elasticsearch-read/templates/NOTES.txt new file mode 100755 index 00000000000..73edf425af7 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/NOTES.txt @@ -0,0 +1,4 @@ +1. Watch all cluster members come up. 
+ $ kubectl get pods --namespace={{ .Release.Namespace }} -l app={{ template "elasticsearch.uname" . }} -w +2. Test cluster health using Helm test. + $ helm test {{ .Release.Name }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/_helpers.tpl b/helm/atlas-read/charts/elasticsearch-read/templates/_helpers.tpl new file mode 100755 index 00000000000..b8c971dcc64 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/_helpers.tpl @@ -0,0 +1,87 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "elasticsearch.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "elasticsearch.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "elasticsearch.uname" -}} +{{- if empty .Values.fullnameOverride -}} +{{- if empty .Values.nameOverride -}} +{{ .Values.clusterName }}-{{ .Values.nodeGroup }} +{{- else -}} +{{ .Values.nameOverride }}-{{ .Values.nodeGroup }} +{{- end -}} +{{- else -}} +{{ .Values.fullnameOverride }} +{{- end -}} +{{- end -}} + +{{- define "elasticsearch.masterService" -}} +{{- if empty .Values.masterService -}} +{{- if empty .Values.fullnameOverride -}} +{{- if empty .Values.nameOverride -}} +{{ .Values.clusterName }}-master +{{- else -}} +{{ .Values.nameOverride }}-master +{{- end -}} +{{- else -}} +{{ .Values.fullnameOverride }} +{{- end -}} +{{- else -}} +{{ .Values.masterService }} +{{- end -}} +{{- end -}} + +{{- define "elasticsearch.endpoints" -}} +{{- $replicas := int (toString (.Values.replicas)) }} +{{- $uname := printf "%s-%s" .Values.clusterName .Values.nodeGroup }} + {{- range $i, $e := untilStep 0 $replicas 1 -}} +{{ $uname }}-{{ $i }}, + {{- end -}} +{{- end -}} + +{{- define "elasticsearch.esMajorVersion" -}} +{{- if .Values.esMajorVersion -}} +{{ .Values.esMajorVersion }} +{{- else -}} +{{- $version := int (index (.Values.imageTag | splitList ".") 0) -}} + {{- if and (contains "docker.elastic.co/elasticsearch/elasticsearch" .Values.image) (not (eq $version 0)) -}} +{{ $version }} + {{- else -}} +6 + {{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for statefulset. +*/}} +{{- define "elasticsearch.statefulset.apiVersion" -}} +{{- if semverCompare "<1.9-0" .Capabilities.KubeVersion.GitVersion -}} +{{- print "apps/v1beta2" -}} +{{- else -}} +{{- print "apps/v1" -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for ingress. +*/}} +{{- define "elasticsearch.ingress.apiVersion" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +{{- print "extensions/v1beta1" -}} +{{- else -}} +{{- print "networking.k8s.io/v1beta1" -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/backup-cronjob.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/backup-cronjob.yaml new file mode 100644 index 00000000000..630b205a716 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/backup-cronjob.yaml @@ -0,0 +1,43 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.backup.enabled -}} +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: {{ template "elasticsearch.uname" . 
}}-backup + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + schedule: "{{ .Values.backup.schedule }}" + jobTemplate: + spec: + template: + spec: + {{- with .Values.nodeSelector }} + nodeSelector: + {{ toYaml . }} + {{- end }} + containers: + - name: es-backup + {{- if and .Values.multiarch.enabled .Values.multiarch.image.backup }} + image: {{ .Values.multiarch.image.backup }} + {{- else }} + image: {{ .Values.backup.image }} + {{- end }} + args: + - /bin/sh + - -c + - | + now="$(date +'%d%m%Y')" + curl -X PUT "http://atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200/_snapshot/atlan_s3_repository/atlan_nightly_backup_$now?wait_for_completion=true&pretty" + restartPolicy: OnFailure +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/configmap.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/configmap.yaml new file mode 100755 index 00000000000..1be43915b80 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/configmap.yaml @@ -0,0 +1,19 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.esConfig }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "elasticsearch.uname" . }}-config + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +data: +{{- range $path, $config := .Values.esConfig }} + {{ $path }}: | +{{ $config | indent 4 -}} +{{- end -}} +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/es-regional-configmap.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/es-regional-configmap.yaml new file mode 100644 index 00000000000..b2b0e8f2f9b --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/es-regional-configmap.yaml @@ -0,0 +1,39 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.s3_regional_endpoint }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "elasticsearch.uname" . }}-plugin-install-regional + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . 
}}" +data: + plugin-install.sh: | + #!/bin/bash + + echo "[+] Configuring plugins for elasticsearch - with regional endpoint" + + # Delete temporary keystore if present + rm -rf /usr/share/elasticsearch/config/elasticsearch.keystore.tmp + + # Configure S3 repository + # S3_BUCKET_NAME=atlan-devops-local, for testing purpose + # S3_BUCKET_PATH=atlan/infra/elasticsearch + curl -X PUT "localhost:9200/_snapshot/atlan_s3_repository?pretty" -H 'Content-Type: application/json' -d' + { + "type": "s3", + "settings": { + "bucket": "'"$S3_BUCKET_NAME"'", + "base_path": "'"$S3_BUCKET_PATH"'", + "role_arn": "'"$S3_BUCKET_ROLE_ARN"'", + "region": "'"$S3_BUCKET_REGION"'", + "endpoint" : "{{ .Values.s3_regional_endpoint }}", + "compress": "true" + } + } + ' +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/ingress.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/ingress.yaml new file mode 100755 index 00000000000..6e8b8f43e14 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/ingress.yaml @@ -0,0 +1,57 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "elasticsearch.uname" . -}} +{{- $servicePort := .Values.httpPort -}} +{{- $ingressPath := .Values.ingress.path -}} +{{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: networking.k8s.io/v1 +{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ .Chart.Name }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: +{{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} +{{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ . }} + http: + paths: + - path: {{ $ingressPath }} + {{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }} + pathType: ImplementationSpecific + {{- end }} + backend: + {{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }} + service: + name: {{ $fullName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/poddisruptionbudget.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/poddisruptionbudget.yaml new file mode 100755 index 00000000000..0a91b4a75a7 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/poddisruptionbudget.yaml @@ -0,0 +1,19 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +--- +{{- if .Values.minAvailable }} +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: "{{ template "elasticsearch.uname" . 
}}-pdb" + namespace: {{ .Values.Namespace }} +spec: + minAvailable: {{ .Values.minAvailable }} + selector: + matchLabels: + app: "{{ template "elasticsearch.uname" . }}" +{{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/podsecuritypolicy.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/podsecuritypolicy.yaml new file mode 100755 index 00000000000..4661914f368 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/podsecuritypolicy.yaml @@ -0,0 +1,17 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.podSecurityPolicy.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ default $fullName .Values.podSecurityPolicy.name | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +spec: +{{ toYaml .Values.podSecurityPolicy.spec | indent 2 }} +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/role.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/role.yaml new file mode 100755 index 00000000000..d019bd5c801 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/role.yaml @@ -0,0 +1,28 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +rules: + - apiGroups: + - extensions + resources: + - podsecuritypolicies + resourceNames: + {{- if eq .Values.podSecurityPolicy.name "" }} + - {{ $fullName | quote }} + {{- else }} + - {{ .Values.podSecurityPolicy.name | quote }} + {{- end }} + verbs: + - use +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml new file mode 100755 index 00000000000..6b51efdfe05 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml @@ -0,0 +1,27 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . 
-}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +subjects: + - kind: ServiceAccount + {{- if eq .Values.rbac.serviceAccountName "" }} + name: {{ $fullName | quote }} + {{- else }} + name: {{ .Values.rbac.serviceAccountName | quote }} + {{- end }} + namespace: {{ .Release.Namespace | quote }} +roleRef: + kind: Role + name: {{ $fullName | quote }} + apiGroup: rbac.authorization.k8s.io +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/service.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/service.yaml new file mode 100644 index 00000000000..f031046d81c --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/service.yaml @@ -0,0 +1,76 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +--- +kind: Service +apiVersion: v1 +metadata: +{{- if eq .Values.nodeGroup "master" }} + name: {{ template "elasticsearch.masterService" . }} + namespace: {{ .Values.Namespace }} +{{- else }} + name: {{ template "elasticsearch.uname" . }} + namespace: {{ .Values.Namespace }} +{{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +{{- if .Values.service.labels }} +{{ toYaml .Values.service.labels | indent 4}} +{{- end }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +spec: + type: {{ .Values.service.type }} + selector: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + ports: + - name: {{ .Values.service.httpPortName | default "http" }} + protocol: TCP + port: {{ .Values.httpPort }} +{{- if .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} +{{- end }} + - name: {{ .Values.service.transportPortName | default "transport" }} + protocol: TCP + port: {{ .Values.transportPort }} +{{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml . | indent 4 }} +{{- end }} +--- +kind: Service +apiVersion: v1 +metadata: +{{- if eq .Values.nodeGroup "master" }} + name: {{ template "elasticsearch.masterService" . }}-headless + namespace: {{ .Values.Namespace }} +{{- else }} + name: {{ template "elasticsearch.uname" . }}-headless + namespace: {{ .Values.Namespace }} +{{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +{{- if .Values.service.labelsHeadless }} +{{ toYaml .Values.service.labelsHeadless | indent 4 }} +{{- end }} + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + clusterIP: None # This is needed for statefulset hostnames like elasticsearch-0 to resolve + # Create endpoints also if the related pod isn't ready + publishNotReadyAddresses: true + selector: + app: "{{ template "elasticsearch.uname" . 
}}" + ports: + - name: {{ .Values.service.httpPortName | default "http" }} + port: {{ .Values.httpPort }} + - name: {{ .Values.service.transportPortName | default "transport" }} + port: {{ .Values.transportPort }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/serviceaccount.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/serviceaccount.yaml new file mode 100755 index 00000000000..e295a32ae0a --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/serviceaccount.yaml @@ -0,0 +1,20 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + {{- if eq .Values.rbac.serviceAccountName "" }} + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + {{- else }} + name: {{ .Values.rbac.serviceAccountName | quote }} + namespace: {{ .Values.Namespace }} + {{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml new file mode 100755 index 00000000000..69dee92bbe9 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml @@ -0,0 +1,439 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +--- +{{- $isAWSCloud := eq .Values.global.cloud "aws" }} +apiVersion: {{ template "elasticsearch.statefulset.apiVersion" . }} +kind: StatefulSet +metadata: + name: {{ template "elasticsearch.uname" . }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + esMajorVersion: "{{ include "elasticsearch.esMajorVersion" . }}" +spec: + serviceName: {{ template "elasticsearch.uname" . }}-headless + selector: + matchLabels: + app: "{{ template "elasticsearch.uname" . }}" + replicas: {{ .Values.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: {{ template "elasticsearch.uname" . }} + {{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 8 }} + {{- end }} + spec: +{{ toYaml .Values.volumeClaimTemplate | indent 6 }} + {{- end }} + template: + metadata: + name: "{{ template "elasticsearch.uname" . }}" + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + # backup.velero.io/backup-volumes: {{ template "elasticsearch.uname" . }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{/* This forces a restart if the configmap has changed */}} + {{- if .Values.esConfig }} + configchecksum: {{ include (print .Template.BasePath "/configmap.yaml") . 
| sha256sum | trunc 63 }} + {{- end }} + spec: + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + securityContext: +{{ toYaml .Values.podSecurityContext | indent 8 }} + {{- if .Values.fsGroup }} + fsGroup: {{ .Values.fsGroup }} # Deprecated value, please use .Values.podSecurityContext.fsGroup + {{- end }} + {{- if .Values.rbac.create }} + serviceAccountName: "{{ template "elasticsearch.uname" . }}" + {{- else if not (eq .Values.rbac.serviceAccountName "") }} + serviceAccountName: {{ .Values.rbac.serviceAccountName | quote }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 6 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + affinity: + {{- end }} + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- if eq .Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: "{{ template "elasticsearch.uname" .}}" + topologyKey: {{ .Values.antiAffinityTopologyKey }} + {{- else if eq .Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: {{ .Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "elasticsearch.uname" . }}" + {{- end }} + {{- end }} + nodeAffinity: + {{- if eq .Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + volumes: + {{- if .Values.s3_regional_endpoint }} + - name: plugin-install-regional + configMap: + name: {{ template "elasticsearch.uname" . }}-plugin-install-regional + defaultMode: 344 + items: + - key: plugin-install.sh + path: plugin-install.sh + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.esConfig }} + - name: esconfig + configMap: + name: {{ template "elasticsearch.uname" . }}-config + {{- end }} + - name: elasticsearch-synonym-config + configMap: + name: elasticsearch-read-synonym-config +{{- if .Values.keystore }} + - name: keystore + emptyDir: {} + {{- range .Values.keystore }} + - name: keystore-{{ .secretName }} + secret: {{ toYaml . | nindent 12 }} + {{- end }} +{{ end }} +{{- if .Values.extraVolumes }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . 
| indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + initContainers: + {{- if .Values.sysctlInitContainer.enabled }} + - name: configure-sysctl + securityContext: + runAsUser: 0 + privileged: true + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: ["sysctl", "-w", "vm.max_map_count={{ .Values.sysctlVmMaxMapCount}}"] + resources: +{{ toYaml .Values.initResources | indent 10 }} + {{- end }} +{{ if .Values.keystore }} + - name: keystore + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: + - sh + - -c + - | + #!/usr/bin/env bash + set -euo pipefail + + for i in /tmp/keystoreSecrets/*/*; do + key=$(basename $i) + echo "Adding file $i to keystore key $key" + elasticsearch-keystore add-file "$key" "$i" + done + + # Add the bootstrap password since otherwise the Elasticsearch entrypoint tries to do this on startup + if [ ! -z ${ELASTIC_PASSWORD+x} ]; then + echo 'Adding env $ELASTIC_PASSWORD to keystore as key bootstrap.password' + echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x bootstrap.password + fi + + cp -a /usr/share/elasticsearch/config/elasticsearch.keystore /tmp/keystore/ + env: {{ toYaml .Values.extraEnvs | nindent 10 }} + resources: {{ toYaml .Values.initResources | nindent 10 }} + volumeMounts: + - name: keystore + mountPath: /tmp/keystore + {{- range .Values.keystore }} + - name: keystore-{{ .secretName }} + mountPath: /tmp/keystoreSecrets/{{ .secretName }} + {{- end }} +{{ end }} + {{- if .Values.extraInitContainers }} +{{ tpl .Values.extraInitContainers . | indent 6 }} + {{- end }} + containers: + - name: "{{ template "elasticsearch.name" . 
}}" + securityContext: +{{ toYaml .Values.securityContext | indent 10 }} + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + # If the node is starting up wait for the cluster to be ready (request params: '{{ .Values.clusterHealthCheckParams }}' ) + # Once it has started only check that the node itself is responding + START_FILE=/tmp/.es_start_file + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ .Values.protocol }}://127.0.0.1:{{ .Values.httpPort }}${path} + } + + if [ -f "${START_FILE}" ]; then + echo 'Elasticsearch is already running, lets check the node is healthy and there are master nodes available' + http "/_cluster/health?timeout={{ .Values.healthCheckProbeTimeout }}" + else + echo 'Waiting for elasticsearch cluster to become ready (request params: "{{ .Values.clusterHealthCheckParams }}" )' + if http "/_cluster/health?{{ .Values.clusterHealthCheckParams }}" ; then + touch ${START_FILE} + exit 0 + else + echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParams }}" )' + exit 1 + fi + fi + ports: + - name: http + containerPort: {{ .Values.httpPort }} + - name: transport + containerPort: {{ .Values.transportPort }} + resources: +{{- if eq .Values.global.Tier_Type "Enterprise" }} +{{ toYaml .Values.resources | indent 10 }} +{{- else if eq .Values.global.Tier_Type "Basic" }} +{{ toYaml .Values.resources_basic | indent 10 }} +{{- else if eq .Values.global.Tier_Type "Standard" }} +{{ toYaml .Values.resources_standard | indent 10 }} +{{- else }} +{{ toYaml .Values.resources | indent 10 }} +{{- end }} + {{- if .Values.multitenant }} + envFrom: {{ toYaml .Values.envFrom | nindent 10 }} + {{- end }} + env: + - name: node.name + valueFrom: + fieldRef: + fieldPath: metadata.name + {{- if eq .Values.roles.master "true" }} + {{- if ge (int (include "elasticsearch.esMajorVersion" .)) 7 }} + - name: cluster.initial_master_nodes + value: "{{ template "elasticsearch.endpoints" . }}" + {{- else }} + - name: discovery.zen.minimum_master_nodes + value: "{{ .Values.minimumMasterNodes }}" + {{- end }} + {{- end }} + {{- if lt (int (include "elasticsearch.esMajorVersion" .)) 7 }} + - name: discovery.zen.ping.unicast.hosts + value: "{{ template "elasticsearch.masterService" . }}-headless" + {{- else }} + - name: discovery.seed_hosts + value: "{{ template "elasticsearch.masterService" . }}-headless" + {{- end }} + - name: cluster.name + value: "{{ .Values.clusterName }}" + - name: cluster.max_shards_per_node + value: "{{ .Values.maxShardsPerNode }}" + - name: network.host + value: "{{ .Values.networkHost }}" + - name: ES_JAVA_OPTS + value: {{ .Values.esJavaOpts | quote }} + {{- range $role, $enabled := .Values.roles }} + - name: node.{{ $role }} + value: "{{ $enabled }}" + {{- end }} +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} +{{- end }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: "{{ template "elasticsearch.uname" . 
}}" + mountPath: /usr/share/elasticsearch/data + {{- end }} +{{ if .Values.keystore }} + - name: keystore + mountPath: /usr/share/elasticsearch/config/elasticsearch.keystore + subPath: elasticsearch.keystore +{{ end }} + - name: elasticsearch-synonym-config + mountPath: /usr/share/elasticsearch/config/synonym.txt + subPath: synonym.txt + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.esConfig }} + - name: esconfig + mountPath: /usr/share/elasticsearch/config/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- if .Values.s3_regional_endpoint }} + - name: plugin-install-regional + mountPath: /usr/share/elasticsearch/plugin-install.sh + subPath: plugin-install.sh + {{- end }} + {{- if .Values.extraVolumeMounts }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . | indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} + {{- end }} + {{- end }} + {{- if .Values.masterTerminationFix }} + {{- if eq .Values.roles.master "true" }} + # This sidecar will prevent slow master re-election + # https://github.com/elastic/helm-charts/issues/63 + - name: elasticsearch-master-graceful-termination-handler + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: + - "sh" + - -c + - | + #!/usr/bin/env bash + set -eo pipefail + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ .Values.protocol }}://{{ template "elasticsearch.masterService" . }}:{{ .Values.httpPort }}${path} + } + + cleanup () { + while true ; do + local master="$(http "/_cat/master?h=node" || echo "")" + if [[ $master == "{{ template "elasticsearch.masterService" . }}"* && $master != "${NODE_NAME}" ]]; then + echo "This node is not master." + break + fi + echo "This node is still master, waiting gracefully for it to step down" + sleep 1 + done + + exit 0 + } + + trap cleanup SIGTERM + + sleep infinity & + wait $! + resources: +{{ toYaml .Values.sidecarResources | indent 10 }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + {{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} + {{- end }} + {{- end }} + {{- end }} +{{- if .Values.lifecycle }} + lifecycle: +{{ toYaml .Values.lifecycle | indent 10 }} +{{- end }} + {{- if .Values.extraContainers }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . 
| indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/synonym-configmap.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/synonym-configmap.yaml new file mode 100644 index 00000000000..a0757f4a477 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/synonym-configmap.yaml @@ -0,0 +1,30 @@ +{{- if or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: elasticsearch-read-synonym-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + synonym.txt: | + customer, consumer + hr, hour + country, nation + delete, archive + updated, modified, altered + lat, latitude + long, longitude + profit, loss, revenue + customer, consumer, buyer + id, userid, user_id, guid + qty, quantity + cus_id, customer_id, customerid + amt, amount + review, rating + cost, price + home for data teams, atlan +{{- end }} diff --git a/helm/atlas-read/charts/elasticsearch-read/values.yaml b/helm/atlas-read/charts/elasticsearch-read/values.yaml new file mode 100755 index 00000000000..1b2be74d897 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/values.yaml @@ -0,0 +1,326 @@ +--- + +multiarch: + enabled: false + image: {} + +clusterName: "atlas-elasticsearch-read" +nodeGroup: "master" + +# The service that non master groups will try to connect to when joining the cluster +# This should be set to clusterName + "-" + nodeGroup for your master group +global: + Tier_Type: "" +masterService: "" + +# Elasticsearch roles that will be applied to this nodeGroup +# These will be set as environment variables. E.g. node.master=true +roles: + master: "true" + ingest: "true" + data: "true" + +replicas: 3 +esMajorVersion: 7 +minimumMasterNodes: 1 +# JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage +# This is simpler and more reliable than manual calculation +esJavaOpts: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + +# Allows you to add any config files in /usr/share/elasticsearch/config/ +# such as elasticsearch.yml and log4j2.properties +esConfig: + elasticsearch.yml: | + http.max_content_length: 2147483647b + ingest.geoip.downloader.enabled: false + xpack.security.enabled: false + reindex.remote.whitelist: ["atlas-elasticsearch-master:9200","atlas-elasticsearch-master-0:9200","atlas-elasticsearch-master-1:9200","atlas-elasticsearch-master-2:9200"] + log4j2.properties: | + -Dlog4j2.formatMsgNoLookups=true + +# Extra environment variables to append to this nodeGroup +# This will be appended to the current 'env:' key. 
You can use any of the kubernetes env +# syntax here +# Env variable for elasticsearch +envFrom: [] +extraEnvs: + # - name: S3_BUCKET_NAME + # value: '' + - name: S3_BUCKET_PATH + value: 'backup/elasticsearch' + # - name: S3_BUCKET_ROLE_ARN + # value: '' + # - name: S3_BUCKET_REGION + # value: '' +# - name: MY_ENVIRONMENT_VAR +# value: the_value_goes_here + +# A list of secrets and their paths to mount inside the pod +# This is useful for mounting certificates for security and for mounting +# the X-Pack license +secretMounts: [] +# - name: elastic-certificates +# secretName: elastic-certificates +# path: /usr/share/elasticsearch/config/certs + +image: "ghcr.io/atlanhq/elasticsearch-atlan-v2" +imageTag: "7.17.4" +imagePullPolicy: "IfNotPresent" + +resources: + requests: + cpu: "500m" + memory: "6Gi" + limits: + cpu: "2000m" + memory: "7Gi" + +resources_basic: + requests: + memory: "50Mi" + limits: + memory: "4Gi" + +resources_standard: + requests: + memory: "50Mi" + limits: + memory: "5Gi" + +podAnnotations: {} + # iam.amazonaws.com/role: es-cluster + +# additionals labels +labels: {} + + +initResources: {} + +sidecarResources: {} + +networkHost: "0.0.0.0" + +maxShardsPerNode: "4000" + +volumeClaimTemplate: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + +rbac: + create: false + serviceAccountName: "" + +podSecurityPolicy: + create: false + name: "" + spec: + privileged: true + fsGroup: + rule: RunAsAny + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + volumes: + - secret + - configMap + - persistentVolumeClaim + +persistence: + enabled: true + annotations: {} + +extraVolumes: + - name: varlog + emptyDir: {} + +extraVolumeMounts: + - name: varlog + mountPath: /usr/share/elasticsearch/logs + +extraContainers: + - name: atlas-es-gclog-sidecar + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /usr/share/elasticsearch/logs/gc.log'] + volumeMounts: + - name: varlog + mountPath: /usr/share/elasticsearch/logs/ + +# This is the PriorityClass settings as defined in +# https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +priorityClassName: "" + +# By default this will make sure two pods don't end up on the same node +# Changing this to a region would allow you to spread pods across regions +antiAffinityTopologyKey: "kubernetes.io/hostname" + +# Hard means that by default pods will only be scheduled if there are enough nodes for them +# and that they will never end up on the same node. Setting this to soft will do this "best effort" +antiAffinity: "hard" + +# This is the node affinity settings as defined in +# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature +nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: nodegroup + # operator: NotIn + # values: + # - atlan-spark + # - key: lifecycle + # operator: In + # values: + # - None + +# The default is to deploy all pods serially. 
By setting this to parallel all pods are started at +# the same time when bootstrapping the cluster +podManagementPolicy: "Parallel" + +protocol: http +httpPort: 9200 +transportPort: 9300 + +service: + labels: {} + labelsHeadless: {} + type: ClusterIP + nodePort: "" + annotations: {} + httpPortName: http + transportPortName: transport + loadBalancerSourceRanges: [] + +updateStrategy: RollingUpdate + +# This is the max unavailable setting for the pod disruption budget +# The default value of 1 will make sure that kubernetes won't allow more than 1 +# of your pods to be unavailable during maintenance +minAvailable: 2 + +podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + +# The following value is deprecated, +# please use the above podSecurityContext.fsGroup instead +fsGroup: "" + +securityContext: + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + +# How long to wait for elasticsearch to stop gracefully +terminationGracePeriod: 120 + +sysctlVmMaxMapCount: 262144 + +readinessProbe: + failureThreshold: 6 + initialDelaySeconds: 30 + periodSeconds: 10 + successThreshold: 3 + timeoutSeconds: 15 + +custom_deployment: + enabled: false + instance_type: + - m6a.2xlarge + +# https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params wait_for_status +clusterHealthCheckParams: "wait_for_status=yellow&timeout=5s" + +# Timeout for readiness probe health check when ES is already running +healthCheckProbeTimeout: "1s" + +## Use an alternate scheduler. +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +schedulerName: "" + +imagePullSecrets: [] +nodeSelector: {} +tolerations: [] + +# Enabling this will publically expose your Elasticsearch instance. +# Only enable this if you have security enabled on your cluster +ingress: + enabled: false + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + path: / + hosts: + - chart-example.local + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +nameOverride: "" +fullnameOverride: "" + +# https://github.com/elastic/helm-charts/issues/63 +masterTerminationFix: false + +lifecycle: {} + # preStop: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + # postStart: + # exec: + # command: + # - bash + # - -c + # - | + # #!/bin/bash + # # Add a template to adjust number of shards/replicas + # TEMPLATE_NAME=my_template + # INDEX_PATTERN="logstash-*" + # SHARD_COUNT=8 + # REPLICA_COUNT=1 + # ES_URL=http://localhost:9200 + # while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $ES_URL)" != "200" ]]; do sleep 1; done + # curl -XPUT "$ES_URL/_template/$TEMPLATE_NAME" -H 'Content-Type: application/json' -d'{"index_patterns":['\""$INDEX_PATTERN"\"'],"settings":{"number_of_shards":'$SHARD_COUNT',"number_of_replicas":'$REPLICA_COUNT'}}' + +sysctlInitContainer: + enabled: true + +keystore: [] + +backup: + enabled: false + image: ghcr.io/atlanhq/alpine-curl-atlan-v2:3.21.0 + #imageTag: latest + schedule: '0 3 * * *' diff --git a/helm/atlas-read/templates/NOTES.txt b/helm/atlas-read/templates/NOTES.txt new file mode 100644 index 00000000000..6e602e9cb99 --- /dev/null +++ b/helm/atlas-read/templates/NOTES.txt @@ -0,0 +1,19 @@ +1. Get the application URL by running these commands: +{{- if .Values.atlas.ingress.enabled }} +{{- range .Values.atlas.ingress.hosts }} + http://{{ . 
}}
+{{- end }}
+{{- else if contains "NodePort" .Values.atlas.service.type }}
+  export NODE_IP=$(kubectl get nodes --namespace {{ .Values.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}")
+  echo http://$NODE_IP:$NODE_PORT
+{{- else if contains "LoadBalancer" .Values.atlas.service.type }}
+  NOTE: It may take a few minutes for the LoadBalancer IP to be available.
+  You can watch the status of it by running 'kubectl get svc -w {{ template "fullname" . }}'
+  export SERVICE_IP=$(kubectl get svc --namespace {{ .Values.Namespace }} {{ template "fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
+  echo http://$SERVICE_IP:{{ .Values.atlas.service.port }}
+{{- else if contains "ClusterIP" .Values.atlas.service.type }}
+  export POD_NAME=$(kubectl get pods --namespace {{ .Values.Namespace }} -l "app={{ template "name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}")
+  kubectl port-forward $POD_NAME 8080:{{ .Values.atlas.service.port }}
+  echo "Visit http://127.0.0.1:8080 to use your application"
+  echo "Default username/password is admin/admin"
+{{- end }}
\ No newline at end of file
diff --git a/helm/atlas-read/templates/_helpers.tpl b/helm/atlas-read/templates/_helpers.tpl
new file mode 100644
index 00000000000..2f1b146cddc
--- /dev/null
+++ b/helm/atlas-read/templates/_helpers.tpl
@@ -0,0 +1,47 @@
+{{/* vim: set filetype=mustache: */}}
+{{/*
+Expand the name of the chart.
+*/}}
+{{- define "name" -}}
+{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Create a default fully qualified app name.
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
+*/}}
+{{- define "fullname" -}}
+{{- $name := default .Chart.Name .Values.nameOverride -}}
+{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
+{{- end -}}
+
+{{/*
+Return the appropriate apiVersion for ingress.
+*/}}
+{{- define "atlas.ingress.apiVersion" -}}
+  {{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}}
+    {{- print "networking.k8s.io/v1" -}}
+  {{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}}
+    {{- print "networking.k8s.io/v1beta1" -}}
+  {{- else -}}
+    {{- print "extensions/v1beta1" -}}
+  {{- end -}}
+{{- end -}}
+{{/*
+Return if ingress is stable.
+*/}}
+{{- define "atlas.ingress.isStable" -}}
+  {{- eq (include "atlas.ingress.apiVersion" .) "networking.k8s.io/v1" -}}
+{{- end -}}
+{{/*
+Return if ingress supports ingressClassName.
+*/}}
+{{- define "atlas.ingress.supportsIngressClassName" -}}
+  {{- or (eq (include "atlas.ingress.isStable" .) "true") (and (eq (include "atlas.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}}
+{{- end -}}
+{{/*
+Return if ingress supports pathType.
+*/}}
+{{- define "atlas.ingress.supportsPathType" -}}
+  {{- or (eq (include "atlas.ingress.isStable" .) "true") (and (eq (include "atlas.ingress.apiVersion" .) 
"networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}} +{{- end -}} \ No newline at end of file diff --git a/helm/atlas-read/templates/atlas-audit-index-configmap.yaml b/helm/atlas-read/templates/atlas-audit-index-configmap.yaml new file mode 100644 index 00000000000..24a3711296f --- /dev/null +++ b/helm/atlas-read/templates/atlas-audit-index-configmap.yaml @@ -0,0 +1,168 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-audit-index + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-audit.sh: | + {{ if or (eq .Values.global.esIsolation.enabled true) (eq .Values.global.globalSvcIsolation.enabled true) }} + curl -X PUT "atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200/_template/atlas.audit?pretty" -H 'Content-Type: application/json' -d' + {{ else }} + curl -X PUT "atlas-elasticsearch-master.atlas.svc.cluster.local:9200/_template/atlas.audit?pretty" -H 'Content-Type: application/json' -d' + {{ end }} + { + "index_patterns": ["atlas.audit*"], + "settings": { + "number_of_shards" : 3, + "number_of_replicas" : 1, + "analysis": { + "analyzer": { + "resource_path": { + "tokenizer": "resource_hierarchy" + } + }, + "tokenizer": { + "resource_hierarchy": { + "type": "path_hierarchy", + "delimiter": "/" + } + } + } + }, + "mappings": { + "properties": { + "all": { + "type": "text" + }, + "Action": { + "type": "keyword" + }, + "glossaryQualifiedName": { + "type": "keyword" + }, + "Level": { + "type": "keyword" + }, + "Type": { + "type": "keyword" + }, + "access": { + "type": "keyword", + "copy_to": [ + "all" + ] + }, + "action": { + "type": "keyword" + }, + "agent": { + "type": "keyword" + }, + "agentHost": { + "type": "keyword" + }, + "cliIP": { + "type": "keyword" + }, + "cluster_name": { + "type": "keyword" + }, + "datetime": { + "type": "text" + }, + "enforcer": { + "type": "keyword" + }, + "event_count": { + "type": "integer" + }, + "event_dur_ms": { + "type": "integer" + }, + "evtTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss.SSS" + }, + "id": { + "type": "keyword" + }, + "logType": { + "type": "keyword" + }, + "logtimestamp": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss,SSS" + }, + "policy": { + "type": "integer" + }, + "policy_version": { + "type": "integer" + }, + "qualifiedName": { + "type": "keyword" + }, + "qualifiedNameText": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "qnComponents": { + "type": "nested" + }, + "repo": { + "type": "keyword" + }, + "repoType": { + "type": "keyword" + }, + "reqUser": { + "type": "keyword", + "copy_to": [ + "all" + ] + }, + "resType": { + "type": "keyword" + }, + "resource": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "result": { + "type": "keyword" + }, + "seq_num": { + "type": "integer" + }, + "tags": { + "type": "text" + } + } + } + } + ' + + + + + + + + + + + + + + +{{- end }} diff --git a/helm/atlas-read/templates/atlas-logback-config-configmap.yaml b/helm/atlas-read/templates/atlas-logback-config-configmap.yaml new file mode 100644 index 00000000000..d1c86d7c04c --- /dev/null +++ b/helm/atlas-read/templates/atlas-logback-config-configmap.yaml @@ -0,0 +1,251 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled 
.Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-logback-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-logback.xml: | + + + + + + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + true + * + true + true + + + + + + filter + atlas-audit + + + + logback: %d %-5p - atlas-audit - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-auth-audit + + + + logback: %d %-5p - atlas-auth-audit - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-application + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-perf + + + + logback: %d %-5p - atlas-perf - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-ranger + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-metrics + + + + logback: %d %-5p - atlas-metrics - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-tasks + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{{- end }} diff --git a/helm/atlas-read/templates/configmap-init-container-script.yaml b/helm/atlas-read/templates/configmap-init-container-script.yaml new file mode 100644 index 00000000000..6a72d896a90 --- /dev/null +++ b/helm/atlas-read/templates/configmap-init-container-script.yaml @@ -0,0 +1,122 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-init-container-script + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-init-container.sh: | + #!/bin/sh + + echo "[+] Checking for Cassandra and Elasticsearch" + /tmp/atlas-init.sh + + echo "[+] Copying Config File" + cp /tmp/configfile/atlas-application.properties /tmp/newconfigfile/atlas-application.properties + + echo "[+] Checking for Keycloak" + until printf "." 
&& curl -s $KEYCLOAK_ADDRESS/realms/master; do + sleep 2; + done; + echo 'Keycloak OK ✓' + + # Check for Keycloak realms and clients + echo "[+] Checking for Keycloak realms and clients" + # Set variables + KEYCLOAK_SERVER=$KEYCLOAK_ADDRESS + KEYCLOAK_REALM="master" + KEYCLOAK_USERNAME="batman" + KEYCLOAK_PASSWORD=$RANGER_PASSWORD + CLIENT_NAME="atlan-backend" + REALM_NAME="default" + REALM_EXISTS=false + CLIENT_EXISTS=false + + until [ "$REALM_EXISTS" = true ] && [ "$CLIENT_EXISTS" = true ] + do + # Fetch token + TOKEN_RESPONSE=$(curl -s -X POST "${KEYCLOAK_SERVER}/realms/${KEYCLOAK_REALM}/protocol/openid-connect/token" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=${KEYCLOAK_USERNAME}" \ + -d "password=${KEYCLOAK_PASSWORD}" \ + -d 'grant_type=password' \ + -d 'client_id=admin-cli') + + ACCESS_TOKEN=$(echo $TOKEN_RESPONSE | jq -r .access_token) + + if [ "$ACCESS_TOKEN" == "null" ]; then + echo "Error: Unable to fetch access token. Please check your server, realm, username, and password." + sleep 5 + continue + fi + + # Check if realm exists + REALM_RESPONSE=$(curl -s -X GET "${KEYCLOAK_SERVER}/admin/realms/${REALM_NAME}" \ + -H "Authorization: Bearer $ACCESS_TOKEN") + + REALM_ERROR=$(echo $REALM_RESPONSE | jq -r '.error // empty') + + if [ "$REALM_RESPONSE" == "" ] || [ "$REALM_ERROR" != "" ]; then + echo "Realm ${REALM_NAME} does not exist or could not be retrieved. Retrying in 5 seconds..." + REALM_EXISTS=false + sleep 5 + continue + else + echo "Realm ${REALM_NAME} exists" + REALM_EXISTS=true + fi + + # Check if client exists + CLIENT_ID_RESPONSE=$(curl -s -X GET "${KEYCLOAK_SERVER}/admin/realms/${REALM_NAME}/clients?clientId=${CLIENT_NAME}" \ + -H "Authorization: Bearer $ACCESS_TOKEN") + + CLIENT_ID=$(echo $CLIENT_ID_RESPONSE | jq -r '.[0].id') + + if [ "$CLIENT_ID_RESPONSE" == "" ] || [ "$CLIENT_ID" == "null" ]; then + echo "Client ${CLIENT_NAME} does not exist in realm ${REALM_NAME} or could not be retrieved. Retrying in 5 seconds..." + CLIENT_EXISTS=false + sleep 5 + continue + else + echo "Client ${CLIENT_NAME} exists in realm ${REALM_NAME}" + CLIENT_EXISTS=true + fi + done + + echo "[+] Checking for Cache Invalidation Proxy" + until printf "." && curl -s http://cinv.atlas.svc.cluster.local:5000/health; do + sleep 2; + done; + echo '[+] Cache Invalidation Proxy OK ✓' + + echo "[+] Checking for Zookeeper" + while true + do + sleep 5 + echo "[+] checking for zookeeper service" + leader=`echo stat | nc zookeeper 2181 | grep leader |wc -l` + echo $leader + if [ $leader -eq 1 ]; then echo "Zookeeper cluster up"; break; fi + done + echo 'Zookeeper OK ✓' + + echo "[+] Checking for Kafka" + host='kafka-headless.kafka.svc.cluster.local' + port=9092 + while true; do + if nc -z -w 1 "$host" "$port"; then + echo "(+) Kafka OK" + break + else + echo "(-) Kafka is either down or in the process of a restart" + sleep 10 + fi + done + echo "[+] Atlas audit indexing" + /scripts/atlas-audit.sh +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/configmap-init-script.yaml b/helm/atlas-read/templates/configmap-init-script.yaml new file mode 100644 index 00000000000..279823fb016 --- /dev/null +++ b/helm/atlas-read/templates/configmap-init-script.yaml @@ -0,0 +1,268 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-init-script + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-init.sh: | + #!/bin/sh + {{ if or (eq .Values.global.svcIsolation.enabled true) (eq .Values.global.globalSvcIsolation.enabled true) }} + CASSANDRA_HOST="atlas-cassandra-online-dc" + {{ else }} + CASSANDRA_HOST="atlas-cassandra" + {{ end }} + {{ if or (eq .Values.global.esIsolation.enabled true) (eq .Values.global.globalSvcIsolation.enabled true) }} + ES_HOST="atlas-elasticsearch-read-master" + {{ else }} + ES_HOST="atlas-elasticsearch-master" + {{ end }} + ES_LOGGING="logging-master.logging.svc.cluster.local" + ### Check cassandra health + echo "Checking if Cassandra is up and running ..." + retries=0 + # Try to connect on Cassandra CQLSH port 9042 + nc -z $CASSANDRA_HOST 9042 + cassandra_status=$? + + while [[ $retries -lt 10 && $cassandra_status != 0 ]]; do + echo "Cassandra doesn't reply to requests on ports 7199 and/or 9042. Sleeping for a while and trying again... retry ${retries}" + + # Sleep for a while + sleep 2s + + nc -z $CASSANDRA_HOST 9042 + cassandra_status=$? + + let "retries++" + done + + if [ $cassandra_status -ne 0 ]; then + echo "/!\ ERROR: Cassandra check has ended with errors" + exit 1 + else + echo "Cassandra check completed successfully --- OK" + fi + + ### Check elastic search health + + es_status=0 + retries=0 + while [[ $retries -lt 10 && $es_status == 0 ]]; do + echo "Checking if ElasticSearch is up and running ..." + + cluster_color=`curl -XGET -s http://$ES_HOST:9200/_cluster/health | \ + python3 -c 'import sys, json; print(json.dumps(json.load(sys.stdin)["status"], sort_keys=True, indent=4))'` + + echo "Cluster is ${cluster_color}" + + if [ "$cluster_color" != "\"green\"" ] && [ "$cluster_color" != "\"yellow\"" ] ; then + echo "Elasticsearch $ES_HOST is not up, retrying in 2 secs ..." + + # Sleep for a while + sleep 2s + else + es_status=1 + break + fi + + let "retries++" + done + + if [ $es_status -ne 0 ] ; then + echo "Elasticsearch check completed successfully --- OK" + else + echo "/!\ ERROR: Elasticsearch check has ended with errors" + exit 1 + fi + + # Check logging health and create index with mapping + + es_log_status=0 + retries=0 + while [[ $retries -lt 10 && $es_log_status == 0 ]]; do + echo "Checking if Logging ElasticSearch is up and running ..." + + cluster_color=`curl -XGET -s http://$ES_HOST:9200/_cluster/health | \ + python3 -c 'import sys, json; print(json.dumps(json.load(sys.stdin)["status"], sort_keys=True, indent=4))'` + + echo "Logging Cluster is ${cluster_color}" + + if [ "$cluster_color" != "\"green\"" ] && [ "$cluster_color" != "\"yellow\"" ] ; then + echo "Elasticsearch $ES_HOST is not up, retrying in 2 secs ..." + + # Sleep for a while + sleep 2s + else + es_log_status=1 + # create index + echo "Creating Index ..." 
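+        # Create the "ranger-audit" index up front with explicit field mappings and a custom pattern tokenizer (atlan_ranger_tokenizer), so Ranger audit documents get predictable field types instead of relying on dynamic mapping.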
+ curl -kv -X PUT "http://$ES_HOST:9200/ranger-audit" \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "mappings": { + "properties": { + "expire_at": { + "type": "date", + "store": true, + "doc_values": true + }, + "ttl": { + "type": "text", + "store": true + }, + "version": { + "type": "long", + "store": true, + "index": false + }, + "access": { + "type": "keyword" + }, + "action": { + "type": "keyword" + }, + "agent": { + "type": "keyword" + }, + "agentHost": { + "type": "keyword" + }, + "cliIP": { + "type": "keyword" + }, + "cliType": { + "type": "keyword" + }, + "cluster": { + "type": "keyword" + }, + "reqContext": { + "type": "keyword" + }, + "enforcer": { + "type": "keyword" + }, + "event_count": { + "type": "long", + "doc_values": true + }, + "event_dur_ms": { + "type": "long", + "doc_values": true + }, + "evtTime": { + "type": "date", + "doc_values": true + }, + "id": { + "type": "keyword", + "store": true + }, + "logType": { + "type": "keyword" + }, + "policy": { + "type": "long", + "doc_values": true + }, + "proxyUsers": { + "type": "keyword" + }, + "reason": { + "type": "text" + }, + "repo": { + "type": "keyword" + }, + "repoType": { + "type": "integer", + "doc_values": true + }, + "req_caller_id": { + "type": "keyword" + }, + "req_self_id": { + "type": "keyword" + }, + "reqData": { + "type": "text" + }, + "reqUser": { + "type": "keyword" + }, + "reqEntityGuid": { + "type": "keyword" + }, + "resType": { + "type": "keyword" + }, + "resource": { + "type": "keyword", + "fields": { + "text": { + "type": "text", + "analyzer": "atlan_ranger_text_analyzer" + } + } + }, + "result": { + "type": "integer" + }, + "seq_num": { + "type": "long", + "doc_values": true + }, + "sess": { + "type": "keyword" + }, + "tags": { + "type": "keyword" + }, + "tags_str": { + "type": "text" + }, + "text": { + "type": "text" + }, + "zoneName": { + "type": "keyword" + }, + "policyVersion": { + "type": "long" + } + } + }, + "settings": { + "index": { + "analysis": { + "analyzer": { + "atlan_ranger_text_analyzer": { + "filter": [ + "lowercase" + ], + "type": "custom", + "tokenizer": "atlan_ranger_tokenizer" + } + }, + "tokenizer": { + "atlan_ranger_tokenizer": { + "pattern": "( |_|-|'\''|/|@)", + "type": "pattern" + } + } + } + } + } + }' + break + fi + + let "retries++" + done +{{- end }} diff --git a/helm/atlas-read/templates/configmap.yaml b/helm/atlas-read/templates/configmap.yaml new file mode 100644 index 00000000000..8ad0e049a23 --- /dev/null +++ b/helm/atlas-read/templates/configmap.yaml @@ -0,0 +1,514 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-application.properties: | + # + # Licensed to the Apache Software Foundation (ASF) under one + # or more contributor license agreements. See the NOTICE file + # distributed with this work for additional information + # regarding copyright ownership. The ASF licenses this file + # to you under the Apache License, Version 2.0 (the + # "License"); you may not use this file except in compliance + # with the License. 
You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + # See the License for the specific language governing permissions and + # limitations under the License. + # + + ######### Graph Database Configs ######### + + # Graph Database + + #Configures the graph database to use. Defaults to JanusGraph + #atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + + # Graph Storage + # Set atlas.graph.storage.backend to the correct value for your desired storage + # backend. Possible values: + # + # hbase + # cassandra + # embeddedcassandra - Should only be set by building Atlas with -Pdist,embedded-cassandra-solr + # berkeleyje + # + # See the configuration documentation for more information about configuring the various storage backends. + # + # atlas.graph.storage.backend=hbase2 + # atlas.graph.storage.hbase.table=apache_atlas_janus + + + #Hbase + #For standalone mode , specify localhost + #for distributed mode, specify zookeeper quorum here + # atlas.graph.storage.hostname= + # atlas.graph.storage.hbase.regions-per-server=1 + # atlas.graph.storage.lock.wait-time=10000 + + #In order to use Cassandra as a backend, comment out the hbase specific properties above, and uncomment the + #the following properties + #atlas.graph.storage.clustername= + #atlas.graph.storage.port= + atlas.graph.storage.backend=cql + {{ if or (eq .Values.global.svcIsolation.enabled true) (eq .Values.global.globalSvcIsolation.enabled true) }} + atlas.graph.storage.hostname=atlas-cassandra-online-dc + atlas.graph.storage.cql.local-datacenter=online-dc + {{ else }} + atlas.graph.storage.hostname=atlas-cassandra + {{ end }} + atlas.graph.storage.cql.keyspace=atlas + atlas.graph.storage.cql.replication-factor={{ .Values.cassandra.config.cluster_size }} + atlas.graph.storage.clustername={{ .Values.cassandra.config.cluster_name }} + atlas.graph.storage.port={{ .Values.cassandra.config.ports.cql }} + atlas.graph.query.fast-property=true + atlas.graph.query.batch=true + atlas.graph.storage.cql.remote-core-connections-per-host=5 + atlas.graph.storage.cql.remote-max-connections-per-host=5 + atlas.graph.storage.cql.request-timeout=5000 + atlas.graph.graph.replace-instance-if-exists=true + # Gremlin Query Optimizer + # + # Enables rewriting gremlin queries to maximize performance. This flag is provided as + # a possible way to work around any defects that are found in the optimizer until they + # are resolved. + #atlas.query.gremlinOptimizerEnabled=true + + # Delete handler + # + # This allows the default behavior of doing "soft" deletes to be changed. + # + # Allowed Values: + # org.apache.atlas.repository.store.graph.v1.SoftDeleteHandlerV1 - all deletes are "soft" deletes + # org.apache.atlas.repository.store.graph.v1.HardDeleteHandlerV1 - all deletes are "hard" deletes + # + atlas.DeleteHandlerV1.impl=org.apache.atlas.repository.store.graph.v1.SoftDeleteHandlerV1 + + # This allows delete-type selection per REST API call + # Ref: http://mail-archives.apache.org/mod_mbox/atlas-dev/201811.mbox/%3CJIRA.13169850.1530632244000.352730.1542268860569@Atlassian.JIRA%3E + atlas.rest.enable.delete.type.override=true + + # Entity audit repository + # + # This allows the default behavior of logging entity changes to hbase to be changed. 
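+    # Note: in this read deployment the entity audit writer is a no-op (NoopEntityAuditRepository) and audit search is served from Elasticsearch (ESBasedAuditRepository); see the impl settings below.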
+ # + # Allowed Values: + # org.apache.atlas.repository.audit.HBaseBasedAuditRepository - log entity changes to hbase + # org.apache.atlas.repository.audit.CassandraBasedAuditRepository - log entity changes to cassandra + # org.apache.atlas.repository.audit.NoopEntityAuditRepository - disable the audit repository + # + atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.NoopEntityAuditRepository + atlas.EntityAuditRepository.keyspace=atlas_audit + atlas.EntityAuditRepository.replicationFactor={{ .Values.cassandra.config.cluster_size }} + atlas.entity.audit.differential=true + atlas.EntityAuditRepositorySearch.impl=org.apache.atlas.repository.audit.ESBasedAuditRepository + # if Cassandra is used as a backend for audit from the above property, uncomment and set the following + # properties appropriately. If using the embedded cassandra profile, these properties can remain + # commented out. + # atlas.EntityAuditRepository.keyspace=atlas_audit + # atlas.EntityAuditRepository.replicationFactor=1 + + ######### Atlas Entity Attribute configs ######### + atlas.entities.attribute.allowed.large.attributes={{ .Values.atlas.config.entities_allowed_large_attributes }} + + # Graph Search Index + atlas.graph.index.search.backend=elasticsearch + + #Solr + #Solr cloud mode properties + # atlas.graph.index.search.solr.mode=cloud + # atlas.graph.index.search.solr.zookeeper-url= + # atlas.graph.index.search.solr.zookeeper-connect-timeout=60000 + # atlas.graph.index.search.solr.zookeeper-session-timeout=60000 + # atlas.graph.index.search.solr.wait-searcher=true + #Solr http mode properties + #atlas.graph.index.search.solr.mode=http + #atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr + + # ElasticSearch support (Tech Preview) + # Comment out above solr configuration, and uncomment the following two lines. Additionally, make sure the + # hostname field is set to a comma delimited set of elasticsearch master nodes, or an ELB that fronts the masters. 
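+    # Note: the search hostname below is templated on the isolation flags; it points at atlas-elasticsearch-read-master:9200 when esIsolation or globalSvcIsolation is enabled, and at atlas-elasticsearch-master:9200 otherwise.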
+ # + # Elasticsearch does not provide authentication out of the box, but does provide an option with the X-Pack product + # https://www.elastic.co/products/x-pack/security + # + # Alternatively, the JanusGraph documentation provides some tips on how to secure Elasticsearch without additional + # plugins: http://docs.janusgraph.org/latest/elasticsearch.html + + {{ if or (eq .Values.global.esIsolation.enabled true) (eq .Values.global.globalSvcIsolation.enabled true) }} + atlas.graph.index.search.hostname=atlas-elasticsearch-read-master:9200 + {{ else }} + atlas.graph.index.search.hostname=atlas-elasticsearch-master:9200 + {{ end }} + atlas.graph.index.search.elasticsearch.client-only=true + atlas.graph.index.search.elasticsearch.retry_on_conflict=5 + atlas.graph.index.search.max-result-set-size=1000 + atlas.index.audit.elasticsearch.total_field_limit=10000 + atlas.index.audit.elasticsearch.refresh_interval: 1s + + + # Solr-specific configuration property + # atlas.graph.index.search.max-result-set-size=150 + + ######### Notification Configs ######### + atlas.kafka.bootstrap.servers=kafka-0.kafka-headless.kafka.svc.cluster.local:9092,kafka-1.kafka-headless.kafka.svc.cluster.local:9092,kafka-2.kafka-headless.kafka.svc.cluster.local:9092 + + atlas.kafka.zookeeper.session.timeout.ms=60000 + atlas.kafka.zookeeper.connection.timeout.ms=30000 + atlas.kafka.zookeeper.sync.time.ms=20 + atlas.kafka.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 + + atlas.kafka.auto.commit.interval.ms=1000 + atlas.kafka.hook.group.id=atlas + atlas.kafka.enable.auto.commit=false + atlas.kafka.auto.offset.reset=earliest + atlas.kafka.session.timeout.ms=30000 + atlas.kafka.offsets.topic.replication.factor=2 + atlas.kafka.poll.timeout.ms=2000 + + atlas.notification.create.topics=true + atlas.notification.replicas=3 + atlas.notification.topics=ATLAS_HOOK,ATLAS_ENTITIES + atlas.notification.log.failed.messages=true + atlas.notification.failed.messages.filename=atlas_hook_failed_messages.log + atlas.notification.consumer.retry.interval=3000 + atlas.notification.hook.retry.interval=3000 + # Enable for Kerberized Kafka clusters + #atlas.notification.kafka.service.principal=kafka/_HOST@EXAMPLE.COM + #atlas.notification.kafka.keytab.location=/etc/security/keytabs/kafka.service.keytab + + ## Server port configuration + #atlas.server.http.port=21000 + #atlas.server.https.port=21443 + + ######### Security Properties ######### + + # SSL config + atlas.enableTLS=false + + #truststore.file=/path/to/truststore.jks + #cert.stores.credential.provider.path=jceks://file/path/to/credentialstore.jceks + + #following only required for 2-way SSL + #keystore.file=/path/to/keystore.jks + + # Authentication config + + atlas.authentication.method.kerberos=false + atlas.authentication.method.file=false + + atlas.authentication.method.keycloak=true + atlas.authentication.method.keycloak.file=${sys:atlas.home}/conf/keycloak.json + atlas.authentication.method.keycloak.ugi-groups=false + atlas.authentication.method.keycloak.groups_claim=groups + + #### ldap.type= LDAP or AD + atlas.authentication.method.ldap.type=none + + #### user credentials file + atlas.authentication.method.file.filename=${sys:atlas.home}/conf/users-credentials.properties + + ### groups from UGI + #atlas.authentication.method.ldap.ugi-groups=true + + ######## LDAP properties ######### + 
#atlas.authentication.method.ldap.url=ldap://:389 + #atlas.authentication.method.ldap.userDNpattern=uid={0},ou=People,dc=example,dc=com + #atlas.authentication.method.ldap.groupSearchBase=dc=example,dc=com + #atlas.authentication.method.ldap.groupSearchFilter=(member=uid={0},ou=Users,dc=example,dc=com) + #atlas.authentication.method.ldap.groupRoleAttribute=cn + #atlas.authentication.method.ldap.base.dn=dc=example,dc=com + #atlas.authentication.method.ldap.bind.dn=cn=Manager,dc=example,dc=com + #atlas.authentication.method.ldap.bind.password= + #atlas.authentication.method.ldap.referral=ignore + #atlas.authentication.method.ldap.user.searchfilter=(uid={0}) + #atlas.authentication.method.ldap.default.role= + + + ######### Active directory properties ####### + #atlas.authentication.method.ldap.ad.domain=example.com + #atlas.authentication.method.ldap.ad.url=ldap://:389 + #atlas.authentication.method.ldap.ad.base.dn=(sAMAccountName={0}) + #atlas.authentication.method.ldap.ad.bind.dn=CN=team,CN=Users,DC=example,DC=com + #atlas.authentication.method.ldap.ad.bind.password= + #atlas.authentication.method.ldap.ad.referral=ignore + #atlas.authentication.method.ldap.ad.user.searchfilter=(sAMAccountName={0}) + #atlas.authentication.method.ldap.ad.default.role= + + ######### JAAS Configuration ######## + + #atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule + #atlas.jaas.KafkaClient.loginModuleControlFlag = required + #atlas.jaas.KafkaClient.option.useKeyTab = true + #atlas.jaas.KafkaClient.option.storeKey = true + #atlas.jaas.KafkaClient.option.serviceName = kafka + #atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab + #atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + + ######### Server Properties ######### + atlas.rest.address=http://localhost:21000 + # If enabled and set to true, this will run setup steps when the server starts + #atlas.server.run.setup.on.start=false + + ######### Entity Audit Configs ######### + # atlas.audit.hbase.tablename=apache_atlas_entity_audit + # atlas.audigit.zookeeper.session.timeout.ms=1000 + # atlas.audit.hbase.zookeeper.quorum=localhost:2181 + + ######### High Availability Configuration ######## + {{- if eq .Values.atlas_ha false }} + atlas.server.ha.enabled=false + {{- else if eq .Values.atlas_ha true }} + atlas.server.ha.enabled=true + {{- else }} + atlas.server.ha.enabled=false + {{- end }} + atlas.server.type.cache-refresher=http://cinv.atlas.svc.cluster.local:5000/cinv + atlas.server.type.cache-refresher-health=http://cinv.atlas.svc.cluster.local:5000/health + #### Enabled the configs below as per need if HA is enabled ##### + {{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} + atlas.server.ids=id1,id2 + atlas.server.address.id1=atlas-read-0.atlas-read-service-atlas.atlas.svc.cluster.local:21000 + atlas.server.address.id2=atlas-read-1.atlas-read-service-atlas.atlas.svc.cluster.local:21000 + {{- end }} + atlas.server.ha.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 + atlas.server.ha.zookeeper.retry.sleeptime.ms=10000 + atlas.server.ha.zookeeper.num.retries=18 + atlas.server.ha.zookeeper.session.timeout.ms=20000 + ## if ACLs need to be set on the created nodes, uncomment these lines and set the values ## + #atlas.server.ha.zookeeper.acl=: + #atlas.server.ha.zookeeper.auth=: + + ######### Atlas Authorization ######### + 
#atlas.authorizer.impl=none + + {{- if eq .Values.atlas_auth true }} + atlas.authorizer.impl=atlas + {{- else }} + atlas.authorizer.impl=org.apache.ranger.authorization.atlas.authorizer.RangerAtlasAuthorizer + {{- end }} + + atlas.authorizer.enable.delta_based_refresh={{ .Values.atlas.authorizer.enable_delta_based_refresh }} + atlas.authorizer.enable.abac={{ .Values.atlas.authorizer.enable_abac }} + + #atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + + ######### Atlas User service ######### + atlas.user-service-url=http://heracles-service.heracles.svc.cluster.local + + ######### Type Cache Implementation ######## + # A type cache class which implements + # org.apache.atlas.typesystem.types.cache.TypeCache. + # The default implementation is org.apache.atlas.typesystem.types.cache.DefaultTypeCache which is a local in-memory type cache. + #atlas.TypeCache.impl= + + ######### Performance Configs ######### + atlas.graph.storage.lock.retries=5 + + ######### Redis Cache Configs ######### + {{- if eq .Values.atlas.cache.enabled false }} + atlas.graph.cache.db-cache=false + {{- else if eq .Values.atlas.cache.enabled true }} + atlas.graph.cache.db-cache=true + atlas.graph.metrics.merge-stores=false + atlas.graph.cache.cache-type=redis + atlas.graph.cache.db-cache-expiry-time=86400000 + atlas.graph.cache.cache-keyspace-prefix=atlas + atlas.graph.cache.redis-db-id=1 + atlas.graph.cache.redis-client-name=atlas-metastore + atlas.graph.cache.redis-cache-size=100000 + atlas.graph.cache.redis-cache-server-mode=sentinel + atlas.graph.cache.redis-cache-server-url=redis://{{ .Values.atlas.redis.host }}:{{ .Values.atlas.redis.port }} + atlas.graph.cache.redis-cache-sentinel-urls={{ .Values.atlas.redis.sentinel_urls }} + atlas.graph.cache.redis-cache-lock-watchdog-ms=300000 + atlas.graph.cache.redis-cache-username={{ .Values.atlas.redis.username }} + atlas.graph.cache.redis-cache-password={{ .Values.atlas.redis.password }} + atlas.graph.cache.redis-cache-mastername={{ .Values.atlas.redis.master_name }} + atlas.graph.cache.redis-cache-connectTimeout=2000 + {{- end }} + + atlas.webserver.minthreads=40 + # Maximum number of threads in the atlas web server + atlas.webserver.maxthreads=400 + # Keepalive time in secs for the thread pool of the atlas web server + atlas.webserver.keepalivetimesecs=30 + # Queue size for the requests(when max threads are busy) for the atlas web server + atlas.webserver.queuesize=200 + ######### CSRF Configs ######### + atlas.rest-csrf.enabled=false + #atlas.rest-csrf.browser-useragents-regex=^Mozilla.*,^Opera.*,^Chrome.* + #atlas.rest-csrf.methods-to-ignore=GET,OPTIONS,HEAD,TRACE + #atlas.rest-csrf.custom-header=X-XSRF-HEADER + + ############ KNOX Configs ################ + #atlas.sso.knox.browser.useragent=Mozilla,Chrome,Opera + #atlas.sso.knox.enabled=true + #atlas.sso.knox.providerurl=https://:8443/gateway/knoxsso/api/v1/websso + #atlas.sso.knox.publicKey= + + ############ Atlas Metric/Stats configs ################ + # Format: atlas.metric.query.. + atlas.metric.query.cache.ttlInSecs=900 + #atlas.metric.query.general.typeCount= + #atlas.metric.query.general.typeUnusedCount= + #atlas.metric.query.general.entityCount= + #atlas.metric.query.general.tagCount= + #atlas.metric.query.general.entityDeleted= + # + #atlas.metric.query.entity.typeEntities= + #atlas.metric.query.entity.entityTagged= + # + #atlas.metric.query.tags.entityTags= + + ######### Compiled Query Cache Configuration ######### + + # The size of the compiled query cache. 
Older queries will be evicted from the cache + # when we reach the capacity. + + #atlas.CompiledQueryCache.capacity=1000 + + # Allows notifications when items are evicted from the compiled query + # cache because it has become full. A warning will be issued when + # the specified number of evictions have occurred. If the eviction + # warning threshold <= 0, no eviction warnings will be issued. + + #atlas.CompiledQueryCache.evictionWarningThrottle=0 + + + ######### Full Text Search Configuration ######### + + #Set to false to disable full text search. + #atlas.search.fulltext.enable=true + + + ########## Atlas Discovery ############# + atlas.objectId.support.entity.attributes=true + atlas.objectId.entity.attributes=AtlasGlossary:name,AtlasGlossaryTerm:name,AtlasGlossaryCategory:name + + + ######### Gremlin Search Configuration ######### + + #Set to false to disable gremlin search. + atlas.search.gremlin.enable=false + + + ########## Add http headers ########### + + #atlas.headers.Access-Control-Allow-Origin=* + #atlas.headers.Access-Control-Allow-Methods=GET,OPTIONS,HEAD,PUT,POST,DELETE + #atlas.headers.Access-Control-Allow-Headers=* + #atlas.headers.= + + + ########## Slack Notification ############# + atlas.notifications.slackWebhook={{ .Values.atlas.notification.slackWebhook }} + + {{ if .Values.atlas.redis.enabled }} + {{ printf "\n" }} + ########## Add query metastore ########### + atlan.cache.redis.host={{ .Values.atlas.redis.host }} + atlan.cache.redis.port={{ .Values.atlas.redis.port }} + atlan.cache.redis.password={{ .Values.atlas.redis.password }} + atlas.cache.redis.maxConnections={{ .Values.atlas.redis.maxConnections }} + atlas.cache.redis.timeout={{ .Values.atlas.redis.timeout }} + atlan.EntityCacheListener.impl=org.apache.atlas.repository.cache.EntityCacheListenerV2 + atlan.QueryCacheRepository.impl=org.apache.atlas.repository.cache.AtlanQueryCacheRepository + {{ printf "\n" }} + + {{ end }} + + + ########## Atlas Monitoring ############# + + atlas.graph.metrics.enabled = true + atlas.graph.metrics.jmx.enabled = true + atlas.statsd.enable = true + + ########## Atlas deferred-actions (background tasks) ############# + + atlas.tasks.enabled = false + + + ########## Ranger Credentials + + atlas.ranger.username = admin + atlas.ranger.password = {{ .Values.atlas.ranger.RANGER_PASSWORD }} + atlas.ranger.base.url = {{ .Values.atlas.ranger.RANGER_SERVICE_URL }} + + ####### Redis credentials ####### + ###### allowed values for redis implementation ########## + # org.apache.atlas.service.redis.RedisServiceImpl - connects to sentinel cluster, for prod. + # org.apache.atlas.service.redis.RedisServiceLocalImpl - connects to local redis cluster, for local dev. + # org.apache.atlas.service.redis.NoRedisServiceImpl - default, dummy redis implementation. 
+ atlas.redis.service.impl = org.apache.atlas.service.redis.RedisServiceImpl + atlas.redis.url = redis://{{ .Values.atlas.redis.host }}:{{ .Values.atlas.redis.port }} + atlas.redis.sentinel.urls = {{ .Values.atlas.redis.sentinel_urls }} + atlas.redis.username = {{ .Values.atlas.redis.username }} + atlas.redis.password = {{ .Values.atlas.redis.password }} + atlas.redis.master_name = {{ .Values.atlas.redis.master_name }} + atlas.redis.lock.wait_time.ms=15000 + # Renew lock for every 10mins + atlas.redis.lock.watchdog_timeout.ms=600000 + + atlas.jetty.request.buffer.size=32768 + + # valid uri patterns to collect metrics + atlas.metrics.uri_patterns=/api/(meta|atlas/v2)/glossary/terms/[^/]+/assignedEntities,/api/(meta|atlas/v2)/lineage/[^/]+,/api/(meta|atlas/v2)/lineage/list,/api/(meta|atlas/v2)/entity/accessors,/api/(meta|atlas/v2)/entity/auditSearch,/api/(meta|atlas/v2)/entity/bulk,/api/(meta|atlas/v2)/entity/bulk/setClassifications,/api/(meta|atlas/v2)/entity/bulk/uniqueAttribute,/api/(meta|atlas/v2)/entity/evaluator,/api/(meta|atlas/v2)/entity/guid/[^/]+,/api/(meta|atlas/v2)/entity/guid/[^/]+/businessmetadata,/api/(meta|atlas/v2)/entity/uniqueAttribute/type/[^/]+,/api/(meta|atlas/v2)/search/indexsearch,/api/(meta|atlas/v2)/entity/repairhaslineage,/api/(meta|atlas/v2)/types/typedef/name/[^/]+,/api/(meta|atlas/v2)/types/typedefs,/api/atlas/admin/metrics/prometheus,/api/atlas/admin/pushMetricsToStatsd,/api/atlas/v2/auth/download/policies/[^/]+,/api/atlas/v2/auth/download/roles/[^/]+,/api/atlas/v2/auth/download/users/[^/]+,/api/meta/entity/uniqueAttribute/type/[^/]+,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/groups,/auth/admin/realms/[^/]+/groups/[^/]+/role-mappings/realm,/auth/admin/realms/[^/]+/roles,/auth/admin/realms/[^/]+/roles-by-id/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+/composites,/auth/admin/realms/[^/]+/roles/[^/]+/groups,/auth/admin/realms/[^/]+/roles/[^/]+/users,/auth/admin/realms/[^/]+/users,/auth/admin/realms/[^/]+/users/[^/]+/groups,/auth/admin/realms/[^/]+/users/[^/]+/role-mappings/realm,/auth/realms/[^/]+/protocol/openid-connect/token,/auth/realms/[^/]+/protocol/openid-connect/token/introspect,/users/mappings,/roles/mappings,/api/(meta|atlas/v2)/business-policy/[^/]+/unlink-business-policy,/api/(meta|atlas/v2)/business-policy/link-business-policy,/api/(meta|atlas/v2)/direct/search,/api/(meta|atlas/v2)/attribute/update + + atlas.metrics.method_level.enable=true + atlas.metrics.method_patterns=policiesPrefetchFailed,processTermAssignments,elasticSearchQuery,elasticQueryTimeout,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,scrubEntityHeader,getDiffResult + {{ if .Values.atlas.janusgraph.atomic_mutation }} + ### Atomic batch related configs ### + atlas.graph.storage.cql.atomic-batch-mutate={{ .Values.atlas.janusgraph.atomic_mutation }} + atlas.graph.storage.buffer-size={{ .Values.atlas.janusgraph.janusgraph_tx_buffer_size }} + {{ end }} + 
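Editor's note: the Redis and JanusGraph properties above are rendered from chart values. A minimal sketch of the matching values.yaml overrides, assuming the defaults that appear later in this chart's values.yaml and using placeholder connection details (not part of this patch):

  atlas:
    redis:
      enabled: true
      host: redis-master.redis.svc.cluster.local        # placeholder; chart default is ${USER_REDIS_HOST}
      port: 6379
      sentinel_urls: redis-sentinel-0:26379,redis-sentinel-1:26379   # placeholder sentinel list
      master_name: mymaster                              # placeholder master set name
      username: atlas-user                               # placeholder credentials
      password: change-me
    janusgraph:
      atomic_mutation: true             # rendered into atlas.graph.storage.cql.atomic-batch-mutate
      janusgraph_tx_buffer_size: 8192   # rendered into atlas.graph.storage.buffer-size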
+ ######### Canary-Release ######### + atlas.canary.keycloak.token-introspection = {{ .Values.atlas.keycloak.token_introspection}} + atlas.keycloak.introspection.use.cache = {{ .Values.atlas.keycloak.introspection_cache }} + + ######### Atlas Indexsearch configs ######### + atlas.indexsearch.enable.api.limit={{ .Values.atlas.indexsearch.enable_api_limit }} + atlas.indexsearch.query.size.max.limit={{ .Values.atlas.indexsearch.query_size_max_limit }} + atlas.indexsearch.async.enable={{ .Values.atlas.indexsearch.enable_async }} + atlas.indexsearch.async.search.keep.alive.time.in.seconds={{ .Values.atlas.indexsearch.request_timeout_in_secs }} + atlas.indexsearch.enable.janus.optimization={{ .Values.atlas.indexsearch.enable_janus_optimization }} + atlas.indexsearch.enable.janus.optimization.for.relationship={{ .Values.atlas.indexsearch.enable_janus_optimization_for_relationship }} + atlas.indexsearch.enable.janus.optimization.extended={{ .Values.atlas.indexsearch.enable_janus_optimization_extended }} + atlas.indexsearch.enable.janus.optimization.for.classifications={{ .Values.atlas.indexsearch.enable_janus_optimization_for_classifications }} + atlas.indexsearch.enable.janus.optimization.for.lineage={{ .Values.atlas.indexsearch.enable_janus_optimization_for_lineage }} + atlas.jg.super.vertex.edge.count={{ .Values.atlas.jg.super_vertex_edge_count }} + atlas.jg.super.vertex.edge.timeout={{ .Values.atlas.jg.super_vertex_edge_timeout }} + + ######### Atlas Bulk API configs ######### + atlas.bulk.api.max.entities.allowed={{ .Values.atlas.bulk.max_entities_allowed }} + atlas.bulk.api.enable.janus.optimization={{ .Values.atlas.bulk.enable_janus_optimization }} + + ######### Atlas Lineage configs ######### + atlas.lineage.optimised.calculation={{ .Values.atlas.lineage.optimised_calculation }} + atlas.lineage.enable.connection.lineage={{ .Values.atlas.lineage.enable_connection_lineage }} + ######### Atlas Distributed Task configs ######### + atlas.distributed.task.enabled={{ .Values.atlas.distributed_task.enabled }} + {{- if eq .Values.atlas.distributed_task.enabled true }} + atlas.relationship.cleanup.supported.asset.types={{ .Values.atlas.distributed_task.cleanup_supported_asset_types }} + atlas.relationship.cleanup.supported.relationship.labels={{ .Values.atlas.distributed_task.cleanup_supported_relationship_labels }} + {{- end }} + + ######### Atlas Typedefs update configs ######### + atlas.types.update.async.enable={{ .Values.atlas.types_update.async_enable }} + atlas.types.update.thread.count={{ .Values.atlas.types_update.thread_count }} + atlas.typedef.lock.name={{ .Values.atlas.types_update.lock_name }} +{{- end }} + diff --git a/helm/atlas-read/templates/create-atlas-keycloak-config-cm.yaml b/helm/atlas-read/templates/create-atlas-keycloak-config-cm.yaml new file mode 100644 index 00000000000..22401a1a941 --- /dev/null +++ b/helm/atlas-read/templates/create-atlas-keycloak-config-cm.yaml @@ -0,0 +1,34 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: create-atlas-read-keycloak-config-cm + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + create-atlas-keycloak-config.sh: | + cat <<EOF > /opt/apache-atlas/conf/keycloak.json + { + "realm": "KEYCLOAK_REALM", + "auth-server-url": "AUTH_SERVER_URL", + "ssl-required": "none", + "resource": "KEYCLOAK_CLIENT_ID", + "public-client": false, + "confidential-port": 80, + "principal-attribute": "preferred_username", + "autodetect-bearer-only": true, + "credentials": { + "secret": "KEYCLOAK_CLIENT_SECRET" + } + } + EOF + sed -i "s|KEYCLOAK_REALM|$KEYCLOAK_REALM|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|AUTH_SERVER_URL|$AUTH_SERVER_URL|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|KEYCLOAK_CLIENT_ID|$KEYCLOAK_CLIENT_ID|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|KEYCLOAK_CLIENT_SECRET|$KEYCLOAK_CLIENT_SECRET|g" "/opt/apache-atlas/conf/keycloak.json" + echo "Keycloak Config Created" +{{- end }} diff --git a/helm/atlas-read/templates/deployment.yaml b/helm/atlas-read/templates/deployment.yaml new file mode 100644 index 00000000000..1f71ec45528 --- /dev/null +++ b/helm/atlas-read/templates/deployment.yaml @@ -0,0 +1,242 @@ +{{- if and (.Values.deploy) (.Values.deploy.enabled) }} +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: atlas-read + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "atlas-read-config,atlas-read-logback-config,atlas-read-audit-index,atlas-read-keycloak-config,atlas-read-init-script,atlas-read-init-container-script" + secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-read-keycloak-config" +spec: + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + {{- if not (and (.Values.hpa) (.Values.hpa.enabled)) }} + replicas: {{ .Values.atlas.replicaCount }} + {{- end }} + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + annotations: +{{ toYaml .Values.atlas.podAnnotations | indent 8 }} + spec: + {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} + affinity: + nodeAffinity: + {{- if eq .Values.atlas.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.atlas.custom_deployment.instance_type }} + - {{ . 
}} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + {{- if eq .Values.atlas.podAntiAffinity true }} + podAntiAffinity: + {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} + {{- if or .Values.atlas.tolerations $multiarchEnabled }} + tolerations: + {{- if .Values.atlas.tolerations }} + {{ toYaml .Values.atlas.tolerations | nindent 8 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} + initContainers: + {{- if .Values.atlas.initContainers }} + {{- toYaml .Values.atlas.initContainers | nindent 8 }} + {{- end }} + serviceAccountName: cinv-sa + containers: + - name: {{ .Chart.Name }}-main + command: [ + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] + image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" + imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} + ports: + - containerPort: {{ .Values.atlas.service.targetPort }} + env: + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: Namespace + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_SERVICE_NAME + value: atlas + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://$(HOST_IP):4317 + - name: OTEL_RESOURCE_ATTRIBUTES + value: >- + k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs + {{- toYaml .Values.atlas.env | nindent 12 }} + {{- if eq .Values.albTenant true }} + - name: ALB_ENABLED + value: "true" + {{- end }} + envFrom: + - secretRef: + name: atlas-read-keycloak-config + {{- if .Values.multitenant }} + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name + {{- end }} + resources: + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if eq $tierType "Enterprise" }} + {{ toYaml .Values.atlas.resources | nindent 12 }} + {{- else if eq $tierType "Basic" }} + {{ toYaml .Values.atlas.resources_basic | nindent 12 }} + {{- else if eq $tierType "Standard" }} + {{ toYaml .Values.atlas.resources_standard | nindent 12 }} + {{- else }} + {{- toYaml .Values.atlas.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: atlas-read-config + mountPath: /opt/apache-atlas/conf/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-read-logback-config + mountPath: /opt/apache-atlas/conf/atlas-logback.xml + subPath: atlas-logback.xml + - name: create-atlas-keycloak-config + mountPath: 
/create-atlas-keycloak-config.sh + subPath: create-atlas-keycloak-config.sh + - name: atlas-logs + mountPath: /opt/apache-atlas/logs + {{- if .Values.atlas.lifecycle }} + lifecycle: + {{- toYaml .Values.atlas.lifecycle | nindent 12 }} + {{- end }} + {{- if .Values.atlas.livenessProbe }} + livenessProbe: + {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.readinessProbe }} + readinessProbe: + {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegrafd + imagePullPolicy: IfNotPresent + {{- if and .Values.multiarch.enabled .Values.multiarch.image.telegrafd }} + image: {{ .Values.multiarch.image.telegrafd }} + {{- else }} + image: ghcr.io/atlanhq/telegraf:1.24.3 + {{- end }} + resources: + {{- toYaml .Values.atlas.telegraf.resources | nindent 12 }} + volumeMounts: + - name: telegraf-conf + mountPath: /etc/telegraf/ + ports: + - name: telegrafd + containerPort: 9273 + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} + {{- end }} + volumes: + - name: atlas-logs + emptyDir: {} + - name: atlas-read-config + configMap: + name: atlas-read-config + - name: atlas-read-logback-config + configMap: + name: atlas-read-logback-config + - name: create-atlas-keycloak-config + configMap: + name: create-atlas-read-keycloak-config-cm + defaultMode: 0755 + - name: atlas-read-init-script + configMap: + name: atlas-read-init-script + defaultMode: 0755 + - name: atlas-read-init-container-script + configMap: + name: atlas-read-init-container-script + defaultMode: 0755 + - name: atlas-read-audit-index + configMap: + name: atlas-read-audit-index + defaultMode: 0755 + - name: atlas-config-map-rw-vol + emptyDir: {} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegraf-conf + configMap: + name: atlas-read-telegrafd + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/templates/frontend-service.yaml b/helm/atlas-read/templates/frontend-service.yaml new file mode 100644 index 00000000000..4720228c6d5 --- /dev/null +++ b/helm/atlas-read/templates/frontend-service.yaml @@ -0,0 +1,22 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: Service +metadata: + name: atlas-read-ui-service + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: {{ .Values.atlas.service.type }} + ports: + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + selector: + app: {{ template "name" . 
}} + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atlas-read/templates/hpa.yaml b/helm/atlas-read/templates/hpa.yaml new file mode 100644 index 00000000000..a5abf61df3d --- /dev/null +++ b/helm/atlas-read/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if and (.Values.deploy) (.Values.deploy.enabled) (.Values.hpa) (.Values.hpa.enabled) -}} +{{- if and (.Capabilities.APIVersions.Has "autoscaling/v2") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: autoscaling/v2 +{{- else -}} +apiVersion: autoscaling/v2beta2 +{{- end }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Values.hpa.name }} + namespace: {{ .Values.Namespace | default "default" }} + labels: +{{ toYaml .Values.hpa.labels | indent 8 }} +spec: + minReplicas: {{ .Values.atlas.replicaCount }} + maxReplicas: {{ add (int .Values.atlas.replicaCount) 2 }} + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Values.hpa.name }} + metrics: + - type: Resource + resource: + name: memory + target: + averageUtilization: {{ .Values.hpa.memory.averageUtilization }} + type: Utilization + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.hpa.cpu.averageUtilization }} +{{- end }} diff --git a/helm/atlas-read/templates/keycloak-config-secret.yaml b/helm/atlas-read/templates/keycloak-config-secret.yaml new file mode 100644 index 00000000000..786da1932b5 --- /dev/null +++ b/helm/atlas-read/templates/keycloak-config-secret.yaml @@ -0,0 +1,18 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.secrets }} +apiVersion: v1 +kind: Secret +metadata: + name: atlas-read-keycloak-config + namespace: {{ .Values.Namespace | default "default" }} +type: Opaque +stringData: + KEYCLOAK_REALM: {{ .Values.atlas.secrets.KEYCLOAK_REALM }} + AUTH_SERVER_URL: {{ .Values.atlas.secrets.AUTH_SERVER_URL }} + KEYCLOAK_CLIENT_ID: {{ .Values.atlas.secrets.KEYCLOAK_CLIENT_ID }} + {{- if .Values.multitenant }} + {{ else }} + KEYCLOAK_CLIENT_SECRET: {{ .Values.atlas.secrets.KEYCLOAK_CLIENT_SECRET }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/primary-ingress.yaml b/helm/atlas-read/templates/primary-ingress.yaml new file mode 100644 index 00000000000..8ffc910a173 --- /dev/null +++ b/helm/atlas-read/templates/primary-ingress.yaml @@ -0,0 +1,83 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.ingress.enabled -}} +{{- if eq .Values.atlas.Deployment_Type "Development" -}} +{{- $ingressApiIsStable := eq (include "atlas.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "atlas.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "atlas.ingress.supportsPathType" .) "true" -}} +{{- $servicePort := .Values.atlas.service.port -}} +{{- $ingressPath := .Values.atlas.ingress.path -}} +{{- $ingressPathType := .Values.atlas.ingress.pathType -}} +{{- $extraPaths := .Values.atlas.ingress.extraPaths -}} +apiVersion: {{ include "atlas.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: atlas-atlas-read + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- if .Values.atlas.ingress.annotations }} + annotations: + {{- range $key, $value := .Values.atlas.ingress.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.atlas.ingress.ingressClassName }} + ingressClassName: {{ .Values.atlas.ingress.ingressClassName }} + {{- end -}} +{{- if .Values.atlas.ingress.tls }} + tls: + - hosts: + - {{ .Values.atlas.ingress.tls.host }} + secretName: {{ .Values.atlas.ingress.tls.secretName }} +{{- end }} + rules: + {{- if .Values.atlas.ingress.hosts }} + {{- range .Values.atlas.ingress.hosts }} + - host: {{ tpl . $}} + http: + paths: +{{- if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-read-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-read-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-read-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-read-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- if $ingressPath }} + path: {{ $ingressPath }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/service.yaml b/helm/atlas-read/templates/service.yaml new file mode 100644 index 00000000000..94a94ca187c --- /dev/null +++ b/helm/atlas-read/templates/service.yaml @@ -0,0 +1,41 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +apiVersion: v1 +kind: Service +metadata: + name: atlas-read-service-atlas + namespace: {{ .Values.Namespace }} + annotations: + {{- if eq .Values.global.cloud "gcp" }} + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- else if eq .Values.global.cloud "azure" }} + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- else }} + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + {{- if eq .Values.albTenant true }} + type: {{ .Values.atlas.service.type }} + {{- else if eq .Values.global.cloud "gcp" }} + type: ClusterIP + {{- else if eq .Values.global.cloud "azure" }} + type: ClusterIP + {{- else }} + type: ClusterIP + {{- end }} + ports: + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + selector: + app: {{ template "name" . 
}} + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atlas-read/templates/statefulset.yaml b/helm/atlas-read/templates/statefulset.yaml new file mode 100644 index 00000000000..2efdd9c7c09 --- /dev/null +++ b/helm/atlas-read/templates/statefulset.yaml @@ -0,0 +1,246 @@ +{{- if and (not (and (.Values.deploy) (.Values.deploy.enabled))) (or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled)) }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: atlas-read + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "atlas-read-config,atlas-read-logback-config,atlas-read-audit-index,atlas-read-keycloak-config,atlas-read-init-script,atlas-read-init-container-script" + secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-read-keycloak-config" + argocd.argoproj.io/sync-wave: "1" +spec: + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + replicas: {{ .Values.atlas.replicaCount }} + serviceName: "atlas-read-service-atlas" + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + annotations: +{{ toYaml .Values.atlas.podAnnotations | indent 8 }} + spec: + {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} + affinity: + nodeAffinity: + {{- if eq .Values.atlas.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.atlas.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- if and .Values.multiarch (hasKey .Values.multiarch "enabled") (eq .Values.multiarch.enabled true) }} + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + {{- end }} + {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + {{- if eq .Values.atlas.podAntiAffinity true }} + podAntiAffinity: + {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} + {{- if or .Values.atlas.tolerations $multiarchEnabled }} + tolerations: + {{- if .Values.atlas.tolerations }} + {{ toYaml .Values.atlas.tolerations | nindent 8 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} + initContainers: + {{- if .Values.atlas.initContainers }} + {{- toYaml .Values.atlas.initContainers | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }}-main + command: [ + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] + image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" + imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} + ports: + - containerPort: {{ .Values.atlas.service.targetPort }} + env: + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: Namespace + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_SERVICE_NAME + value: atlas + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://$(HOST_IP):4317 + - name: OTEL_RESOURCE_ATTRIBUTES + value: >- + k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs + {{- toYaml .Values.atlas.env | nindent 12 }} + {{- if eq .Values.albTenant true }} + - name: ALB_ENABLED + value: "true" + {{- end }} + envFrom: + - secretRef: + name: atlas-read-keycloak-config + {{- if .Values.multitenant }} + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name + {{- end }} + resources: + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if eq $tierType "Enterprise" }} + {{ toYaml .Values.atlas.resources | nindent 12 }} + {{- else if eq $tierType "Basic" }} + {{ toYaml .Values.atlas.resources_basic | nindent 12 }} + {{- else if eq $tierType "Standard" }} + {{ toYaml .Values.atlas.resources_standard | nindent 12 }} + {{- else }} + {{- toYaml .Values.atlas.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: atlas-read-config + mountPath: /opt/apache-atlas/conf/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-read-logback-config + mountPath: /opt/apache-atlas/conf/atlas-logback.xml + subPath: atlas-logback.xml + - name: create-atlas-keycloak-config + mountPath: /create-atlas-keycloak-config.sh + subPath: create-atlas-keycloak-config.sh + - name: atlas-logs + mountPath: /opt/apache-atlas/logs + {{- if .Values.atlas.lifecycle }} + lifecycle: + {{- toYaml .Values.atlas.lifecycle | nindent 12 }} + {{- end }} + {{- if .Values.atlas.livenessProbe }} + livenessProbe: + {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.readinessProbe }} + readinessProbe: + {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegrafd + imagePullPolicy: 
IfNotPresent + {{- if and .Values.multiarch.enabled .Values.multiarch.image.telegrafd }} + image: {{ .Values.multiarch.image.telegrafd }} + {{- else }} + image: ghcr.io/atlanhq/telegraf:1.24.3 + {{- end }} + volumeMounts: + - name: telegraf-conf + mountPath: /etc/telegraf/ + ports: + - name: telegrafd + containerPort: 9273 + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} + {{- end }} + volumes: + - name: atlas-logs + emptyDir: {} + - name: atlas-read-config + configMap: + name: atlas-read-config + - name: atlas-read-logback-config + configMap: + name: atlas-read-logback-config + - name: create-atlas-keycloak-config + configMap: + name: create-atlas-read-keycloak-config-cm + defaultMode: 0755 + - name: atlas-read-init-script + configMap: + name: atlas-read-init-script + defaultMode: 0755 + - name: atlas-read-init-container-script + configMap: + name: atlas-read-init-container-script + defaultMode: 0755 + - name: atlas-read-audit-index + configMap: + name: atlas-read-audit-index + defaultMode: 0755 + - name: atlas-config-map-rw-vol + emptyDir: {} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegraf-conf + configMap: + name: atlas-read-telegrafd + {{- end }} +{{- end }} diff --git a/helm/atlas-read/templates/statsd-cronjob.yaml b/helm/atlas-read/templates/statsd-cronjob.yaml new file mode 100644 index 00000000000..5660a22ec49 --- /dev/null +++ b/helm/atlas-read/templates/statsd-cronjob.yaml @@ -0,0 +1,65 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.statsdJob.enabled }} +{{- if and (.Capabilities.APIVersions.Has "batch/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: batch/v1 +{{- else -}} +apiVersion: batch/v1beta1 +{{- end }} +kind: CronJob +metadata: + name: atlas-read-statsd-job + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + concurrencyPolicy: Replace + schedule: "{{ .Values.atlas.statsdJob.schedule }}" + jobTemplate: + spec: + template: + spec: + {{- if or (and .Values.multiarch (eq .Values.multiarch.enabled true)) .Values.atlas.tolerations }} + tolerations: + {{- if and .Values.multiarch (eq .Values.multiarch.enabled true) }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- if .Values.atlas.tolerations }} + {{ toYaml .Values.atlas.tolerations | nindent 10 }} + {{- end }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 12 }} + {{- end }} + containers: + - name: metrics-job + {{- if and .Values.multiarch.enabled .Values.multiarch.image.alpine_curl }} + image: {{ .Values.multiarch.image.alpine_curl }} + {{- else }} + image: ghcr.io/atlanhq/alpine-curl:3.14 + {{- end }} + command: + - sh + - -c + - | + echo "[+] Checking for Atlas" + until printf "." 
&& curl -X GET "http://atlas-read-service-atlas.atlas.svc.cluster.local/api/atlas/admin/health"; do + sleep 2; + done; + echo 'Atlas OK ✓' + curl -X GET "http://atlas-read-service-atlas.atlas.svc.cluster.local/api/atlas/admin/pushMetricsToStatsd" + restartPolicy: Never +{{- end -}} +{{- end }} diff --git a/helm/atlas-read/templates/telegraf-config.yaml b/helm/atlas-read/templates/telegraf-config.yaml new file mode 100644 index 00000000000..328c2587b73 --- /dev/null +++ b/helm/atlas-read/templates/telegraf-config.yaml @@ -0,0 +1,133 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.telegraf.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-read-telegrafd + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + telegraf.conf: |- + # Telegraf Configuration + # Global Agent Configuration + [agent] + interval = "10s" # Default data collection interval + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "" + # debug = false + # quiet = false + hostname = "" # Will be set by Telegraf automatically + omit_hostname = false + [global_tags] + deployment="atlas" + [[inputs.http]] + urls = ["http://127.0.0.1:21000/api/atlas/admin/metrics/prometheus"] + data_format = "prometheus" + name_override = "atlas-metastore" + [[inputs.statsd]] + service_address = ":8125" + [[inputs.jolokia2_agent]] + name_override = "atlas" + urls = ["http://127.0.0.1:7777/jolokia"] + [[inputs.jolokia2_agent.metric]] + name = "heap_memory_usage" + mbean = "java.lang:type=Memory" + paths = ["HeapMemoryUsage"] + field_prefix = "memory_" + [[inputs.jolokia2_agent.metric]] + name = "non_heap_memory_usage" + mbean = "java.lang:type=Memory" + paths = ["NonHeapMemoryUsage"] + field_prefix = "memory_" + [[inputs.jolokia2_agent.metric]] + name = "thread_count" + mbean = "java.lang:type=Threading" + paths = ["TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"] + field_prefix = "thread_" + [[inputs.jolokia2_agent.metric]] + name = "class_count" + mbean = "java.lang:type=ClassLoading" + paths = ["LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"] + field_prefix = "class_" + [[inputs.jolokia2_agent.metric]] + name = "os" + mbean = "java.lang:type=OperatingSystem" + paths = ["MaxFileDescriptorCount,ProcessCpuTime,AvailableProcessors,SystemCpuLoad,TotalSwapSpaceSize,OpenFileDescriptorCount,FreePhysicalMemorySize,CommittedVirtualMemorySize,ProcessCpuLoad,FreeSwapSpaceSize,TotalPhysicalMemorySize"] + field_prefix = "os_" + [[inputs.jolokia2_agent.metric]] + name = "tenured_gen_memorypool" + mbean = "java.lang:name=Tenured Gen,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,UsageThresholdCount,UsageThreshold,PeakUsage"] + field_prefix = "memory_tenured_gen_" + [[inputs.jolokia2_agent.metric]] + name = "par_eden_space_gen_memorypool" + mbean = "java.lang:name=Par Eden Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_par_eden_" + [[inputs.jolokia2_agent.metric]] + name = 
"par_survivor_memorypool" + mbean = "java.lang:name=Par Survivor Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_par_survivor_" + [[inputs.jolokia2_agent.metric]] + name = "g1_survivor_memorypool" + mbean = "java.lang:name=G1 Survivor Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_g1_survivor_" + [[inputs.jolokia2_agent.metric]] + name = "g1_eden_memorypool" + mbean = "java.lang:name=G1 Eden Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_g1_eden_" + [[inputs.jolokia2_agent.metric]] + name = "g1_oldgen_memorypool" + mbean = "java.lang:name=G1 Old Gen,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,UsageThresholdCount,UsageThreshold,PeakUsage"] + field_prefix = "memory_g1_oldgen_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_parnew" + mbean = "java.lang:name=ParNew,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_parnew_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_marksweep" + mbean = "java.lang:name=MarkSweepCompact,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_marksweep_" + [[inputs.jolokia2_agent.metric]] + name = "jvm_runtime" + mbean = "java.lang:type=Runtime" + paths = ["Uptime"] + field_prefix = "uptime_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_g1_young" + mbean = "java.lang:name=G1 Young Generation,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_g1_young_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_g1_old" + mbean = "java.lang:name=G1 Old Generation,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_g1_old_" + [[inputs.jolokia2_agent.metric]] + name = "janusgraph_metrics" + mbean = "metrics:name=org.janusgraph.*,*" + field_prefix = "$1" + paths = ["Max", "Min", "Mean", "StdDev", "50thPercentile", + "75thPercentile", "95thPercentile", "98thPercentile", + "99thPercentile", "999thPercentile", "Count", "FifteenMinuteRate", + "FiveMinuteRate", "MeanRate", "OneMinuteRate", "SnapshotSize"] + [[outputs.prometheus_client]] + ## Address to listen on. + listen = ":9273" +{{- end }} +{{- end }} diff --git a/helm/atlas-read/values.yaml b/helm/atlas-read/values.yaml new file mode 100644 index 00000000000..bf2984701e0 --- /dev/null +++ b/helm/atlas-read/values.yaml @@ -0,0 +1,462 @@ + +multiarch: + enabled: false + image: {} + +# Default values for atlas. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+global: + Tier_Type: "" + cloud: "" + tenantName: "" + svcIsolation: + enabled: false + atlasNginx: + enabled: false + globalSvcIsolation: + enabled: false + esIsolation: + enabled: false +hpa: + enabled: false + name: atlas-read + labels: + app: atlas-read + cpu: + averageUtilization: 85 + memory: + averageUtilization: 85 + +Namespace: atlas +sentry_flag: disable +albTenant: false +podDisruptionBudget: + enabled: true + minAvailable: "1" +atlas: + cache: + enabled: false + podAntiAffinity: true + custom_deployment: + enabled: false + instance_type: + - m6a.2xlarge + sentry: + sampleRate: 0.5 + ranger: + RANGER_PASSWORD: '{{repl ConfigOption "RangerPassword"}}' + RANGER_SERVICE_URL: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + multitenant: '' + Deployment_Type: '' + replicaCount: 2 + config: + entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" + janusgraph: + atomic_mutation: true + janusgraph_tx_buffer_size: 8192 + keycloak: + token_introspection: true + introspection_cache: false + indexsearch: + enable_api_limit: false + query_size_max_limit: 100000 + enable_async: true + request_timeout_in_secs: 60 + enable_janus_optimization: true + enable_janus_optimization_for_relationship: true + enable_janus_optimization_for_classifications: false + enable_janus_optimization_extended: true + enable_janus_optimization_for_lineage: false + jg: + super_vertex_edge_count: 100000 + super_vertex_edge_timeout: 30 + bulk: + max_entities_allowed: 10000 + enable_janus_optimization: true + lineage: + optimised_calculation: true + enable_connection_lineage: false + authorizer: + enable_delta_based_refresh: true + enable_abac: true + index: + audit_index_field_limit: 10000 + audit_index_refresh_interval: 1s + distributed_task: + enabled: false + cleanup_supported_asset_types: "Process,AirflowTask" + cleanup_supported_relationship_labels: "__Process.inputs,__Process.outputs,__AirflowTask.inputs,__AirflowTask.outputs" + types_update: + async_enable: true + thread_count: 5 + lock_name: "atlas-read:type-def:lock" + + podAnnotations: + backup.velero.io/backup-volumes-excludes: master + + image: + repository: ghcr.io/atlanhq/atlas-metastore-ATLAS_BRANCH_NAME + tag: ATLAS_LATEST_IMAGE_TAG + pullPolicy: IfNotPresent + imagePullSecrets: {} + tolerations: [] + + # Affinity rules for atlas + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: atlas + topologyKey: kubernetes.io/hostname + + # Kubernetes service for atlas + service: + portName: atlas + type: ClusterIP + path: /api/atlas/v2/ + port: 80 + targetPort: 21000 + + + # kubernetes lifecycle hooks + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - curl -X GET http://localhost:21000/api/atlas/admin/killtheleader + + # Kubernetes ingress for atlas + # Primary ingress. all traffic is switched to secondary ingress. 
+ ingress: + enabled: false + serviceName: atlas-read-ui-service + annotations: + kubernetes.io/ingress.class: "kong" + konghq.com/preserve-host: "true" + konghq.com/plugins: keycloak-jwt, xss + labels: {} + path: / + # pathType is only for k8s >= 1.1= + pathType: ImplementationSpecific + hosts: [] + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + tls: {} + # Secrets for SSl + tlsSecrets: + tls.key: '' + tls.crt: '' + + # Healthcheck ingress data. + healthcheckIngress: + enabled: true + annotations: + kubernetes.io/ingress.class: "kong" + ## Path for grafana ingress + path: /api/atlas/admin/status + # pathType is only for k8s > 1.19 + pathType: Prefix + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + + # Secondary ingress which can be used to provide access on /atlas path + secondaryIngress: + enabled: true + # Used to create an Ingress record. + hosts: [] + ## Path for grafana ingress + path: /api/meta/ + # pathType is only for k8s > 1.19 + pathType: Prefix + labels: {} + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + annotations: + kubernetes.io/ingress.class: "kong" + konghq.com/strip-path: "true" + konghq.com/preserve-host: "true" + konghq.com/plugins: keycloak-jwt, xss + tls: [] + # Secrets for SSl + tlsSecrets: + tls.key: '' + tls.crt: '' + + # Node selector config for atlas statefulset + nodeSelector: {} + priorityClassName: "" + # Init container for atlas. Right now all checks are combined into one init container to reduce atlas start time. + initContainers: + - name: init-container-bundle + image: ghcr.io/atlanhq/alpine-python-atlan-v2:3.9.21 + imagePullPolicy: IfNotPresent + volumeMounts: + - name: atlas-read-init-script + mountPath: /tmp/atlas-init.sh + subPath: atlas-init.sh + - name: atlas-read-config + mountPath: /tmp/configfile/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-config-map-rw-vol + mountPath: /tmp/newconfigfile + - name: atlas-read-audit-index + mountPath: /scripts/atlas-audit.sh + subPath: atlas-audit.sh + - name: atlas-read-init-container-script + mountPath: /scripts/atlas-init-container.sh + subPath: atlas-init-container.sh + env: + - name: ATLAS_SERVICE_NAME + value: 'atlas' + - name: RANGER_SERVICE_URL + value: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + - name: RANGER_USERNAME + value: '' + - name: RANGER_PASSWORD + value: '' + - name: KEYCLOAK_ADDRESS + value: 'http://keycloak-http.keycloak.svc.cluster.local/auth' + command: + - /scripts/atlas-init-container.sh + + + resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ limits: + cpu: 3000m + memory: 8Gi + requests: + cpu: 3000m + memory: 8Gi + + resources_basic: + limits: + memory: 4Gi + requests: + memory: 20Mi + + resources_standard: + limits: + memory: 6Gi + requests: + memory: 20Mi + + # Liveness and readiness probes for atlas + livenessProbe: + failureThreshold: 3 + httpGet: + path: /api/atlas/admin/health + port: 21000 + scheme: HTTP + initialDelaySeconds: 720 + periodSeconds: 60 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: /api/atlas/admin/health + port: 21000 + scheme: HTTP + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 60 + successThreshold: 1 + timeoutSeconds: 5 + + env: + - name: ATLAS_SERVER_OPTS + value: '-Xmx4g -Xms4g' + - name: MAVEN_OPTS + value: '-Xmx4g -Xms4g' + - name: ATLAS_CLIENT_OPTS + value: '-Xmx1g -Xms1g' + - name: RANGER_SERVICE_URL + value: 'http://ranger-service.ranger.svc.cluster.local:80/api/policy' + - name: ATLAS_REPOSITORY_NAME + value: "atlas" + - name: ATLAS_USE_LEGACY_SEARCH + value: "false" + + + # We are using these in configmap for atlas-keycloak + secrets: + AUTH_SERVER_URL: '' + KEYCLOAK_REALM: '' + KEYCLOAK_CLIENT_ID: '' + KEYCLOAK_CLIENT_SECRET: '' + SENTRY_DSN_SECRET: '' + SENTRY_DSN_DEV: '' + SENTRY_DSN_PROD: '' + INSTANCE_NAME: '' + + # Redis config for atlas + # This is used in atlas configmap + redis: + enabled: true + host: ${USER_REDIS_HOST} + port: ${USER_REDIS_PORT} + sentinel_urls: ${USER_REDIS_SENTINEL_HOSTS} + master_name: ${USER_REDIS_MASTER_SET_NAME} + password: ${MASTER_PASSWORD} + username: ${USER_REDIS} + maxConnections: 100 + timeout: 100000 + + # Pod monitor to send metrics from telegraf to prometheus + podMonitor: + ## If true, a PodMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + enabled: true + namespace: monitoring + labels: + app: prometheus-operator + release: prometheus-operator + interval: 30s + scrapeTimeout: 10s + scheme: http + relabelings: [] + + # Flag to enable telegraf sidecar for metrics + telegraf: + enabled: true + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 300m + memory: 256Mi + + # Flag to enable statsD cronjob and schedule + statsdJob: + enabled: true + schedule: '*/10 * * * *' + + # Used in atlas configmaps + # can be used to setup slack notifications + notification: + slackWebhook: '' + +cassandra: + + updateStrategy: + type: RollingUpdate + + resources: + requests: + memory: 4Gi + #cpu: 1500m + limits: + memory: 5Gi + #cpu: 2000m + + # Config for cassandra + + max_heap_size: 2048M + heap_new_size: 512M + + config: + cluster_domain: cluster.local + cluster_name: cassandra + cluster_size: 3 + seed_size: 3 + start_rpc: true + ports: + cql: 9042 + + + + # Persistence changes for cassandra + persistence: + enabled: true + accessMode: ReadWriteOnce + size: 10Gi + + nodeSelector: {} + # nodegroup: atlan-atlas + + ## Affinity for pod assignment + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + # Cassandra exporter configuration + exporter: + enabled: true + serviceMonitor: + enabled: false + additionalLabels: + release: "prometheus-operator" + # prometheus: default + image: + repo: ghcr.io/atlanhq/cassandra_exporter + tag: 2.0.2 + jvmOpts: "" + resources: + limits: + #cpu: 200m 
+ memory: 500Mi + requests: + #cpu: 100m + memory: 200Mi + podAnnotations: {} + + # Cassandra backup configuration + backup: + enabled: false + schedule: + - keyspace: atlas + cron: "0 3 * * *" + annotations: + iam.amazonaws.com/role: "" + image: + repository: ghcr.io/atlanhq/cain + tag: 0.6.0 + # Name of the secret containing the credentials of the service account used by GOOGLE_APPLICATION_CREDENTIALS, as a credentials.json file + extraArgs: + - -c + - atlas-cassandra + google: + serviceAccountSecret: + env: + - name: AWS_REGION + value: "" + resources: + requests: + memory: 1Gi + #cpu: 1 + limits: + memory: 1Gi + #cpu: 1 + destination: "" diff --git a/helm/atlas/Chart.yaml b/helm/atlas/Chart.yaml new file mode 100644 index 00000000000..d9776a78d57 --- /dev/null +++ b/helm/atlas/Chart.yaml @@ -0,0 +1,28 @@ +apiVersion: v2 +name: atlas +description: Apache Atlas Metadata Management and Governance Platform +type: application +version: 1.0.0 +appVersion: "3.0.0" # Will be updated by CI with commit ID +maintainers: + - name: Atlan Engineering + email: engineering@atlan.com +keywords: + - atlas + - metadata + - governance + - data-catalog + - apache-atlas +sources: + - https://github.com/atlanhq/atlas-metastore +home: https://github.com/atlanhq/atlas-metastore +dependencies: + - name: cassandra + repository: file://./charts/cassandra + version: 0.x.x + - name: elasticsearch + repository: file://./charts/elasticsearch + version: 7.x.x + - name: logstash + repository: file://./charts/logstash + version: 9.x.x diff --git a/helm/atlas/README.md b/helm/atlas/README.md new file mode 100644 index 00000000000..d8be6656da1 --- /dev/null +++ b/helm/atlas/README.md @@ -0,0 +1,2 @@ +# atlas +This chart will install the apache atlas which use elasticsearch and cassandra. diff --git a/helm/atlas/charts/cassandra/.helmignore b/helm/atlas/charts/cassandra/.helmignore new file mode 100755 index 00000000000..5e03def0cfb --- /dev/null +++ b/helm/atlas/charts/cassandra/.helmignore @@ -0,0 +1,17 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +OWNERS diff --git a/helm/atlas/charts/cassandra/Chart.yaml b/helm/atlas/charts/cassandra/Chart.yaml new file mode 100755 index 00000000000..25e89e03bca --- /dev/null +++ b/helm/atlas/charts/cassandra/Chart.yaml @@ -0,0 +1,19 @@ +apiVersion: v2 +appVersion: 3.11.5 +description: Apache Cassandra is a free and open-source distributed database management + system designed to handle large amounts of data across many commodity servers, providing + high availability with no single point of failure. 
+engine: gotpl
+home: http://cassandra.apache.org
+icon: https://upload.wikimedia.org/wikipedia/commons/thumb/5/5e/Cassandra_logo.svg/330px-Cassandra_logo.svg.png
+keywords:
+- cassandra
+- database
+- nosql
+maintainers:
+- email: goonohc@gmail.com
+  name: KongZ
+- email: maor.friedman@redhat.com
+  name: maorfr
+name: cassandra
+version: 0.14.4
diff --git a/helm/atlas/charts/cassandra/README.md b/helm/atlas/charts/cassandra/README.md
new file mode 100755
index 00000000000..796fe331d2d
--- /dev/null
+++ b/helm/atlas/charts/cassandra/README.md
@@ -0,0 +1,215 @@
+# Cassandra
+A Cassandra Chart for Kubernetes
+
+## Install Chart
+To install the Cassandra Chart into your Kubernetes cluster (this chart requires a persistent volume by default, so you may need to create a storage class before installing it; see the [Persist data](#persist_data) section):
+
+```bash
+helm install --namespace "cassandra" -n "cassandra" incubator/cassandra
+```
+
+After installation succeeds, you can get the status of the release:
+
+```bash
+helm status "cassandra"
+```
+
+If you want to delete the release, use this command:
+```bash
+helm delete --purge "cassandra"
+```
+
+## Upgrading
+
+To upgrade your Cassandra release, simply run
+
+```bash
+helm upgrade "cassandra" incubator/cassandra
+```
+
+### 0.12.0
+
+This version fixes https://github.com/helm/charts/issues/7803 by removing mutable labels in `spec.VolumeClaimTemplate.metadata.labels` so that it is upgradable.
+
+Before this version, in order to upgrade, you had to delete the Cassandra StatefulSet before upgrading:
+```bash
+$ kubectl delete statefulset --cascade=false my-cassandra-release
+```
+
+
+## Persist data
+You need to create a `StorageClass` before you can persist data in a persistent volume.
+To create a `StorageClass` on Google Cloud, run the following:
+
+```bash
+kubectl create -f sample/create-storage-gce.yaml
+```
+
+And set the following values in `values.yaml`
+
+```yaml
+persistence:
+  enabled: true
+```
+
+If you want to create a `StorageClass` on another platform, please see the documentation here: [https://kubernetes.io/docs/user-guide/persistent-volumes/](https://kubernetes.io/docs/user-guide/persistent-volumes/)
+
+When running a cluster without persistence, the termination of a pod will first initiate a decommissioning of that pod.
+Depending on the amount of data stored inside the cluster this may take a while. In order to complete a graceful
+termination, pods need to get more time for it. Set the following values in `values.yaml`:
+
+```yaml
+podSettings:
+  terminationGracePeriodSeconds: 1800
+```
+
+## Install Chart with specific cluster size
+By default, this chart creates a Cassandra cluster with 3 nodes. If you want to change the cluster size during installation, you can use the `--set config.cluster_size={value}` argument, or edit `values.yaml`.
+
+For example:
+Set cluster size to 5
+
+```bash
+helm install --namespace "cassandra" -n "cassandra" --set config.cluster_size=5 incubator/cassandra/
+```
+
+## Install Chart with specific resource size
+By default, this chart creates a Cassandra cluster with 2 vCPU and 4Gi of memory per node, which is suitable for a development environment.
+If you want to use this chart in production, it is recommended to increase the resources to 4 vCPU and 16Gi, and to increase `max_heap_size` and `heap_new_size` accordingly.
+To update the settings, edit `values.yaml`.
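+
+For example, a production-sized override might look like the following sketch (the heap numbers are illustrative assumptions rather than chart defaults, using the `config.*` parameter names from the Configuration table below):
+
+```yaml
+resources:
+  requests:
+    cpu: 4
+    memory: 16Gi
+  limits:
+    cpu: 4
+    memory: 16Gi
+config:
+  # Illustrative heap sizing for a 16Gi node; adjust to your workload.
+  max_heap_size: 8192M
+  heap_new_size: 2048M
+```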
+
+## Install Chart with specific node
+Sometimes you may need to deploy Cassandra to specific nodes in order to allocate resources. You can use the node selector by setting `nodes.enabled=true` in `values.yaml`.
+For example, suppose you have 6 VMs in your node pools and want to deploy Cassandra to nodes labeled `cloud.google.com/gke-nodepool: pool-db`.
+
+Set the following values in `values.yaml`
+
+```yaml
+nodes:
+  enabled: true
+  selector:
+    nodeSelector:
+      cloud.google.com/gke-nodepool: pool-db
+```
+
+## Configuration
+
+The following table lists the configurable parameters of the Cassandra chart and their default values.
+
+| Parameter | Description | Default |
+| ----------------------- | --------------------------------------------- | ---------------------------------------------------------- |
+| `image.repo` | `cassandra` image repository | `cassandra` |
+| `image.tag` | `cassandra` image tag | `3.11.5` |
+| `image.pullPolicy` | Image pull policy | `Always` if `imageTag` is `latest`, else `IfNotPresent` |
+| `image.pullSecrets` | Image pull secrets | `nil` |
+| `config.cluster_domain` | The name of the cluster domain. | `cluster.local` |
+| `config.cluster_name` | The name of the cluster. | `cassandra` |
+| `config.cluster_size` | The number of nodes in the cluster. | `3` |
+| `config.seed_size` | The number of seed nodes used to bootstrap new clients joining the cluster. | `2` |
+| `config.seeds` | The comma-separated list of seed nodes. | Automatically generated according to `.Release.Name` and `config.seed_size` |
+| `config.num_tokens` | Number of vnode tokens assigned to each node | `256` |
+| `config.dc_name` | Datacenter name reported by the snitch | `DC1` |
+| `config.rack_name` | Rack name reported by the snitch | `RAC1` |
+| `config.endpoint_snitch` | Snitch implementation used for cluster topology | `SimpleSnitch` |
+| `config.max_heap_size` | JVM maximum heap size | `2048M` |
+| `config.heap_new_size` | JVM young-generation heap size | `512M` |
+| `config.ports.cql` | Native transport (CQL) port | `9042` |
+| `config.ports.thrift` | Thrift RPC port | `9160` |
+| `config.ports.agent` | The port of the JVM Agent (if any) | `nil` |
+| `config.start_rpc` | Whether to start the Thrift RPC server | `false` |
+| `configOverrides` | Overrides config files in /etc/cassandra dir | `{}` |
+| `commandOverrides` | Overrides default docker command | `[]` |
+| `argsOverrides` | Overrides default docker args | `[]` |
+| `env` | Custom env variables | `{}` |
+| `schedulerName` | Name of k8s scheduler (other than the default) | `nil` |
+| `persistence.enabled` | Use a PVC to persist data | `true` |
+| `persistence.storageClass` | Storage class of backing PVC | `nil` (uses alpha storage class annotation) |
+| `persistence.accessMode` | Use volume as ReadOnly or ReadWrite | `ReadWriteOnce` |
+| `persistence.size` | Size of data volume | `10Gi` |
+| `resources` | CPU/Memory resource requests/limits | Memory: `4Gi`, CPU: `2` |
+| `service.type` | k8s service type exposing ports, e.g. `NodePort`| `ClusterIP` |
+| `service.annotations` | Annotations to apply to cassandra service | `""` |
+| `podManagementPolicy` | podManagementPolicy of the StatefulSet | `OrderedReady` |
+| `podDisruptionBudget` | Pod disruption budget | `{}` |
+| `podAnnotations` | Pod annotations for the StatefulSet | `{}` |
+| `updateStrategy.type` | UpdateStrategy of the StatefulSet | `OnDelete` |
+| `livenessProbe.initialDelaySeconds` | Delay before liveness probe is initiated | `90` |
+| `livenessProbe.periodSeconds` | How often to perform the probe | `30` |
+| `livenessProbe.timeoutSeconds` | When the probe times out | `5` |
+| `livenessProbe.successThreshold` | Minimum consecutive successes for the probe to be considered successful after having failed. 
| `1` | +| `livenessProbe.failureThreshold` | Minimum consecutive failures for the probe to be considered failed after having succeeded. | `3` | +| `readinessProbe.initialDelaySeconds` | Delay before readiness probe is initiated | `90` | +| `readinessProbe.periodSeconds` | How often to perform the probe | `30` | +| `readinessProbe.timeoutSeconds` | When the probe times out | `5` | +| `readinessProbe.successThreshold` | Minimum consecutive successes for the probe to be considered successful after having failed. | `1` | +| `readinessProbe.failureThreshold` | Minimum consecutive failures for the probe to be considered failed after having succeeded. | `3` | +| `readinessProbe.address` | Address to use for checking node has joined the cluster and is ready. | `${POD_IP}` | +| `rbac.create` | Specifies whether RBAC resources should be created | `true` | +| `serviceAccount.create` | Specifies whether a ServiceAccount should be created | `true` | +| `serviceAccount.name` | The name of the ServiceAccount to use | | +| `backup.enabled` | Enable backup on chart installation | `false` | +| `backup.schedule` | Keyspaces to backup, each with cron time | | +| `backup.annotations` | Backup pod annotations | iam.amazonaws.com/role: `cain` | +| `backup.image.repository` | Backup image repository | `maorfr/cain` | +| `backup.image.tag` | Backup image tag | `0.6.0` | +| `backup.extraArgs` | Additional arguments for cain | `[]` | +| `backup.env` | Backup environment variables | AWS_REGION: `us-east-1` | +| `backup.resources` | Backup CPU/Memory resource requests/limits | Memory: `1Gi`, CPU: `1` | +| `backup.destination` | Destination to store backup artifacts | `s3://bucket/cassandra` | +| `backup.google.serviceAccountSecret` | Secret containing credentials if GCS is used as destination | | +| `exporter.enabled` | Enable Cassandra exporter | `false` | +| `exporter.servicemonitor.enabled` | Enable ServiceMonitor for exporter | `true` | +| `exporter.servicemonitor.additionalLabels`| Additional labels for Service Monitor | `{}` | +| `exporter.image.repo` | Exporter image repository | `criteord/cassandra_exporter` | +| `exporter.image.tag` | Exporter image tag | `2.0.2` | +| `exporter.port` | Exporter port | `5556` | +| `exporter.jvmOpts` | Exporter additional JVM options | | +| `exporter.resources` | Exporter CPU/Memory resource requests/limits | `{}` | +| `affinity` | Kubernetes node affinity | `{}` | +| `tolerations` | Kubernetes node tolerations | `[]` | + + +## Scale cassandra +When you want to change the cluster size of your cassandra, you can use the helm upgrade command. 
+
+```bash
+helm upgrade --set config.cluster_size=5 cassandra incubator/cassandra
+```
+
+## Get cassandra status
+You can get the status of your Cassandra cluster by running the command
+
+```bash
+kubectl exec -it --namespace cassandra $(kubectl get pods --namespace cassandra -l app=cassandra-cassandra -o jsonpath='{.items[0].metadata.name}') nodetool status
+```
+
+Output
+```bash
+Datacenter: asia-east1
+======================
+Status=Up/Down
+|/ State=Normal/Leaving/Joining/Moving
+--  Address    Load       Tokens       Owns (effective)  Host ID                               Rack
+UN  10.8.1.11  108.45 KiB  256          66.1%            410cc9da-8993-4dc2-9026-1dd381874c54  a
+UN  10.8.4.12  84.08 KiB   256          68.7%            96e159e1-ef94-406e-a0be-e58fbd32a830  c
+UN  10.8.3.6   103.07 KiB  256          65.2%            1a42b953-8728-4139-b070-b855b8fff326  b
+```
+
+## Benchmark
+You can use the [cassandra-stress](https://docs.datastax.com/en/cassandra/3.0/cassandra/tools/toolsCStress.html) tool to run a benchmark on the cluster with the following command
+
+```bash
+kubectl exec -it --namespace cassandra $(kubectl get pods --namespace cassandra -l app=cassandra-cassandra -o jsonpath='{.items[0].metadata.name}') cassandra-stress
+```
+
+Example `cassandra-stress` arguments
+ - Run both reads and writes with a write:read ratio of 1:9
+ - Operate on a total of 1 million keys with a uniform distribution
+ - Use QUORUM for reads/writes
+ - Generate 50 threads
+ - Generate the result as a graph
+ - Use NetworkTopologyStrategy with replication factor 2
+
+```bash
+cassandra-stress mixed ratio\(write=1,read=9\) n=1000000 cl=QUORUM -pop dist=UNIFORM\(1..1000000\) -mode native cql3 -rate threads=50 -log file=~/mixed_autorate_r9w1_1M.log -graph file=test2.html title=test revision=test2 -schema "replication(strategy=NetworkTopologyStrategy, factor=2)"
+```
diff --git a/helm/atlas/charts/cassandra/sample/create-storage-gce.yaml b/helm/atlas/charts/cassandra/sample/create-storage-gce.yaml
new file mode 100755
index 00000000000..2467b95227e
--- /dev/null
+++ b/helm/atlas/charts/cassandra/sample/create-storage-gce.yaml
@@ -0,0 +1,7 @@
+kind: StorageClass
+apiVersion: storage.k8s.io/v1
+metadata:
+  name: generic
+provisioner: kubernetes.io/gce-pd
+parameters:
+  type: pd-ssd
diff --git a/helm/atlas/charts/cassandra/templates/NOTES.txt b/helm/atlas/charts/cassandra/templates/NOTES.txt
new file mode 100755
index 00000000000..9ecb0045ddd
--- /dev/null
+++ b/helm/atlas/charts/cassandra/templates/NOTES.txt
@@ -0,0 +1,35 @@
+Cassandra CQL can be accessed via port {{ .Values.config.ports.cql }} on the following DNS name from within your cluster:
+Cassandra Thrift can be accessed via port {{ .Values.config.ports.thrift }} on the following DNS name from within your cluster:
+
+If you want to connect to the remote instance with your local Cassandra CQL CLI, forward the API port to localhost:9042 by running the following:
+- kubectl port-forward --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{ .items[0].metadata.name }') 9042:{{ .Values.config.ports.cql }}
+
+If you want to connect to Cassandra via CQL, run the following:
+{{- if contains "NodePort" .Values.service.type }}
+- export CQL_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ template "cassandra.fullname" .
}}) +- export CQL_HOST=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") +- cqlsh $CQL_HOST $CQL_PORT + +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + Watch the status with: 'kubectl get svc --namespace {{ .Release.Namespace }} -w {{ template "cassandra.fullname" . }}' +- export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ template "cassandra.fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') +- echo cqlsh $SERVICE_IP +{{- else if contains "ClusterIP" .Values.service.type }} +- kubectl port-forward --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "cassandra.name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") 9042:{{ .Values.config.ports.cql }} + echo cqlsh 127.0.0.1 9042 +{{- end }} + +You can also see the cluster status by run the following: +- kubectl exec -it --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{.items[0].metadata.name}') nodetool status + +To tail the logs for the Cassandra pod run the following: +- kubectl logs -f --namespace {{ .Release.Namespace }} $(kubectl get pods --namespace {{ .Release.Namespace }} -l app={{ template "cassandra.name" . }},release={{ .Release.Name }} -o jsonpath='{ .items[0].metadata.name }') + +{{- if not .Values.persistence.enabled }} + +Note that the cluster is running with node-local storage instead of PersistentVolumes. In order to prevent data loss, +pods will be decommissioned upon termination. Decommissioning may take some time, so you might also want to adjust the +pod termination gace period, which is currently set to {{ .Values.podSettings.terminationGracePeriodSeconds }} seconds. + +{{- end}} diff --git a/helm/atlas/charts/cassandra/templates/_helpers.tpl b/helm/atlas/charts/cassandra/templates/_helpers.tpl new file mode 100755 index 00000000000..b8704209bf4 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/_helpers.tpl @@ -0,0 +1,43 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "cassandra.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cassandra.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cassandra.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "cassandra.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "cassandra.fullname" .) 
.Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml b/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml new file mode 100755 index 00000000000..efee5e96e55 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml @@ -0,0 +1,102 @@ +{{- if .Values.backup.enabled }} +{{- $release := .Release }} +{{- $values := .Values }} +{{- $backup := $values.backup }} +{{- range $index, $schedule := $backup.schedule }} +--- +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: {{ template "cassandra.fullname" $ }}-backup + namespace: {{ $.Values.Namespace }} + labels: + app: {{ template "cassandra.name" $ }}-cain + chart: {{ template "cassandra.chart" $ }} + release: "{{ $release.Name }}" + heritage: "{{ $release.Service }}" +spec: + schedule: {{ $schedule.cron | quote }} + concurrencyPolicy: Forbid + startingDeadlineSeconds: 120 + jobTemplate: + spec: + template: + metadata: + annotations: + {{ toYaml $backup.annotations }} + spec: + restartPolicy: OnFailure + serviceAccountName: {{ template "cassandra.serviceAccountName" $ }} + {{- with $.Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + containers: + - name: cassandra-backup + {{- if and $.Values.multiarch.enabled $.Values.multiarch.image.cain }} + image: {{ $.Values.multiarch.image.cain }} + {{- else }} + image: "{{ $backup.image.repository }}:{{ $backup.image.tag }}" + {{- end }} + command: ["cain"] + args: + - backup + - --namespace + - {{ $release.Namespace }} + - --selector + - release={{ $release.Name }},app={{ template "cassandra.name" $ }} + - --keyspace + - {{ $schedule.keyspace }} + - --dst + - {{ $backup.destination }} + {{- with $backup.extraArgs }} +{{ toYaml . | indent 12 }} + {{- end }} + env: +{{- if $backup.google.serviceAccountSecret }} + - name: GOOGLE_APPLICATION_CREDENTIALS + value: "/etc/secrets/google/credentials.json" +{{- end }} + {{- with $backup.env }} +{{ toYaml . | indent 12 }} + {{- end }} +{{- $tierType := $.Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- with $backup.resources }} + resources: +{{ toYaml . | indent 14 }} + {{- end }} +{{- end }} +{{- if $backup.google.serviceAccountSecret }} + volumeMounts: + - name: google-service-account + mountPath: /etc/secrets/google/ +{{- end }} +{{- if $backup.google.serviceAccountSecret }} + volumes: + - name: google-service-account + secret: + secretName: {{ $backup.google.serviceAccountSecret | quote }} +{{- end }} + affinity: + podAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ template "cassandra.fullname" $ }} + - key: release + operator: In + values: + - {{ $release.Name }} + topologyKey: "kubernetes.io/hostname" + {{- with $values.tolerations }} + tolerations: +{{ toYaml . | indent 12 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/cassandra/templates/backup/rbac.yaml b/helm/atlas/charts/cassandra/templates/backup/rbac.yaml new file mode 100755 index 00000000000..05685171d15 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/backup/rbac.yaml @@ -0,0 +1,53 @@ +{{- if .Values.backup.enabled }} +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "cassandra.serviceAccountName" . 
}} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +--- +{{- end }} +{{- if .Values.rbac.create }} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ template "cassandra.fullname" . }}-backup + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +rules: +- apiGroups: [""] + resources: ["pods", "pods/log"] + verbs: ["get", "list"] +- apiGroups: [""] + resources: ["pods/exec"] + verbs: ["create"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ template "cassandra.fullname" . }}-backup + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "{{ .Release.Name }}" + heritage: "{{ .Release.Service }}" +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: {{ template "cassandra.fullname" . }}-backup +subjects: +- kind: ServiceAccount + name: {{ template "cassandra.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/cassandra/templates/cassandra-config.yaml b/helm/atlas/charts/cassandra/templates/cassandra-config.yaml new file mode 100644 index 00000000000..1c6436caca3 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/cassandra-config.yaml @@ -0,0 +1,1617 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: cassandra-config + namespace: atlas +data: + cassandra.yaml: |- + # Cassandra storage config YAML + + # NOTE: + # See http://wiki.apache.org/cassandra/StorageConfiguration for + # full explanations of configuration directives + # /NOTE + + # The name of the cluster. This is mainly used to prevent machines in + # one logical cluster from joining another. + cluster_name: cassandra + + # This defines the number of tokens randomly assigned to this node on the ring + # The more tokens, relative to other nodes, the larger the proportion of data + # that this node will store. You probably want all nodes to have the same number + # of tokens assuming they have equal hardware capability. + # + # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, + # and will use the initial_token as described below. + # + # Specifying initial_token will override this setting on the node's initial start, + # on subsequent starts, this setting will apply even if initial token is set. + # + # If you already have a cluster with 1 token per node, and wish to migrate to + # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations + num_tokens: 256 + + # Triggers automatic allocation of num_tokens tokens for this node. The allocation + # algorithm attempts to choose tokens in a way that optimizes replicated load over + # the nodes in the datacenter for the replication strategy used by the specified + # keyspace. + # + # The load assigned to each node will be close to proportional to its number of + # vnodes. + # + # Only supported with the Murmur3Partitioner. + # allocate_tokens_for_keyspace: KEYSPACE + + # initial_token allows you to specify tokens manually. 
While you can use it with + # vnodes (num_tokens > 1, above) -- in which case you should provide a + # comma-separated list -- it's primarily used when adding nodes to legacy clusters + # that do not have vnodes enabled. + # initial_token: + + # See http://wiki.apache.org/cassandra/HintedHandoff + # May either be "true" or "false" to enable globally + hinted_handoff_enabled: true + + # When hinted_handoff_enabled is true, a black list of data centers that will not + # perform hinted handoff + # hinted_handoff_disabled_datacenters: + # - DC1 + # - DC2 + + # this defines the maximum amount of time a dead host will have hints + # generated. After it has been dead this long, new hints for it will not be + # created until it has been seen alive and gone down again. + max_hint_window_in_ms: 10800000 # 3 hours + + # Maximum throttle in KBs per second, per delivery thread. This will be + # reduced proportionally to the number of nodes in the cluster. (If there + # are two nodes in the cluster, each delivery thread will use the maximum + # rate; if there are three, each will throttle to half of the maximum, + # since we expect two nodes to be delivering hints simultaneously.) + hinted_handoff_throttle_in_kb: 1024 + + # Number of threads with which to deliver hints; + # Consider increasing this number when you have multi-dc deployments, since + # cross-dc handoff tends to be slower + max_hints_delivery_threads: 2 + + # Directory where Cassandra should store hints. + # If not set, the default directory is $CASSANDRA_HOME/data/hints. + # hints_directory: /var/lib/cassandra/hints + + # How often hints should be flushed from the internal buffers to disk. + # Will *not* trigger fsync. + hints_flush_period_in_ms: 10000 + + # Maximum size for a single hints file, in megabytes. + max_hints_file_size_in_mb: 128 + + # Compression to apply to the hint files. If omitted, hints files + # will be written uncompressed. LZ4, Snappy, and Deflate compressors + # are supported. + #hints_compression: + # - class_name: LZ4Compressor + # parameters: + # - + + # Maximum throttle in KBs per second, total. This will be + # reduced proportionally to the number of nodes in the cluster. + batchlog_replay_throttle_in_kb: 1024 + + # Authentication backend, implementing IAuthenticator; used to identify users + # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, + # PasswordAuthenticator}. + # + # - AllowAllAuthenticator performs no checks - set it to disable authentication. + # - PasswordAuthenticator relies on username/password pairs to authenticate + # users. It keeps usernames and hashed passwords in system_auth.roles table. + # Please increase system_auth keyspace replication factor if you use this authenticator. + # If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) + authenticator: AllowAllAuthenticator + + # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions + # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, + # CassandraAuthorizer}. + # + # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. + # - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please + # increase system_auth keyspace replication factor if you use this authorizer. + authorizer: AllowAllAuthorizer + + # Part of the Authentication & Authorization backend, implementing IRoleManager; used + # to maintain grants and memberships between roles. 
+ # Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, + # which stores role information in the system_auth keyspace. Most functions of the + # IRoleManager require an authenticated login, so unless the configured IAuthenticator + # actually implements authentication, most of this functionality will be unavailable. + # + # - CassandraRoleManager stores role data in the system_auth keyspace. Please + # increase system_auth keyspace replication factor if you use this role manager. + role_manager: CassandraRoleManager + + # Validity period for roles cache (fetching granted roles can be an expensive + # operation depending on the role manager, CassandraRoleManager is one example) + # Granted roles are cached for authenticated sessions in AuthenticatedUser and + # after the period specified here, become eligible for (async) reload. + # Defaults to 2000, set to 0 to disable caching entirely. + # Will be disabled automatically for AllowAllAuthenticator. + roles_validity_in_ms: 2000 + + # Refresh interval for roles cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. If roles_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as roles_validity_in_ms. + # roles_update_interval_in_ms: 2000 + + # Validity period for permissions cache (fetching permissions can be an + # expensive operation depending on the authorizer, CassandraAuthorizer is + # one example). Defaults to 2000, set to 0 to disable. + # Will be disabled automatically for AllowAllAuthorizer. + permissions_validity_in_ms: 2000 + + # Refresh interval for permissions cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. If permissions_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as permissions_validity_in_ms. + # permissions_update_interval_in_ms: 2000 + + # Validity period for credentials cache. This cache is tightly coupled to + # the provided PasswordAuthenticator implementation of IAuthenticator. If + # another IAuthenticator implementation is configured, this cache will not + # be automatically used and so the following settings will have no effect. + # Please note, credentials are cached in their encrypted form, so while + # activating this cache may reduce the number of queries made to the + # underlying table, it may not bring a significant reduction in the + # latency of individual authentication attempts. + # Defaults to 2000, set to 0 to disable credentials caching. + credentials_validity_in_ms: 2000 + + # Refresh interval for credentials cache (if enabled). + # After this interval, cache entries become eligible for refresh. Upon next + # access, an async reload is scheduled and the old value returned until it + # completes. If credentials_validity_in_ms is non-zero, then this must be + # also. + # Defaults to the same value as credentials_validity_in_ms. + # credentials_update_interval_in_ms: 2000 + + # The partitioner is responsible for distributing groups of rows (by + # partition key) across nodes in the cluster. You should leave this + # alone for new clusters. The partitioner can NOT be changed without + # reloading all data, so when upgrading you should set this to the + # same partitioner you were already using. 
+ # + # Besides Murmur3Partitioner, partitioners included for backwards + # compatibility include RandomPartitioner, ByteOrderedPartitioner, and + # OrderPreservingPartitioner. + # + partitioner: org.apache.cassandra.dht.Murmur3Partitioner + + # Directories where Cassandra should store data on disk. Cassandra + # will spread data evenly across them, subject to the granularity of + # the configured compaction strategy. + # If not set, the default directory is $CASSANDRA_HOME/data/data. + # data_file_directories: + # - /var/lib/cassandra/data + + # commit log. when running on magnetic HDD, this should be a + # separate spindle than the data directories. + # If not set, the default directory is $CASSANDRA_HOME/data/commitlog. + # commitlog_directory: /var/lib/cassandra/commitlog + + # Enable / disable CDC functionality on a per-node basis. This modifies the logic used + # for write path allocation rejection (standard: never reject. cdc: reject Mutation + # containing a CDC-enabled table if at space limit in cdc_raw_directory). + cdc_enabled: false + + # CommitLogSegments are moved to this directory on flush if cdc_enabled: true and the + # segment contains mutations for a CDC-enabled table. This should be placed on a + # separate spindle than the data directories. If not set, the default directory is + # $CASSANDRA_HOME/data/cdc_raw. + # cdc_raw_directory: /var/lib/cassandra/cdc_raw + + # Policy for data disk failures: + # + # die + # shut down gossip and client transports and kill the JVM for any fs errors or + # single-sstable errors, so the node can be replaced. + # + # stop_paranoid + # shut down gossip and client transports even for single-sstable errors, + # kill the JVM for errors during startup. + # + # stop + # shut down gossip and client transports, leaving the node effectively dead, but + # can still be inspected via JMX, kill the JVM for errors during startup. + # + # best_effort + # stop using the failed disk and respond to requests based on + # remaining available sstables. This means you WILL see obsolete + # data at CL.ONE! + # + # ignore + # ignore fatal errors and let requests fail, as in pre-1.2 Cassandra + disk_failure_policy: stop + + # Policy for commit disk failures: + # + # die + # shut down gossip and Thrift and kill the JVM, so the node can be replaced. + # + # stop + # shut down gossip and Thrift, leaving the node effectively dead, but + # can still be inspected via JMX. + # + # stop_commit + # shutdown the commit log, letting writes collect but + # continuing to service reads, as in pre-2.0.5 Cassandra + # + # ignore + # ignore fatal errors and let the batches fail + commit_failure_policy: stop + + # Maximum size of the native protocol prepared statement cache + # + # Valid values are either "auto" (omitting the value) or a value greater 0. + # + # Note that specifying a too large value will result in long running GCs and possbily + # out-of-memory errors. Keep the value at a small fraction of the heap. + # + # If you constantly see "prepared statements discarded in the last minute because + # cache limit reached" messages, the first step is to investigate the root cause + # of these messages and check whether prepared statements are used correctly - + # i.e. use bind markers for variable parts. + # + # Do only change the default value, if you really have more prepared statements than + # fit in the cache. In most cases it is not neccessary to change this value. + # Constantly re-preparing statements is a performance penalty. 
+ # + # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater + prepared_statements_cache_size_mb: + + # Maximum size of the Thrift prepared statement cache + # + # If you do not use Thrift at all, it is safe to leave this value at "auto". + # + # See description of 'prepared_statements_cache_size_mb' above for more information. + # + # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater + thrift_prepared_statements_cache_size_mb: + + # Maximum size of the key cache in memory. + # + # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the + # minimum, sometimes more. The key cache is fairly tiny for the amount of + # time it saves, so it's worthwhile to use it at large numbers. + # The row cache saves even more time, but must contain the entire row, + # so it is extremely space-intensive. It's best to only use the + # row cache if you have hot rows or static rows. + # + # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. + # + # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. + key_cache_size_in_mb: + + # Duration in seconds after which Cassandra should + # save the key cache. Caches are saved to saved_caches_directory as + # specified in this configuration file. + # + # Saved caches greatly improve cold-start speeds, and is relatively cheap in + # terms of I/O for the key cache. Row cache saving is much more expensive and + # has limited use. + # + # Default is 14400 or 4 hours. + key_cache_save_period: 14400 + + # Number of keys from the key cache to save + # Disabled by default, meaning all keys are going to be saved + # key_cache_keys_to_save: 100 + + # Row cache implementation class name. Available implementations: + # + # org.apache.cassandra.cache.OHCProvider + # Fully off-heap row cache implementation (default). + # + # org.apache.cassandra.cache.SerializingCacheProvider + # This is the row cache implementation availabile + # in previous releases of Cassandra. + # row_cache_class_name: org.apache.cassandra.cache.OHCProvider + + # Maximum size of the row cache in memory. + # Please note that OHC cache implementation requires some additional off-heap memory to manage + # the map structures and some in-flight memory during operations before/after cache entries can be + # accounted against the cache capacity. This overhead is usually small compared to the whole capacity. + # Do not specify more memory that the system can afford in the worst usual situation and leave some + # headroom for OS block level cache. Do never allow your system to swap. + # + # Default value is 0, to disable row caching. + row_cache_size_in_mb: 0 + + # Duration in seconds after which Cassandra should save the row cache. + # Caches are saved to saved_caches_directory as specified in this configuration file. + # + # Saved caches greatly improve cold-start speeds, and is relatively cheap in + # terms of I/O for the key cache. Row cache saving is much more expensive and + # has limited use. + # + # Default is 0 to disable saving the row cache. + row_cache_save_period: 0 + + # Number of keys from the row cache to save. + # Specify 0 (which is the default), meaning all keys are going to be saved + # row_cache_keys_to_save: 100 + + # Maximum size of the counter cache in memory. + # + # Counter cache helps to reduce counter locks' contention for hot counter cells. 
+ # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before + # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration + # of the lock hold, helping with hot counter cell updates, but will not allow skipping + # the read entirely. Only the local (clock, count) tuple of a counter cell is kept + # in memory, not the whole counter, so it's relatively cheap. + # + # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. + # + # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. + # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. + counter_cache_size_in_mb: + + # Duration in seconds after which Cassandra should + # save the counter cache (keys only). Caches are saved to saved_caches_directory as + # specified in this configuration file. + # + # Default is 7200 or 2 hours. + counter_cache_save_period: 7200 + + # Number of keys from the counter cache to save + # Disabled by default, meaning all keys are going to be saved + # counter_cache_keys_to_save: 100 + + # saved caches + # If not set, the default directory is $CASSANDRA_HOME/data/saved_caches. + # saved_caches_directory: /var/lib/cassandra/saved_caches + + # Number of seconds the server will wait for each cache (row, key, etc ...) to load while starting + # the Cassandra process. Setting this to a negative value is equivalent to disabling all cache loading on startup + # while still having the cache during runtime. + # cache_load_timeout_seconds: 30 + + # commitlog_sync may be either "periodic" or "batch." + # + # When in batch mode, Cassandra won't ack writes until the commit log + # has been fsynced to disk. It will wait + # commitlog_sync_batch_window_in_ms milliseconds between fsyncs. + # This window should be kept short because the writer threads will + # be unable to do extra work while waiting. (You may need to increase + # concurrent_writes for the same reason.) + # + # commitlog_sync: batch + # commitlog_sync_batch_window_in_ms: 2 + # + # the other option is "periodic" where writes may be acked immediately + # and the CommitLog is simply synced every commitlog_sync_period_in_ms + # milliseconds. + commitlog_sync: periodic + commitlog_sync_period_in_ms: 10000 + + # The size of the individual commitlog file segments. A commitlog + # segment may be archived, deleted, or recycled once all the data + # in it (potentially from each columnfamily in the system) has been + # flushed to sstables. + # + # The default size is 32, which is almost always fine, but if you are + # archiving commitlog segments (see commitlog_archiving.properties), + # then you probably want a finer granularity of archiving; 8 or 16 MB + # is reasonable. + # Max mutation size is also configurable via max_mutation_size_in_kb setting in + # cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. + # This should be positive and less than 2048. + # + # NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must + # be set to at least twice the size of max_mutation_size_in_kb / 1024 + # + {{- if eq .Values.janusgraph.atomic_mutation false}} + commitlog_segment_size_in_mb: 32 + {{- else }} + commitlog_segment_size_in_mb: {{ .Values.janusgraph.commitlog_segment_size_in_mb }} + {{- end }} + + # Compression to apply to the commit log. If omitted, the commit log + # will be written uncompressed. 
LZ4, Snappy, and Deflate compressors + # are supported. + # commitlog_compression: + # - class_name: LZ4Compressor + # parameters: + # - + + # any class that implements the SeedProvider interface and has a + # constructor that takes a Map of parameters will do. + seed_provider: + # Addresses of hosts that are deemed contact points. + # Cassandra nodes use this list of hosts to find each other and learn + # the topology of the ring. You must change this if you are running + # multiple nodes! + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + # seeds is actually a comma-delimited list of addresses. + # Ex: ",," + - seeds: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local" + + # For workloads with more data than can fit in memory, Cassandra's + # bottleneck will be reads that need to fetch data from + # disk. "concurrent_reads" should be set to (16 * number_of_drives) in + # order to allow the operations to enqueue low enough in the stack + # that the OS and drives can reorder them. Same applies to + # "concurrent_counter_writes", since counter writes read the current + # values before incrementing and writing them back. + # + # On the other hand, since writes are almost never IO bound, the ideal + # number of "concurrent_writes" is dependent on the number of cores in + # your system; (8 * number_of_cores) is a good rule of thumb. + concurrent_reads: 32 + concurrent_writes: 32 + concurrent_counter_writes: 32 + + # For materialized view writes, as there is a read involved, so this should + # be limited by the less of concurrent reads or concurrent writes. + concurrent_materialized_view_writes: 32 + + # Maximum memory to use for sstable chunk cache and buffer pooling. + # 32MB of this are reserved for pooling buffers, the rest is used as an + # cache that holds uncompressed sstable chunks. + # Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, + # so is in addition to the memory allocated for heap. The cache also has on-heap + # overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size + # if the default 64k chunk size is used). + # Memory is only allocated when needed. + # file_cache_size_in_mb: 512 + + # Flag indicating whether to allocate on or off heap when the sstable buffer + # pool is exhausted, that is when it has exceeded the maximum memory + # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. + + # buffer_pool_use_heap_if_exhausted: true + + # The strategy for optimizing disk read + # Possible values are: + # ssd (for solid state disks, the default) + # spinning (for spinning disks) + # disk_optimization_strategy: ssd + + # Total permitted memory to use for memtables. Cassandra will stop + # accepting writes when the limit is exceeded until a flush completes, + # and will trigger a flush based on memtable_cleanup_threshold + # If omitted, Cassandra will set both to 1/4 the size of the heap. + # memtable_heap_space_in_mb: 2048 + # memtable_offheap_space_in_mb: 2048 + + # memtable_cleanup_threshold is deprecated. The default calculation + # is the only reasonable choice. See the comments on memtable_flush_writers + # for more information. + # + # Ratio of occupied non-flushing memtable size to total permitted size + # that will trigger a flush of the largest memtable. Larger mct will + # mean larger flushes and hence less compaction, but also less concurrent + # flush activity which can make it difficult to keep your disks fed + # under heavy write load. 
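+    #
+    # (Worked example, assuming the default of two memtable flush writers: the
+    # formula below gives 1 / (2 + 1) ~= 0.33, i.e. a flush of the largest
+    # memtable is triggered once roughly a third of the permitted memtable
+    # space is occupied by non-flushing memtables.)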
+ # + # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) + # memtable_cleanup_threshold: 0.11 + + # Specify the way Cassandra allocates and manages memtable memory. + # Options are: + # + # heap_buffers + # on heap nio buffers + # + # offheap_buffers + # off heap (direct) nio buffers + # + # offheap_objects + # off heap objects + memtable_allocation_type: heap_buffers + + # Limits the maximum Merkle tree depth to avoid consuming too much + # memory during repairs. + # + # The default setting of 18 generates trees of maximum size around + # 50 MiB / tree. If you are running out of memory during repairs consider + # lowering this to 15 (~6 MiB / tree) or lower, but try not to lower it + # too much past that or you will lose too much resolution and stream + # too much redundant data during repair. Cannot be set lower than 10. + # + # For more details see https://issues.apache.org/jira/browse/CASSANDRA-14096. + # + # repair_session_max_tree_depth: 18 + + # Total space to use for commit logs on disk. + # + # If space gets above this value, Cassandra will flush every dirty CF + # in the oldest segment and remove it. So a small total commitlog space + # will tend to cause more flush activity on less-active columnfamilies. + # + # The default value is the smaller of 8192, and 1/4 of the total space + # of the commitlog volume. + # + # commitlog_total_space_in_mb: 8192 + + # This sets the number of memtable flush writer threads per disk + # as well as the total number of memtables that can be flushed concurrently. + # These are generally a combination of compute and IO bound. + # + # Memtable flushing is more CPU efficient than memtable ingest and a single thread + # can keep up with the ingest rate of a whole server on a single fast disk + # until it temporarily becomes IO bound under contention typically with compaction. + # At that point you need multiple flush threads. At some point in the future + # it may become CPU bound all the time. + # + # You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation + # metric which should be 0, but will be non-zero if threads are blocked waiting on flushing + # to free memory. + # + # memtable_flush_writers defaults to two for a single data directory. + # This means that two memtables can be flushed concurrently to the single data directory. + # If you have multiple data directories the default is one memtable flushing at a time + # but the flush will use a thread per data directory so you will get two or more writers. + # + # Two is generally enough to flush on a fast disk [array] mounted as a single data directory. + # Adding more flush writers will result in smaller more frequent flushes that introduce more + # compaction overhead. + # + # There is a direct tradeoff between number of memtables that can be flushed concurrently + # and flush size and frequency. More is not better you just need enough flush writers + # to never stall waiting for flushing to free memory. + # + #memtable_flush_writers: 2 + + # Total space to use for change-data-capture logs on disk. + # + # If space gets above this value, Cassandra will throw WriteTimeoutException + # on Mutations including tables with CDC enabled. A CDCCompactor is responsible + # for parsing the raw CDC logs and deleting them when parsing is completed. + # + # The default value is the min of 4096 mb and 1/8th of the total space + # of the drive where cdc_raw_directory resides. 
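+    # (For example, assuming a 100 GiB volume for cdc_raw_directory, this
+    # default works out to min(4096, 102400 / 8) = 4096 MB.)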
+ # cdc_total_space_in_mb: 4096 + + # When we hit our cdc_raw limit and the CDCCompactor is either running behind + # or experiencing backpressure, we check at the following interval to see if any + # new space for cdc-tracked tables has been made available. Default to 250ms + # cdc_free_space_check_interval_ms: 250 + + # A fixed memory pool size in MB for for SSTable index summaries. If left + # empty, this will default to 5% of the heap size. If the memory usage of + # all index summaries exceeds this limit, SSTables with low read rates will + # shrink their index summaries in order to meet this limit. However, this + # is a best-effort process. In extreme conditions Cassandra may need to use + # more than this amount of memory. + index_summary_capacity_in_mb: + + # How frequently index summaries should be resampled. This is done + # periodically to redistribute memory from the fixed-size pool to sstables + # proportional their recent read rates. Setting to -1 will disable this + # process, leaving existing index summaries at their current sampling level. + index_summary_resize_interval_in_minutes: 60 + + # Whether to, when doing sequential writing, fsync() at intervals in + # order to force the operating system to flush the dirty + # buffers. Enable this to avoid sudden dirty buffer flushing from + # impacting read latencies. Almost always a good idea on SSDs; not + # necessarily on platters. + trickle_fsync: false + trickle_fsync_interval_in_kb: 10240 + + # TCP port, for commands and data + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + storage_port: 7000 + + # SSL port, for encrypted communication. Unused unless enabled in + # encryption_options + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + ssl_storage_port: 7001 + + # Address or interface to bind to and tell other Cassandra nodes to connect to. + # You _must_ change this if you want multiple nodes to be able to communicate! + # + # Set listen_address OR listen_interface, not both. + # + # Leaving it blank leaves it up to InetAddress.getLocalHost(). This + # will always do the Right Thing _if_ the node is properly configured + # (hostname, name resolution, etc), and the Right Thing is to use the + # address associated with the hostname (it might not be). + # + # Setting listen_address to 0.0.0.0 is always wrong. + # + listen_address: 172.28.60.207 + + # Set listen_address OR listen_interface, not both. Interfaces must correspond + # to a single address, IP aliasing is not supported. + # listen_interface: eth0 + + # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address + # you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 + # address will be used. If true the first ipv6 address will be used. Defaults to false preferring + # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. + # listen_interface_prefer_ipv6: false + + # Address to broadcast to other Cassandra nodes + # Leaving this blank will set it to the same value as listen_address + broadcast_address: 172.28.60.207 + + # When using multiple physical network interfaces, set this + # to true to listen on broadcast_address in addition to + # the listen_address, allowing nodes to communicate in both + # interfaces. + # Ignore this property if the network configuration automatically + # routes between the public and private networks such as EC2. 
+ # listen_on_broadcast_address: false + + # Internode authentication backend, implementing IInternodeAuthenticator; + # used to allow/disallow connections from peer nodes. + # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator + + # Whether to start the native transport server. + # Please note that the address on which the native transport is bound is the + # same as the rpc_address. The port however is different and specified below. + start_native_transport: true + # port for the CQL native transport to listen for clients on + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + native_transport_port: 9042 + # Enabling native transport encryption in client_encryption_options allows you to either use + # encryption for the standard port or to use a dedicated, additional port along with the unencrypted + # standard native_transport_port. + # Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption + # for native_transport_port. Setting native_transport_port_ssl to a different value + # from native_transport_port will use encryption for native_transport_port_ssl while + # keeping native_transport_port unencrypted. + # native_transport_port_ssl: 9142 + # The maximum threads for handling requests when the native transport is used. + # This is similar to rpc_max_threads though the default differs slightly (and + # there is no native_transport_min_threads, idle threads will always be stopped + # after 30 seconds). + # native_transport_max_threads: 128 + # + # The maximum size of allowed frame. Frame (requests) larger than this will + # be rejected as invalid. The default is 256MB. If you're changing this parameter, + # you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. + # native_transport_max_frame_size_in_mb: 256 + + # The maximum number of concurrent client connections. + # The default is -1, which means unlimited. + # native_transport_max_concurrent_connections: -1 + + # The maximum number of concurrent client connections per source ip. + # The default is -1, which means unlimited. + # native_transport_max_concurrent_connections_per_ip: -1 + + # Whether to start the thrift rpc server. + start_rpc: true + + # The address or interface to bind the Thrift RPC service and native transport + # server to. + # + # Set rpc_address OR rpc_interface, not both. + # + # Leaving rpc_address blank has the same effect as on listen_address + # (i.e. it will be based on the configured hostname of the node). + # + # Note that unlike listen_address, you can specify 0.0.0.0, but you must also + # set broadcast_rpc_address to a value other than 0.0.0.0. + # + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + rpc_address: 0.0.0.0 + + # Set rpc_address OR rpc_interface, not both. Interfaces must correspond + # to a single address, IP aliasing is not supported. + # rpc_interface: eth1 + + # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address + # you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 + # address will be used. If true the first ipv6 address will be used. Defaults to false preferring + # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. 
+ # rpc_interface_prefer_ipv6: false + + # port for Thrift to listen for clients on + rpc_port: 9160 + + # RPC address to broadcast to drivers and other Cassandra nodes. This cannot + # be set to 0.0.0.0. If left blank, this will be set to the value of + # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must + # be set. + broadcast_rpc_address: 172.28.60.207 + + # enable or disable keepalive on rpc/native connections + rpc_keepalive: true + + # Cassandra provides two out-of-the-box options for the RPC Server: + # + # sync + # One thread per thrift connection. For a very large number of clients, memory + # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size + # per thread, and that will correspond to your use of virtual memory (but physical memory + # may be limited depending on use of stack space). + # + # hsha + # Stands for "half synchronous, half asynchronous." All thrift clients are handled + # asynchronously using a small number of threads that does not vary with the amount + # of thrift clients (and thus scales well to many clients). The rpc requests are still + # synchronous (one thread per active request). If hsha is selected then it is essential + # that rpc_max_threads is changed from the default value of unlimited. + # + # The default is sync because on Windows hsha is about 30% slower. On Linux, + # sync/hsha performance is about the same, with hsha of course using less memory. + # + # Alternatively, can provide your own RPC server by providing the fully-qualified class name + # of an o.a.c.t.TServerFactory that can create an instance of it. + rpc_server_type: sync + + # Uncomment rpc_min|max_thread to set request pool size limits. + # + # Regardless of your choice of RPC server (see above), the number of maximum requests in the + # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync + # RPC server, it also dictates the number of clients that can be connected at all). + # + # The default is unlimited and thus provides no protection against clients overwhelming the server. You are + # encouraged to set a maximum that makes sense for you in production, but do keep in mind that + # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. + # + # rpc_min_threads: 16 + # rpc_max_threads: 2048 + + # uncomment to set socket buffer sizes on rpc connections + # rpc_send_buff_size_in_bytes: + # rpc_recv_buff_size_in_bytes: + + # Uncomment to set socket buffer size for internode communication + # Note that when setting this, the buffer size is limited by net.core.wmem_max + # and when not setting it it is defined by net.ipv4.tcp_wmem + # See also: + # /proc/sys/net/core/wmem_max + # /proc/sys/net/core/rmem_max + # /proc/sys/net/ipv4/tcp_wmem + # /proc/sys/net/ipv4/tcp_wmem + # and 'man tcp' + # internode_send_buff_size_in_bytes: + + # Uncomment to set socket buffer size for internode communication + # Note that when setting this, the buffer size is limited by net.core.wmem_max + # and when not setting it it is defined by net.ipv4.tcp_wmem + # internode_recv_buff_size_in_bytes: + + # Frame size for thrift (maximum message length). + thrift_framed_transport_size_in_mb: 15 + + # Set to true to have Cassandra create a hard link to each sstable + # flushed or streamed locally in a backups/ subdirectory of the + # keyspace data. Removing these links is the operator's + # responsibility. 
+ incremental_backups: false
+
+ # Whether or not to take a snapshot before each compaction. Be
+ # careful using this option, since Cassandra won't clean up the
+ # snapshots for you. Mostly useful if you're paranoid when there
+ # is a data format change.
+ snapshot_before_compaction: false
+
+ # Whether or not a snapshot is taken of the data before keyspace truncation
+ # or dropping of column families. The STRONGLY advised default of true
+ # should be used to provide data safety. If you set this flag to false, you will
+ # lose data on truncation or drop.
+ auto_snapshot: true
+
+ # Granularity of the collation index of rows within a partition.
+ # Increase if your rows are large, or if you have a very large
+ # number of rows per partition. The competing goals are these:
+ #
+ # - a smaller granularity means more index entries are generated
+ #   and looking up rows within the partition by collation column
+ #   is faster
+ # - but, Cassandra will keep the collation index in memory for hot
+ #   rows (as part of the key cache), so a larger granularity means
+ #   you can cache more hot rows
+ column_index_size_in_kb: 64
+
+ # Per sstable indexed key cache entries (the collation index in memory
+ # mentioned above) exceeding this size will not be held on heap.
+ # This means that only partition information is held on heap and the
+ # index entries are read from disk.
+ #
+ # Note that this size refers to the size of the
+ # serialized index information and not the size of the partition.
+ column_index_cache_size_in_kb: 2
+
+ # Number of simultaneous compactions to allow, NOT including
+ # validation "compactions" for anti-entropy repair. Simultaneous
+ # compactions can help preserve read performance in a mixed read/write
+ # workload, by mitigating the tendency of small sstables to accumulate
+ # during a single long-running compaction. The default is usually
+ # fine and if you experience problems with compaction running too
+ # slowly or too fast, you should look at
+ # compaction_throughput_mb_per_sec first.
+ #
+ # concurrent_compactors defaults to the smaller of (number of disks,
+ # number of cores), with a minimum of 2 and a maximum of 8.
+ #
+ # If your data directories are backed by SSD, you should increase this
+ # to the number of cores.
+ #concurrent_compactors: 1
+
+ # Throttles compaction to the given total throughput across the entire
+ # system. The faster you insert data, the faster you need to compact in
+ # order to keep the sstable count down, but in general, setting this to
+ # 16 to 32 times the rate you are inserting data is more than sufficient.
+ # Setting this to 0 disables throttling. Note that this accounts for all types
+ # of compaction, including validation compaction.
+ compaction_throughput_mb_per_sec: 16
+
+ # When compacting, the replacement sstable(s) can be opened before they
+ # are completely written, and used in place of the prior sstables for
+ # any range that has been written. This helps to smoothly transfer reads
+ # between the sstables, reducing page cache churn and keeping hot rows hot.
+ sstable_preemptive_open_interval_in_mb: 50
+
+ # Throttles all outbound streaming file transfers on this node to the
+ # given total throughput in Mbps. This is necessary because Cassandra does
+ # mostly sequential IO when streaming data during bootstrap or repair, which
+ # can lead to saturating the network connection and degrading rpc performance.
+ # When unset, the default is 200 Mbps or 25 MB/s.
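+ #
+ # For illustration only (hypothetical figure, not a recommendation): raising this
+ # to 400 megabits per second corresponds to roughly 50 MB/s of sequential
+ # streaming I/O per node.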
+ # stream_throughput_outbound_megabits_per_sec: 200 + + # Throttles all streaming file transfer between the datacenters, + # this setting allows users to throttle inter dc stream throughput in addition + # to throttling all network stream traffic as configured with + # stream_throughput_outbound_megabits_per_sec + # When unset, the default is 200 Mbps or 25 MB/s + # inter_dc_stream_throughput_outbound_megabits_per_sec: 200 + + # Server side timeouts for requests. The server will return a timeout exception + # to the client if it can't complete an operation within the corresponding + # timeout. Those settings are a protection against: + # 1) having client wait on an operation that might never terminate due to some + # failures. + # 2) operations that use too much CPU/read too much data (leading to memory build + # up) by putting a limit to how long an operation will execute. + # For this reason, you should avoid putting these settings too high. In other words, + # if you are timing out requests because of underlying resource constraints then + # increasing the timeout will just cause more problems. Of course putting them too + # low is equally ill-advised since clients could get timeouts even for successful + # operations just because the timeout setting is too tight. + + # How long the coordinator should wait for read operations to complete + read_request_timeout_in_ms: 5000 + # How long the coordinator should wait for seq or index scans to complete + range_request_timeout_in_ms: 10000 + # How long the coordinator should wait for writes to complete + write_request_timeout_in_ms: 2000 + # How long the coordinator should wait for counter writes to complete + counter_write_request_timeout_in_ms: 5000 + # How long a coordinator should continue to retry a CAS operation + # that contends with other proposals for the same row + cas_contention_timeout_in_ms: 1000 + # How long the coordinator should wait for truncates to complete + # (This can be much longer, because unless auto_snapshot is disabled + # we need to flush first so we can snapshot before removing the data.) + truncate_request_timeout_in_ms: 60000 + # The default timeout for other, miscellaneous operations + request_timeout_in_ms: 10000 + + # How long before a node logs slow queries. Select queries that take longer than + # this timeout to execute, will generate an aggregated log message, so that slow queries + # can be identified. Set this value to zero to disable slow query logging. + slow_query_log_timeout_in_ms: 500 + + # Enable operation timeout information exchange between nodes to accurately + # measure request timeouts. If disabled, replicas will assume that requests + # were forwarded to them instantly by the coordinator, which means that + # under overload conditions we will waste that much extra time processing + # already-timed-out requests. + # + # Warning: before enabling this property make sure to ntp is installed + # and the times are synchronized between the nodes. + cross_node_timeout: false + + # Set keep-alive period for streaming + # This node will send a keep-alive message periodically with this period. + # If the node does not receive a keep-alive message from the peer for + # 2 keep-alive cycles the stream session times out and fail + # Default value is 300s (5 minutes), which means stalled stream + # times out in 10 minutes by default + # streaming_keep_alive_period_in_secs: 300 + + # phi value that must be reached for a host to be marked down. + # most users should never need to adjust this. 
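+ # (As a rough, non-authoritative guideline: deployments on cloud networks with
+ # occasional packet loss sometimes raise this to 10-12; values far outside the
+ # 8-12 range are rarely useful.)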
+ # phi_convict_threshold: 8 + + # endpoint_snitch -- Set this to a class that implements + # IEndpointSnitch. The snitch has two functions: + # + # - it teaches Cassandra enough about your network topology to route + # requests efficiently + # - it allows Cassandra to spread replicas around your cluster to avoid + # correlated failures. It does this by grouping machines into + # "datacenters" and "racks." Cassandra will do its best not to have + # more than one replica on the same "rack" (which may not actually + # be a physical location) + # + # CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH + # ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. + # This means that if you start with the default SimpleSnitch, which + # locates every node on "rack1" in "datacenter1", your only options + # if you need to add another datacenter are GossipingPropertyFileSnitch + # (and the older PFS). From there, if you want to migrate to an + # incompatible snitch like Ec2Snitch you can do it by adding new nodes + # under Ec2Snitch (which will locate them in a new "datacenter") and + # decommissioning the old ones. + # + # Out of the box, Cassandra provides: + # + # SimpleSnitch: + # Treats Strategy order as proximity. This can improve cache + # locality when disabling read repair. Only appropriate for + # single-datacenter deployments. + # + # GossipingPropertyFileSnitch + # This should be your go-to snitch for production use. The rack + # and datacenter for the local node are defined in + # cassandra-rackdc.properties and propagated to other nodes via + # gossip. If cassandra-topology.properties exists, it is used as a + # fallback, allowing migration from the PropertyFileSnitch. + # + # PropertyFileSnitch: + # Proximity is determined by rack and data center, which are + # explicitly configured in cassandra-topology.properties. + # + # Ec2Snitch: + # Appropriate for EC2 deployments in a single Region. Loads Region + # and Availability Zone information from the EC2 API. The Region is + # treated as the datacenter, and the Availability Zone as the rack. + # Only private IPs are used, so this will not work across multiple + # Regions. + # + # Ec2MultiRegionSnitch: + # Uses public IPs as broadcast_address to allow cross-region + # connectivity. (Thus, you should set seed addresses to the public + # IP as well.) You will need to open the storage_port or + # ssl_storage_port on the public IP firewall. (For intra-Region + # traffic, Cassandra will switch to the private IP after + # establishing a connection.) + # + # RackInferringSnitch: + # Proximity is determined by rack and data center, which are + # assumed to correspond to the 3rd and 2nd octet of each node's IP + # address, respectively. Unless this happens to match your + # deployment conventions, this is best used as an example of + # writing a custom Snitch class and is provided in that spirit. + # + # You can use a custom Snitch by setting this to the full class name + # of the snitch, which will be assumed to be on your classpath. + endpoint_snitch: SimpleSnitch + + # controls how often to perform the more expensive part of host score + # calculation + dynamic_snitch_update_interval_in_ms: 100 + # controls how often to reset all host scores, allowing a bad host to + # possibly recover + dynamic_snitch_reset_interval_in_ms: 600000 + # if set greater than zero and read_repair_chance is < 1.0, this will allow + # 'pinning' of replicas to hosts in order to increase cache capacity. 
+ # The badness threshold will control how much worse the pinned host has to be + # before the dynamic snitch will prefer other replicas over it. This is + # expressed as a double which represents a percentage. Thus, a value of + # 0.2 means Cassandra would continue to prefer the static snitch values + # until the pinned host was 20% worse than the fastest. + dynamic_snitch_badness_threshold: 0.1 + + # request_scheduler -- Set this to a class that implements + # RequestScheduler, which will schedule incoming client requests + # according to the specific policy. This is useful for multi-tenancy + # with a single Cassandra cluster. + # NOTE: This is specifically for requests from the client and does + # not affect inter node communication. + # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place + # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of + # client requests to a node with a separate queue for each + # request_scheduler_id. The scheduler is further customized by + # request_scheduler_options as described below. + request_scheduler: org.apache.cassandra.scheduler.NoScheduler + + # Scheduler Options vary based on the type of scheduler + # + # NoScheduler + # Has no options + # + # RoundRobin + # throttle_limit + # The throttle_limit is the number of in-flight + # requests per client. Requests beyond + # that limit are queued up until + # running requests can complete. + # The value of 80 here is twice the number of + # concurrent_reads + concurrent_writes. + # default_weight + # default_weight is optional and allows for + # overriding the default which is 1. + # weights + # Weights are optional and will default to 1 or the + # overridden default_weight. The weight translates into how + # many requests are handled during each turn of the + # RoundRobin, based on the scheduler id. + # + # request_scheduler_options: + # throttle_limit: 80 + # default_weight: 5 + # weights: + # Keyspace1: 1 + # Keyspace2: 5 + + # request_scheduler_id -- An identifier based on which to perform + # the request scheduling. Currently the only valid option is keyspace. + # request_scheduler_id: keyspace + + # Enable or disable inter-node encryption + # JVM defaults for supported SSL socket protocols and cipher suites can + # be replaced using custom encryption options. This is not recommended + # unless you have policies in place that dictate certain settings, or + # need to disable vulnerable ciphers or protocols in case the JVM cannot + # be updated. + # FIPS compliant settings can be configured at JVM level and should not + # involve changing encryption settings here: + # https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html + # *NOTE* No custom encryption options are enabled at the moment + # The available internode options are : all, none, dc, rack + # + # If set to dc cassandra will encrypt the traffic between the DCs + # If set to rack cassandra will encrypt the traffic between the racks + # + # The passwords used in these options must match the passwords used when generating + # the keystore and truststore. 
For instructions on generating these files, see: + # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore + # + server_encryption_options: + internode_encryption: none + keystore: conf/.keystore + keystore_password: cassandra + truststore: conf/.truststore + truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + # require_client_auth: false + # require_endpoint_verification: false + + # enable or disable client/server encryption. + client_encryption_options: + enabled: false + # If enabled and optional is set to true encrypted and unencrypted connections are handled. + optional: false + keystore: conf/.keystore + keystore_password: cassandra + # require_client_auth: false + # Set trustore and truststore_password if require_client_auth is true + # truststore: conf/.truststore + # truststore_password: cassandra + # More advanced defaults below: + # protocol: TLS + # algorithm: SunX509 + # store_type: JKS + # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] + + # internode_compression controls whether traffic between nodes is + # compressed. + # Can be: + # + # all + # all traffic is compressed + # + # dc + # traffic between different datacenters is compressed + # + # none + # nothing is compressed. + internode_compression: dc + + # Enable or disable tcp_nodelay for inter-dc communication. + # Disabling it will result in larger (but fewer) network packets being sent, + # reducing overhead from the TCP protocol itself, at the cost of increasing + # latency if you block for cross-datacenter responses. + inter_dc_tcp_nodelay: false + + # TTL for different trace types used during logging of the repair process. + tracetype_query_ttl: 86400 + tracetype_repair_ttl: 604800 + + # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level + # This threshold can be adjusted to minimize logging if necessary + # gc_log_threshold_in_ms: 200 + + # If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at + # INFO level + # UDFs (user defined functions) are disabled by default. + # As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. + enable_user_defined_functions: false + + # Enables scripted UDFs (JavaScript UDFs). + # Java UDFs are always enabled, if enable_user_defined_functions is true. + # Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. + # This option has no effect, if enable_user_defined_functions is false. + enable_scripted_user_defined_functions: false + + # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. + # Lowering this value on Windows can provide much tighter latency and better throughput, however + # some virtualized environments may see a negative performance impact from changing this setting + # below their system default. The sysinternals 'clockres' tool can confirm your system's default + # setting. + windows_timer_interval: 1 + + + # Enables encrypting data at-rest (on disk). 
Different key providers can be plugged in, but the default reads from
+ # a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by
+ # the "key_alias" is the only key that will be used for encrypt operations; previously used keys
+ # can still (and should!) be in the keystore and will be used on decrypt operations
+ # (to handle the case of key rotation).
+ #
+ # It is strongly recommended to download and install Java Cryptography Extension (JCE)
+ # Unlimited Strength Jurisdiction Policy Files for your version of the JDK.
+ # (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html)
+ #
+ # Currently, only the following file types are supported for transparent data encryption, although
+ # more are coming in future cassandra releases: commitlog, hints
+ transparent_data_encryption_options:
+     enabled: false
+     chunk_length_kb: 64
+     cipher: AES/CBC/PKCS5Padding
+     key_alias: testing:1
+     # CBC IV length for AES needs to be 16 bytes (which is also the default size)
+     # iv_length: 16
+     key_provider:
+       - class_name: org.apache.cassandra.security.JKSKeyProvider
+         parameters:
+           - keystore: conf/.keystore
+             keystore_password: cassandra
+             store_type: JCEKS
+             key_password: cassandra
+
+
+ #####################
+ # SAFETY THRESHOLDS #
+ #####################
+
+ # When executing a scan, within or across a partition, we need to keep the
+ # tombstones seen in memory so we can return them to the coordinator, which
+ # will use them to make sure other replicas also know about the deleted rows.
+ # With workloads that generate a lot of tombstones, this can cause performance
+ # problems and even exhaust the server heap.
+ # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)
+ # Adjust the thresholds here if you understand the dangers and want to
+ # scan more tombstones anyway. These thresholds may also be adjusted at runtime
+ # using the StorageService mbean.
+ tombstone_warn_threshold: 10000
+ tombstone_failure_threshold: 1000000
+
+ # Filtering and secondary index queries at read consistency levels above ONE/LOCAL_ONE use a
+ # mechanism called replica filtering protection to ensure that results from stale replicas do
+ # not violate consistency. (See CASSANDRA-8272 and CASSANDRA-15907 for more details.) This
+ # mechanism materializes replica results by partition on-heap at the coordinator. The more possibly
+ # stale results returned by the replicas, the more rows materialized during the query.
+ replica_filtering_protection:
+     # These thresholds exist to limit the damage severely out-of-date replicas can cause during these
+     # queries. They limit the number of rows from all replicas individual index and filtering queries
+     # can materialize on-heap to return correct results at the desired read consistency level.
+     #
+     # "cached_replica_rows_warn_threshold" is the per-query threshold at which a warning will be logged.
+     # "cached_replica_rows_fail_threshold" is the per-query threshold at which the query will fail.
+     #
+     # These thresholds may also be adjusted at runtime using the StorageService mbean.
+     #
+     # If the failure threshold is breached, it is likely that either the current page/fetch size
+     # is too large or one or more replicas is severely out-of-sync and in need of repair.
+     cached_rows_warn_threshold: 2000
+     cached_rows_fail_threshold: 32000
+
+ {{- if eq .Values.janusgraph.atomic_mutation false}}
+ # Log WARN on any multiple-partition batch size exceeding this value.
5kb per batch by default. + # Caution should be taken on increasing the size of this threshold as it can lead to node instability. + batch_size_warn_threshold_in_kb: 5 + + # Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. + batch_size_fail_threshold_in_kb: 50 + {{- else }} + batch_size_warn_threshold_in_kb: {{ .Values.janusgraph.cassandra_batch_size_warn_threshold_in_kb }} + + batch_size_fail_threshold_in_kb: {{ .Values.janusgraph.cassandra_batch_size_failure_threshold_in_kb }} + {{- end }} + + # Log WARN on any batches not of type LOGGED than span across more partitions than this limit + unlogged_batch_across_partitions_warn_threshold: 10 + + # Log a warning when compacting partitions larger than this value + compaction_large_partition_warning_threshold_mb: 100 + + # GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level + # Adjust the threshold based on your application throughput requirement + # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level + gc_warn_threshold_in_ms: 1000 + + # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption + # early. Any value size larger than this threshold will result into marking an SSTable + # as corrupted. This should be positive and less than 2048. + # max_value_size_in_mb: 256 + + # Back-pressure settings # + # If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation + # sent to replicas, with the aim of reducing pressure on overloaded replicas. + back_pressure_enabled: false + # The back-pressure strategy applied. + # The default implementation, RateBasedBackPressure, takes three arguments: + # high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. + # If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor; + # if above high ratio, the rate limiting is increased by the given factor; + # such factor is usually best configured between 1 and 10, use larger values for a faster recovery + # at the expense of potentially more dropped mutations; + # the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica, + # if SLOW at the speed of the slowest one. + # New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and + # provide a public constructor accepting a Map. + back_pressure_strategy: + - class_name: org.apache.cassandra.net.RateBasedBackPressure + parameters: + - high_ratio: 0.90 + factor: 5 + flow: FAST + + # Coalescing Strategies # + # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). + # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in + # virtualized environments, the point at which an application can be bound by network packet processing can be + # surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal + # doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process + # is sufficient for many applications such that no load starvation is experienced even without coalescing. 
+ # There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages
+ # per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one
+ # trip to read from a socket, and all the task submission work can be done at the same time reducing context switching
+ # and increasing cache friendliness of network message processing.
+ # See CASSANDRA-8692 for details.
+
+ # Strategy to use for coalescing messages in OutboundTcpConnection.
+ # Can be fixed, movingaverage, timehorizon, disabled (default).
+ # You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name.
+ # otc_coalescing_strategy: DISABLED
+
+ # How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first
+ # message is received before it will be sent with any accompanying messages. For moving average this is the
+ # maximum amount of time that will be waited as well as the interval at which messages must arrive on average
+ # for coalescing to be enabled.
+ # otc_coalescing_window_us: 200
+
+ # Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128.
+ # otc_coalescing_enough_coalesced_messages: 8
+
+ # How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection.
+ # Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory
+ # taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value
+ # will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU
+ # time and queue contention while iterating the backlog of messages.
+ # An interval of 0 disables any wait time, which is the behavior of former Cassandra versions.
+ #
+ # otc_backlog_expiration_interval_ms: 200
+
+
+ #########################
+ # EXPERIMENTAL FEATURES #
+ #########################
+
+ # Enables materialized view creation on this node.
+ # Materialized views are considered experimental and are not recommended for production use.
+ enable_materialized_views: true
+
+ # Enables SASI index creation on this node.
+ # SASI indexes are considered experimental and are not recommended for production use.
+ enable_sasi_indexes: true
+
+ # Enables the use of 'ALTER ... DROP COMPACT STORAGE' statements on this node.
+ # 'ALTER ... DROP COMPACT STORAGE' is considered experimental and is not recommended for production use.
+ enable_drop_compact_storage: false
+ cassandra-env.sh: |
+ # Licensed to the Apache Software Foundation (ASF) under one
+ # or more contributor license agreements. See the NOTICE file
+ # distributed with this work for additional information
+ # regarding copyright ownership. The ASF licenses this file
+ # to you under the Apache License, Version 2.0 (the
+ # "License"); you may not use this file except in compliance
+ # with the License. You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
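+
+ # Illustrative summary of the sizing rule implemented below (example numbers only,
+ # not defaults): MAX_HEAP_SIZE = max(min(RAM/2, 1024 MB), min(RAM/4, 8192 MB)),
+ # so a hypothetical node with 32768 MB of RAM ends up with an 8192M heap, and
+ # HEAP_NEWSIZE = min(100 MB * cpu_cores, MAX_HEAP_SIZE / 4).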
+ + calculate_heap_sizes() + { + case "`uname`" in + Linux) + system_memory_in_mb=`free -m | awk '/:/ {print $2;exit}'` + system_cpu_cores=`egrep -c 'processor([[:space:]]+):.*' /proc/cpuinfo` + ;; + FreeBSD) + system_memory_in_bytes=`sysctl hw.physmem | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + SunOS) + system_memory_in_mb=`prtconf | awk '/Memory size:/ {print $3}'` + system_cpu_cores=`psrinfo | wc -l` + ;; + Darwin) + system_memory_in_bytes=`sysctl hw.memsize | awk '{print $2}'` + system_memory_in_mb=`expr $system_memory_in_bytes / 1024 / 1024` + system_cpu_cores=`sysctl hw.ncpu | awk '{print $2}'` + ;; + *) + # assume reasonable defaults for e.g. a modern desktop or + # cheap server + system_memory_in_mb="2048" + system_cpu_cores="2" + ;; + esac + + # some systems like the raspberry pi don't report cores, use at least 1 + if [ "$system_cpu_cores" -lt "1" ] + then + system_cpu_cores="1" + fi + + # set max heap size based on the following + # max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) + # calculate 1/2 ram and cap to 1024MB + # calculate 1/4 ram and cap to 8192MB + # pick the max + half_system_memory_in_mb=`expr $system_memory_in_mb / 2` + quarter_system_memory_in_mb=`expr $half_system_memory_in_mb / 2` + if [ "$half_system_memory_in_mb" -gt "1024" ] + then + half_system_memory_in_mb="1024" + fi + if [ "$quarter_system_memory_in_mb" -gt "8192" ] + then + quarter_system_memory_in_mb="8192" + fi + if [ "$half_system_memory_in_mb" -gt "$quarter_system_memory_in_mb" ] + then + max_heap_size_in_mb="$half_system_memory_in_mb" + else + max_heap_size_in_mb="$quarter_system_memory_in_mb" + fi + MAX_HEAP_SIZE="${max_heap_size_in_mb}M" + + # Young gen: min(max_sensible_per_modern_cpu_core * num_cores, 1/4 * heap size) + max_sensible_yg_per_core_in_mb="100" + max_sensible_yg_in_mb=`expr $max_sensible_yg_per_core_in_mb "*" $system_cpu_cores` + + desired_yg_in_mb=`expr $max_heap_size_in_mb / 4` + + if [ "$desired_yg_in_mb" -gt "$max_sensible_yg_in_mb" ] + then + HEAP_NEWSIZE="${max_sensible_yg_in_mb}M" + else + HEAP_NEWSIZE="${desired_yg_in_mb}M" + fi + } + + # Determine the sort of JVM we'll be running on. + java_ver_output=`"${JAVA:-java}" -version 2>&1` + jvmver=`echo "$java_ver_output" | grep '[openjdk|java] version' | awk -F'"' 'NR==1 {print $2}' | cut -d\- -f1` + JVM_VERSION=${jvmver%_*} + JVM_PATCH_VERSION=${jvmver#*_} + + if [ "$JVM_VERSION" \< "1.8" ] ; then + echo "Cassandra 3.0 and later require Java 8u40 or later." + exit 1; + fi + + if [ "$JVM_VERSION" \< "1.8" ] && [ "$JVM_PATCH_VERSION" -lt 40 ] ; then + echo "Cassandra 3.0 and later require Java 8u40 or later." + exit 1; + fi + + jvm=`echo "$java_ver_output" | grep -A 1 '[openjdk|java] version' | awk 'NR==2 {print $1}'` + case "$jvm" in + OpenJDK) + JVM_VENDOR=OpenJDK + # this will be "64-Bit" or "32-Bit" + JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $2}'` + ;; + "Java(TM)") + JVM_VENDOR=Oracle + # this will be "64-Bit" or "32-Bit" + JVM_ARCH=`echo "$java_ver_output" | awk 'NR==3 {print $3}'` + ;; + *) + # Help fill in other JVM values + JVM_VENDOR=other + JVM_ARCH=unknown + ;; + esac + + # Sets the path where logback and GC logs are written. 
+ if [ "x$CASSANDRA_LOG_DIR" = "x" ] ; then + CASSANDRA_LOG_DIR="$CASSANDRA_HOME/logs" + fi + + #GC log path has to be defined here because it needs to access CASSANDRA_HOME + JVM_OPTS="$JVM_OPTS -Xloggc:${CASSANDRA_LOG_DIR}/gc.log" + + # Here we create the arguments that will get passed to the jvm when + # starting cassandra. + + # Read user-defined JVM options from jvm.options file + JVM_OPTS_FILE=$CASSANDRA_CONF/jvm.options + for opt in `grep "^-" $JVM_OPTS_FILE` + do + JVM_OPTS="$JVM_OPTS $opt" + done + + # Check what parameters were defined on jvm.options file to avoid conflicts + echo $JVM_OPTS | grep -q Xmn + DEFINED_XMN=$? + echo $JVM_OPTS | grep -q Xmx + DEFINED_XMX=$? + echo $JVM_OPTS | grep -q Xms + DEFINED_XMS=$? + echo $JVM_OPTS | grep -q UseConcMarkSweepGC + USING_CMS=$? + echo $JVM_OPTS | grep -q UseG1GC + USING_G1=$? + + # Override these to set the amount of memory to allocate to the JVM at + # start-up. For production use you may wish to adjust this for your + # environment. MAX_HEAP_SIZE is the total amount of memory dedicated + # to the Java heap. HEAP_NEWSIZE refers to the size of the young + # generation. Both MAX_HEAP_SIZE and HEAP_NEWSIZE should be either set + # or not (if you set one, set the other). + # + # The main trade-off for the young generation is that the larger it + # is, the longer GC pause times will be. The shorter it is, the more + # expensive GC will be (usually). + # + # The example HEAP_NEWSIZE assumes a modern 8-core+ machine for decent pause + # times. If in doubt, and if you do not particularly want to tweak, go with + # 100 MB per physical CPU core. + + #MAX_HEAP_SIZE="4G" + #HEAP_NEWSIZE="800M" + + # Set this to control the amount of arenas per-thread in glibc + #export MALLOC_ARENA_MAX=4 + + # only calculate the size if it's not set manually + if [ "x$MAX_HEAP_SIZE" = "x" ] && [ "x$HEAP_NEWSIZE" = "x" -o $USING_G1 -eq 0 ]; then + calculate_heap_sizes + elif [ "x$MAX_HEAP_SIZE" = "x" ] || [ "x$HEAP_NEWSIZE" = "x" -a $USING_G1 -ne 0 ]; then + echo "please set or unset MAX_HEAP_SIZE and HEAP_NEWSIZE in pairs when using CMS GC (see cassandra-env.sh)" + exit 1 + fi + + if [ "x$MALLOC_ARENA_MAX" = "x" ] ; then + export MALLOC_ARENA_MAX=4 + fi + + # We only set -Xms and -Xmx if they were not defined on jvm.options file + # If defined, both Xmx and Xms should be defined together. + if [ $DEFINED_XMX -ne 0 ] && [ $DEFINED_XMS -ne 0 ]; then + JVM_OPTS="$JVM_OPTS -Xms${MAX_HEAP_SIZE}" + JVM_OPTS="$JVM_OPTS -Xmx${MAX_HEAP_SIZE}" + elif [ $DEFINED_XMX -ne 0 ] || [ $DEFINED_XMS -ne 0 ]; then + echo "Please set or unset -Xmx and -Xms flags in pairs on jvm.options file." + exit 1 + fi + + # We only set -Xmn flag if it was not defined in jvm.options file + # and if the CMS GC is being used + # If defined, both Xmn and Xmx should be defined together. + if [ $DEFINED_XMN -eq 0 ] && [ $DEFINED_XMX -ne 0 ]; then + echo "Please set or unset -Xmx and -Xmn flags in pairs on jvm.options file." 
+ exit 1 + elif [ $DEFINED_XMN -ne 0 ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -Xmn${HEAP_NEWSIZE}" + fi + + if [ "$JVM_ARCH" = "64-Bit" ] && [ $USING_CMS -eq 0 ]; then + JVM_OPTS="$JVM_OPTS -XX:+UseCondCardMark" + fi + + # provides hints to the JIT compiler + JVM_OPTS="$JVM_OPTS -XX:CompileCommandFile=$CASSANDRA_CONF/hotspot_compiler" + + # add the jamm javaagent + JVM_OPTS="$JVM_OPTS -javaagent:$CASSANDRA_HOME/lib/jamm-0.3.0.jar" + + # set jvm HeapDumpPath with CASSANDRA_HEAPDUMP_DIR + if [ "x$CASSANDRA_HEAPDUMP_DIR" != "x" ]; then + JVM_OPTS="$JVM_OPTS -XX:HeapDumpPath=$CASSANDRA_HEAPDUMP_DIR/cassandra-`date +%s`-pid$$.hprof" + fi + + # stop the jvm on OutOfMemoryError as it can result in some data corruption + # uncomment the preferred option + # ExitOnOutOfMemoryError and CrashOnOutOfMemoryError require a JRE greater or equals to 1.7 update 101 or 1.8 update 92 + # For OnOutOfMemoryError we cannot use the JVM_OPTS variables because bash commands split words + # on white spaces without taking quotes into account + # JVM_OPTS="$JVM_OPTS -XX:+ExitOnOutOfMemoryError" + # JVM_OPTS="$JVM_OPTS -XX:+CrashOnOutOfMemoryError" + JVM_ON_OUT_OF_MEMORY_ERROR_OPT="-XX:OnOutOfMemoryError=kill -9 %p" + + # print an heap histogram on OutOfMemoryError + # JVM_OPTS="$JVM_OPTS -Dcassandra.printHeapHistogramOnOutOfMemoryError=true" + + # jmx: metrics and administration interface + # + # add this if you're having trouble connecting: + # JVM_OPTS="$JVM_OPTS -Djava.rmi.server.hostname=" + # + # see + # https://blogs.oracle.com/jmxetc/entry/troubleshooting_connection_problems_in_jconsole + # for more on configuring JMX through firewalls, etc. (Short version: + # get it working with no firewall first.) + # + # Cassandra ships with JMX accessible *only* from localhost. + # To enable remote JMX connections, uncomment lines below + # with authentication and/or ssl enabled. See https://wiki.apache.org/cassandra/JmxSecurity + # + if [ "x$LOCAL_JMX" = "x" ]; then + LOCAL_JMX=yes + fi + + # Specifies the default port over which Cassandra will be available for + # JMX connections. + # For security reasons, you should not expose this port to the internet. Firewall it if needed. + JMX_PORT="7199" + + if [ "$LOCAL_JMX" = "yes" ]; then + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.local.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" + else + JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.rmi.port=$JMX_PORT" + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.authenticate=false" + fi + + # jmx authentication and authorization options. By default, auth is only + # activated for remote connections but they can also be enabled for local only JMX + ## Basic file based authn & authz + JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.password.file=/etc/cassandra/jmxremote.password" + #JVM_OPTS="$JVM_OPTS -Dcom.sun.management.jmxremote.access.file=/etc/cassandra/jmxremote.access" + ## Custom auth settings which can be used as alternatives to JMX's out of the box auth utilities. + ## JAAS login modules can be used for authentication by uncommenting these two properties. + ## Cassandra ships with a LoginModule implementation - org.apache.cassandra.auth.CassandraLoginModule - + ## which delegates to the IAuthenticator configured in cassandra.yaml. 
See the sample JAAS configuration + ## file cassandra-jaas.config + #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.remote.login.config=CassandraLogin" + #JVM_OPTS="$JVM_OPTS -Djava.security.auth.login.config=$CASSANDRA_CONF/cassandra-jaas.config" + + ## Cassandra also ships with a helper for delegating JMX authz calls to the configured IAuthorizer, + ## uncomment this to use it. Requires one of the two authentication options to be enabled + #JVM_OPTS="$JVM_OPTS -Dcassandra.jmx.authorizer=org.apache.cassandra.auth.jmx.AuthorizationProxy" + + # To use mx4j, an HTML interface for JMX, add mx4j-tools.jar to the lib/ + # directory. + # See http://cassandra.apache.org/doc/3.11/operating/metrics.html#jmx + # By default mx4j listens on the broadcast_address, port 8081. Uncomment the following lines + # to control its listen address and port. + #MX4J_ADDRESS="-Dmx4jaddress=127.0.0.1" + #MX4J_PORT="-Dmx4jport=8081" + + # Cassandra uses SIGAR to capture OS metrics CASSANDRA-7838 + # for SIGAR we have to set the java.library.path + # to the location of the native libraries. + JVM_OPTS="$JVM_OPTS -Djava.library.path=$CASSANDRA_HOME/lib/sigar-bin" + + JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" + JVM_OPTS="$JVM_OPTS $MX4J_PORT" + JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" diff --git a/helm/atlas/charts/cassandra/templates/configmap.yaml b/helm/atlas/charts/cassandra/templates/configmap.yaml new file mode 100755 index 00000000000..d901482ba40 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/configmap.yaml @@ -0,0 +1,14 @@ +{{- if .Values.configOverrides }} +kind: ConfigMap +apiVersion: v1 +metadata: + name: {{ template "cassandra.name" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{ toYaml .Values.configOverrides | indent 2 }} +{{- end }} diff --git a/helm/atlas/charts/cassandra/templates/pdb.yaml b/helm/atlas/charts/cassandra/templates/pdb.yaml new file mode 100755 index 00000000000..d87ade8a533 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/pdb.yaml @@ -0,0 +1,22 @@ +{{- if .Values.podDisruptionBudget -}} +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + labels: + app: {{ template "cassandra.name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version }} + heritage: {{ .Release.Service }} + release: {{ .Release.Name }} + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} +spec: + selector: + matchLabels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} +{{ toYaml .Values.podDisruptionBudget | indent 2 }} +{{- end -}} diff --git a/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml b/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml new file mode 100644 index 00000000000..6a93421e33a --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml @@ -0,0 +1,80 @@ +{{- if .Values.reaper.enable -}} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "cassandra.fullname" . }}-reaper-cron + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }}-reaper + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + schedule: "00 11 * * 6" #At 11 AM Saturday, UTC. 
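+   # Cron fields: minute hour day-of-month month day-of-week (6 = Saturday).
+   # Note (assumption): CronJob schedules are evaluated in the controller manager's
+   # timezone, which is UTC on most clusters.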
+ failedJobsHistoryLimit: {{ .Values.reaper.failedJobsHistoryLimit }} + successfulJobsHistoryLimit: {{ .Values.reaper.successfulJobsHistoryLimit }} + concurrencyPolicy: {{ .Values.reaper.concurrencyPolicy }} + jobTemplate: + spec: + activeDeadlineSeconds: {{ .Values.reaper.activeDeadlineSeconds }} + backoffLimit: {{ .Values.reaper.backoffLimit }} + template: + spec: + restartPolicy: {{ .Values.reaper.restartPolicy }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 14 }} + {{- end }} + containers: + - name: reaper + {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper }} + image: {{ .Values.multiarch.image.reaper }} + {{- else }} + image: "{{ .Values.reaper.image.repository }}:{{ .Values.reaper.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.reaper.image.pullPolicy }} + env: + {{- range $key, $value := .Values.reaper.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + ports: + - name: http + containerPort: {{ .Values.reaper.port }} + protocol: TCP + livenessProbe: + httpGet: + path: /ping + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /ping + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + resources: + requests: + cpu: {{ .Values.reaper.resources.requests.cpu }} + memory: {{ .Values.reaper.resources.requests.memory }} + limits: + cpu: {{ .Values.reaper.resources.limits.cpu }} + memory: {{ .Values.reaper.resources.limits.memory }} + - name: reaper-repair-sidecar + {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper_repair }} + image: {{ .Values.multiarch.image.reaper_repair }} + {{- else }} + image: "{{.Values.reaper.sidecar.image.repository}}:{{.Values.reaper.sidecar.image.tag}}" + {{- end }} + imagePullPolicy: {{.Values.reaper.sidecar.image.pullPolicy}} + env: + {{- range $key, $value := .Values.reaper.sidecar.env }} + - name: {{ $key }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.reaper.sidecar.resources | nindent 20 }} + imagePullSecrets: + - name: {{ .Values.image.pullSecrets }} +{{- end}} \ No newline at end of file diff --git a/helm/atlas/charts/cassandra/templates/service.yaml b/helm/atlas/charts/cassandra/templates/service.yaml new file mode 100755 index 00000000000..8854e80a956 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/service.yaml @@ -0,0 +1,46 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- with .Values.service.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +spec: + clusterIP: None + type: {{ .Values.service.type }} + ports: + {{- if .Values.exporter.enabled }} + - name: metrics + port: 5556 + targetPort: {{ .Values.exporter.port }} + {{- end }} + - name: intra + port: 7000 + targetPort: 7000 + - name: tls + port: 7001 + targetPort: 7001 + - name: jmx + port: 7199 + targetPort: 7199 + - name: cql + port: {{ default 9042 .Values.config.ports.cql }} + targetPort: {{ default 9042 .Values.config.ports.cql }} + - name: thrift + port: {{ default 9160 .Values.config.ports.thrift }} + targetPort: {{ default 9160 .Values.config.ports.thrift }} + {{- if .Values.config.ports.agent }} + - name: agent + port: {{ .Values.config.ports.agent }} + targetPort: {{ .Values.config.ports.agent }} + {{- end }} + selector: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} diff --git a/helm/atlas/charts/cassandra/templates/servicemonitor.yaml b/helm/atlas/charts/cassandra/templates/servicemonitor.yaml new file mode 100755 index 00000000000..7f8afabc7d5 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/servicemonitor.yaml @@ -0,0 +1,29 @@ +{{- if and .Values.exporter.enabled .Values.exporter.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "cassandra.fullname" . }} + {{- if .Values.exporter.serviceMonitor.namespace }} + namespace: {{ .Values.exporter.serviceMonitor.namespace }} + {{- end }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: "prometheus-operator" + heritage: {{ .Release.Service }} + {{- if .Values.exporter.serviceMonitor.additionalLabels }} +{{ toYaml .Values.exporter.serviceMonitor.additionalLabels | indent 4 }} + {{- end }} +spec: + jobLabel: {{ template "cassandra.name" . }} + endpoints: + - port: metrics + interval: 60s + path: /metrics + selector: + matchLabels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + namespaceSelector: + any: true + {{- end }} diff --git a/helm/atlas/charts/cassandra/templates/statefulset.yaml b/helm/atlas/charts/cassandra/templates/statefulset.yaml new file mode 100755 index 00000000000..52070022758 --- /dev/null +++ b/helm/atlas/charts/cassandra/templates/statefulset.yaml @@ -0,0 +1,434 @@ +{{- $cloud := .Values.global.cloud }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "cassandra.fullname" . }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "cassandra.name" . }} + chart: {{ template "cassandra.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "cassandra-config" +spec: + selector: + matchLabels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + serviceName: {{ template "cassandra.fullname" . }} + replicas: {{ .Values.config.cluster_size }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy.type }} + template: + metadata: + labels: + app: {{ template "cassandra.name" . 
}} + release: {{ .Release.Name }} +{{- if .Values.podLabels }} +{{ toYaml .Values.podLabels | indent 8 }} +{{- end }} +{{- if .Values.podAnnotations }} + annotations: +{{ toYaml .Values.podAnnotations | indent 8 }} +{{- end }} + spec: + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + hostNetwork: {{ .Values.hostNetwork }} +{{- if .Values.selector }} +{{ toYaml .Values.selector | indent 6 }} +{{- end }} + {{- if .Values.securityContext.enabled }} + securityContext: + fsGroup: {{ .Values.securityContext.fsGroup }} + runAsUser: {{ .Values.securityContext.runAsUser }} + {{- end }} +{{- $tierType := .Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + affinity: + nodeAffinity: + {{- if eq .Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + podAntiAffinity: + {{- if eq .Values.antiAffinity "soft" }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 50 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.affinity.podAntiAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} +{{- end }} +{{- if .Values.tolerations }} + tolerations: +{{ toYaml .Values.tolerations | indent 8 }} +{{- end }} +# {{- if .Values.configOverrides }} +# initContainers: +# - name: config-copier +# image: ghcr.io/atlanhq/busybox:1.31 +# command: [ 'sh', '-c', 'cp /configmap-files/* /cassandra-configs/ && chown 999:999 /cassandra-configs/*'] +# volumeMounts: +# {{- range $key, $value := .Values.configOverrides }} +# - name: cassandra-config-{{ $key | replace "." 
"-" | replace "_" "--" }} +# mountPath: /configmap-files/{{ $key }} +# subPath: {{ $key }} +# {{- end }} +# - name: cassandra-configs +# mountPath: /cassandra-configs/ +# {{- end }} + initContainers: + - name: copy-config + {{- if and .Values.multiarch.enabled .Values.multiarch.image.busybox }} + image: {{ .Values.multiarch.image.busybox }} + {{- else }} + image: ghcr.io/atlanhq/busybox:1.32 + {{- end }} + command: ['sh', '-c', 'cp /config/cassandra.yaml /config/cassandra-env.sh /mnt/'] + volumeMounts: + - name: config + mountPath: /config/cassandra.yaml + subPath: cassandra.yaml + - name: config + mountPath: /config/cassandra-env.sh + subPath: cassandra-env.sh + - name: pre-install + mountPath: /mnt + containers: +{{- if .Values.exporter.enabled }} + - name: cassandra-exporter + {{- if and .Values.multiarch.enabled .Values.multiarch.image.cassandra_exporter }} + image: {{ .Values.multiarch.image.cassandra_exporter }} + {{- else }} + image: "{{ .Values.exporter.image.repo }}:{{ .Values.exporter.image.tag }}" + {{- end }} + resources: +{{ toYaml .Values.exporter.resources | indent 10 }} + env: + - name: CASSANDRA_EXPORTER_CONFIG_listenPort + value: {{ .Values.exporter.port | quote }} + - name: JVM_OPTS + value: {{ .Values.exporter.jvmOpts | quote }} + ports: + - name: metrics + containerPort: {{ .Values.exporter.port }} + protocol: TCP + - name: jmx + containerPort: 5555 + livenessProbe: + tcpSocket: + port: {{ .Values.exporter.port }} + readinessProbe: + httpGet: + path: /metrics + port: {{ .Values.exporter.port }} + initialDelaySeconds: 40 + timeoutSeconds: 45 +{{- end }} + - name: {{ template "cassandra.fullname" . }} + {{- if and .Values.multiarch.enabled .Values.multiarch.image.cassandra }} + image: {{ .Values.multiarch.image.cassandra }} + {{- else }} + image: "{{ .Values.image.repo }}:{{ .Values.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} +{{- if .Values.commandOverrides }} + command: {{ .Values.commandOverrides }} +{{- end }} +{{- if .Values.argsOverrides }} + args: {{ .Values.argsOverrides }} +{{- end }} +{{- $tierType := .Values.global.Tier_Type | default "" }} +{{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + resources: +{{ toYaml .Values.resources | indent 10 }} +{{- end }} + env: + {{- $seed_size := default 1 .Values.config.seed_size | int -}} + {{- $global := . 
}} + - name: CASSANDRA_SEEDS + {{- if .Values.hostNetwork }} + value: {{ required "You must fill \".Values.config.seeds\" with list of Cassandra seeds when hostNetwork is set to true" .Values.config.seeds | quote }} + {{- else }} + value: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-1.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-2.atlas-cassandra.atlas.svc.cluster.local" + {{- end }} + - name: MAX_HEAP_SIZE + value: {{ default "8192M" .Values.max_heap_size | quote }} + - name: HEAP_NEWSIZE + value: {{ default "200M" .Values.heap_new_size | quote }} + - name: CASSANDRA_ENDPOINT_SNITCH + value: {{ default "SimpleSnitch" .Values.config.endpoint_snitch | quote }} + - name: CASSANDRA_CLUSTER_NAME + value: {{ default "Cassandra" .Values.config.cluster_name | quote }} + - name: CASSANDRA_DC + value: {{ default "DC1" .Values.config.dc_name | quote }} + - name: CASSANDRA_RACK + value: {{ default "RAC1" .Values.config.rack_name | quote }} + - name: CASSANDRA_START_RPC + value: {{ default "false" .Values.config.start_rpc | quote }} + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: LOCAL_JMX + value: {{ default "no" .Values.config.local_jmx | quote }} + {{- range $key, $value := .Values.env }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} + livenessProbe: + exec: + command: [ "/bin/sh", "-c", "nodetool -h ::FFFF:127.0.0.1 status" ] + initialDelaySeconds: {{ .Values.livenessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.livenessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.livenessProbe.timeoutSeconds }} + successThreshold: {{ .Values.livenessProbe.successThreshold }} + failureThreshold: {{ .Values.livenessProbe.failureThreshold }} + readinessProbe: + exec: + command: [ "/bin/sh", "-c", "nodetool -h ::FFFF:127.0.0.1 status | grep -E \"^UN\\s+{{ .Values.readinessProbe.address }}\"" ] + initialDelaySeconds: {{ .Values.readinessProbe.initialDelaySeconds }} + periodSeconds: {{ .Values.readinessProbe.periodSeconds }} + timeoutSeconds: {{ .Values.readinessProbe.timeoutSeconds }} + successThreshold: {{ .Values.readinessProbe.successThreshold }} + failureThreshold: {{ .Values.readinessProbe.failureThreshold }} + ports: + - name: intra + containerPort: 7000 + - name: tls + containerPort: 7001 + - name: jmx + containerPort: 7199 + - name: cql + containerPort: {{ default 9042 .Values.config.ports.cql }} + - name: thrift + containerPort: {{ default 9160 .Values.config.ports.thrift }} + {{- if .Values.config.ports.agent }} + - name: agent + containerPort: {{ .Values.config.ports.agent }} + {{- end }} + volumeMounts: + - name: pre-install + mountPath: /etc/cassandra/cassandra.yaml + subPath: cassandra.yaml + - name: pre-install + mountPath: /etc/cassandra/cassandra-env.sh + subPath: cassandra-env.sh + - name: data + mountPath: /var/lib/cassandra + {{- if .Values.configOverrides }} + - name: cassandra-configs + mountPath: /etc/cassandra + {{- end }} + {{- if .Values.extraVolumeMounts }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . 
| indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 8 }} + {{- end }} + {{- end }} + {{- if not .Values.persistence.enabled }} + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "exec nodetool decommission"] + {{- end }} + - name: cassandra-icarus + {{- if and .Values.multiarch.enabled .Values.multiarch.image.icarus }} + image: {{ .Values.multiarch.image.icarus }} + {{- else }} + image: "{{ .Values.icarus.image.repo }}:{{ .Values.icarus.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + securityContext: + runAsUser: {{.Values.securityContext.runAsUser}} + runAsGroup: {{ .Values.securityContext.fsGroup }} + resources: +{{ toYaml .Values.icarus.resources | indent 10 }} + env: + - name: JMX_HOST + value: localhost + - name: JMX_PORT + value: "7199" + - name: POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + {{- if eq $cloud "gcp" }} + - name: GOOGLE_APPLICATION_CREDENTIALS + value: "/var/secrets/google/gcp_credentials.json" + {{- end }} + {{- if eq $cloud "azure" }} + - name: AZURE_STORAGE_KEY + valueFrom: + secretKeyRef: + name: azurestorage + key: azure.client.default.key + - name: AZURE_STORAGE_ACCOUNT + valueFrom: + secretKeyRef: + name: azurestorage + key: azure.client.default.account + {{- end}} + ports: + - containerPort: {{ .Values.icarus.port }} + name: http + # Health checks for Icarus based on the API spec + readinessProbe: + httpGet: + path: /version # API endpoint to check version + port: {{ .Values.icarus.port }} + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 5 + successThreshold: 1 + failureThreshold: 3 + + livenessProbe: + httpGet: + path: /version # Same endpoint can be used to verify service is alive + port: {{ .Values.icarus.port }} + initialDelaySeconds: 60 # Give it time to start + periodSeconds: 30 + timeoutSeconds: 5 + failureThreshold: 3 + + # Wait for Cassandra container to be ready before starting + startupProbe: + httpGet: + path: /version + port: {{ .Values.icarus.port }} + failureThreshold: 30 # Allow more time during startup + periodSeconds: 10 + volumeMounts: + - name: pre-install + mountPath: /etc/cassandra/cassandra.yaml + subPath: cassandra.yaml + - name: pre-install + mountPath: /etc/cassandra/cassandra-env.sh + subPath: cassandra-env.sh + - name: data + mountPath: /var/lib/cassandra + {{- if eq $cloud "gcp" }} + - name: gcp-creds + mountPath: /var/secrets/google/gcp_credentials.json + subPath: gcp_credentials.json + readOnly: true + {{- end }} + + {{- if .Values.extraContainers }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} + terminationGracePeriodSeconds: {{ default 30 .Values.podSettings.terminationGracePeriodSeconds }} + {{- if .Values.image.pullSecrets }} + imagePullSecrets: + - name: {{ .Values.image.pullSecrets }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- if or .Values.configOverrides (not .Values.persistence.enabled) }} + volumes: +{{- end }} +{{- range $key, $value := .Values.configOverrides }} + - configMap: + name: cassandra + name: cassandra-config-{{ $key | replace "." 
"-" | replace "_" "--" }} +{{- end }} +{{- if .Values.configOverrides }} + - name: cassandra-configs + emptyDir: {} +{{- end }} +{{- if not .Values.persistence.enabled }} + - name: data + emptyDir: {} +{{- else }} +{{- if .Values.extraVolumes }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + volumes: + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . | indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if eq $cloud "gcp" }} + - name: gcp-creds + secret: + secretName: gcp-creds-secret-manager + items: + - key: GOOGLE_APPLICATION_CREDENTIALS + path: gcp_credentials.json + {{- end }} + + volumeClaimTemplates: + - metadata: + name: data + labels: + app: {{ template "cassandra.name" . }} + release: {{ .Release.Name }} + spec: + accessModes: + - {{ .Values.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.persistence.size | quote }} + {{- if .Values.persistence.storageClass }} + {{- if (eq "-" .Values.persistence.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: "{{ .Values.persistence.storageClass }}" + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/atlas/charts/cassandra/values.yaml b/helm/atlas/charts/cassandra/values.yaml new file mode 100755 index 00000000000..029505d9efe --- /dev/null +++ b/helm/atlas/charts/cassandra/values.yaml @@ -0,0 +1,400 @@ + +multiarch: + enabled: false + image: {} + + +## Cassandra image version +## ref: https://hub.docker.com/r/library/cassandra/ +global: + Tier_Type: "" +image: + repo: ghcr.io/atlanhq/cassandra + tag: 3.11.12 + pullPolicy: IfNotPresent + ## Specify ImagePullSecrets for Pods + ## ref: https://kubernetes.io/docs/concepts/containers/images/#specifying-imagepullsecrets-on-a-pod + # pullSecrets: myregistrykey + +## Specify a service type +## ref: http://kubernetes.io/docs/user-guide/services/ +service: + type: ClusterIP + annotations: "" + +## Use an alternate scheduler, e.g. "stork". +## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +# schedulerName: + +## Persist data to a persistent volume +# persistence: {} + # enabled: true + ## cassandra data Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. 
(gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "-" + # accessMode: ReadWriteOnce + # size: 10Gi + +## Configure resource requests and limits +## ref: http://kubernetes.io/docs/user-guide/compute-resources/ +## Minimum memory for development is 4GB and 2 CPU cores +## Minimum memory for production is 8GB and 4 CPU cores +## ref: http://docs.datastax.com/en/archived/cassandra/2.0/cassandra/architecture/architecturePlanningHardware_c.html +# resources: {} + # requests: + # memory: 4Gi + # cpu: 2 + # limits: + # memory: 4Gi + # cpu: 2 + +## Change cassandra configuration parameters below: +## ref: http://docs.datastax.com/en/cassandra/3.0/cassandra/configuration/configCassandra_yaml.html +## Recommended max heap size is 1/2 of system memory +## Recommended heap new size is 1/4 of max heap size +## ref: http://docs.datastax.com/en/cassandra/3.0/cassandra/operations/opsTuneJVM.html +# config: +# cluster_domain: cluster.local +# cluster_name: cassandra +# cluster_size: 3 +# seed_size: 2 +# num_tokens: 256 +# # If you want Cassandra to use this datacenter and rack name, +# # you need to set endpoint_snitch to GossipingPropertyFileSnitch. +# # Otherwise, these values are ignored and datacenter1 and rack1 +# # are used. +# dc_name: DC1 +# rack_name: RAC1 +# endpoint_snitch: SimpleSnitch +# max_heap_size: 2048M +# heap_new_size: 512M +# start_rpc: false +# ports: +# cql: 9042 +# thrift: 9160 +# # If a JVM Agent is in place +# # agent: 61621 + +# Config for cassandra + +max_heap_size: 2048M +heap_new_size: 512M + +config: + cluster_domain: cluster.local + cluster_name: cassandra + cluster_size: 3 + dc_name: datacenter1 + rack_name: rack1 + endpoint_snitch: GossipingPropertyFileSnitch + seed_size: 3 + start_rpc: true + ports: + cql: 9042 + + +## Cassandra config files overrides +configOverrides: {} + +## Cassandra docker command overrides +commandOverrides: [] + +## Cassandra docker args overrides +argsOverrides: [] + +## Custom env variables. +## ref: https://hub.docker.com/_/cassandra/ +env: {} + +## Liveness and Readiness probe values. +## ref: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ +livenessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 40 + successThreshold: 1 + failureThreshold: 3 +readinessProbe: + initialDelaySeconds: 120 + periodSeconds: 30 + timeoutSeconds: 40 + successThreshold: 1 + failureThreshold: 3 + address: "${POD_IP}" + +## Configure node selector. Edit code below for adding selector to pods +## ref: https://kubernetes.io/docs/user-guide/node-selection/ +# selector: + # nodeSelector: + # cloud.google.com/gke-nodepool: pool-db + +## Additional pod annotations +## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +# podAnnotations: {} + +## Additional pod labels +## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +## Additional pod-level settings +podSettings: + # Change this to give pods more time to properly leave the cluster when not using persistent storage. 
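+  # For example, when persistence is disabled the statefulset adds a preStop hook that runs
+  # `nodetool decommission`, which streams data off the node and may take several minutes,
+  # so this value usually needs to be raised in that case.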
+ terminationGracePeriodSeconds: 30 + +## Pod distruption budget +podDisruptionBudget: + # maxUnavailable: 1 + minAvailable: 2 + +podManagementPolicy: OrderedReady + + +updateStrategy: + type: RollingUpdate + +resources: + requests: + memory: 4Gi + cpu: 500m + limits: + memory: 5Gi + cpu: 3000m + +# Persistence changes for cassandra +persistence: + enabled: true + accessMode: ReadWriteOnce + size: 10Gi + +nodeSelector: {} + +podAnnotations: {} + # backup.velero.io/backup-volumes: data + +## Pod Security Context +securityContext: + enabled: false + fsGroup: 999 + runAsUser: 999 +# PriorityClassName +priorityClassName: "" + + +antiAffinity: "hard" + +custom_deployment: + enabled: false + instance_type: + - m6a.2xlarge + +## Affinity for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + + +## Node tolerations for pod assignment +## Ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +tolerations: [] +rbac: + # Specifies whether RBAC resources should be created + create: true + +serviceAccount: + # Specifies whether a ServiceAccount should be created + create: true + # The name of the ServiceAccount to use. + # If not set and create is true, a name is generated using the fullname template + # name: + +# Use host network for Cassandra pods +# You must pass seed list into config.seeds property if set to true +hostNetwork: false + +## Backup cronjob configuration +## Ref: https://github.com/maorfr/cain +# Cassandra backup configuration +backup: + enabled: false + schedule: + - keyspace: atlas + cron: "0 3 * * *" + annotations: {} + image: + repository: ghcr.io/atlanhq/cain + tag: 0.6.0 + # Name of the secret containing the credentials of the service account used by GOOGLE_APPLICATION_CREDENTIALS, as a credentials.json file + extraArgs: + - -c + - atlas-cassandra + google: + serviceAccountSecret: + env: + - name: AWS_REGION + value: "" + resources: + requests: + memory: 1Gi + limits: + memory: 1Gi + destination: "" + + +## Cassandra exported configuration +## ref: https://github.com/criteo/cassandra_exporter +exporter: + # If exporter is enabled this will create a ServiceMonitor by default as well + enabled: true + serviceMonitor: + enabled: true + namespace: monitoring + lables: + release: "prometheus-operator" + additionalLabels: {} + # prometheus: default + image: + repo: ghcr.io/atlanhq/cassandra_exporter + tag: 2.0.2 + port: 5556 + jvmOpts: "" + resources: {} + # limits: + # cpu: 1 + # memory: 1Gi + # requests: + # cpu: 1 + # memory: 1Gi + +## Sidecar for backup/restore +## ref: https://github.com/instaclustr/icarus +icarus: + image: + repo: ghcr.io/atlanhq/cassandra-icarus-atlan-v2 + tag: 1.0.4 + port: 4567 + jvmOpts: "" + resources: + limits: + cpu: 1 + memory: 8Gi + requests: + cpu: 0.5 + memory: 2Gi + +extraVolumes: + - name: varlog + emptyDir: {} + - name: config + configMap: + name: cassandra-config + - name: pre-install + emptyDir: {} + +extraVolumeMounts: + - name: varlog + 
mountPath: /var/log/cassandra/ + +extraContainers: + - name: cassandra-gclog + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/gc.log.0.current'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ + - name: cassandra-systemlog + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/system.log'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ + - name: cassandra-debug + image: ghcr.io/atlanhq/busybox:1.32-multiarch + args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /var/log/cassandra/debug.log'] + volumeMounts: + - name: varlog + mountPath: /var/log/cassandra/ +janusgraph: + atomic_mutation: true + cassandra_batch_size_warn_threshold_in_kb: 4096 + cassandra_batch_size_failure_threshold_in_kb: 16384 + commitlog_segment_size_in_mb: 64 + +reaper: + enable: true + image: + repository: ghcr.io/atlanhq/cassandra-reaper + tag: 3.4.0 + pullPolicy: IfNotPresent + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + concurrencyPolicy: Forbid + activeDeadlineSeconds: 129600 # 36 hours + backoffLimit: 0 + port: 8080 + resources: + requests: + memory: 512Mi + cpu: 100m + limits: + memory: 2Gi + cpu: 500m + env: + REAPER_AUTH_ENABLED: "false" + CASSANDRA_REAPER_STORAGE_TYPE: "cassandra" + CASSANDRA_REAPER_CASS_CONTACT_POINTS: "datacenter1" + CASSANDRA_REAPER_CASS_CLUSTER_NAME: "[atlas-cassandra.atlas.svc.cluster.local]" + REAPER_CASS_KEYSPACE: "reaper_db" + REAPER_ENABLE_DYNAMIC_SEED_LIST: "true" + REAPER_ENABLE_CROSS_ORIGIN: "false" + REAPER_HANGING_REPAIR_TIMEOUT_MINS: "10" + REAPER_REPAIR_INTENSITY: "0.9" + REAPER_REPAIR_PARALELLISM: "SEQUENTIAL" + restartPolicy: OnFailure + sidecar: + image: + repository: ghcr.io/atlanhq/cassandra-reaper-atlan-v2 + tag: 1.4.0 + pullPolicy: IfNotPresent + resources: + requests: + memory: 100Mi + limits: + memory: 512Mi + env: + REAPER_URL: "http://localhost:8080" + CLUSTER_SEED_HOST: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-1.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-2.atlas-cassandra.atlas.svc.cluster.local" + KEYSPACE_NAME: "atlas" + PROMETHEUS_PUSHGATEWAY: "http://prometheus-pushgateway.monitoring.svc.cluster.local:9091" + POLLING_FREQUENCY_IN_SECONDS: "900" + + \ No newline at end of file diff --git a/helm/atlas/charts/elasticsearch/.helmignore b/helm/atlas/charts/elasticsearch/.helmignore new file mode 100755 index 00000000000..e12c0b4b918 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/.helmignore @@ -0,0 +1,2 @@ +tests/ +.pytest_cache/ diff --git a/helm/atlas/charts/elasticsearch/Chart.yaml b/helm/atlas/charts/elasticsearch/Chart.yaml new file mode 100755 index 00000000000..6606ba2df06 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +appVersion: 7.6.1 +description: Official Elastic helm chart for Elasticsearch +home: https://github.com/elastic/helm-charts +icon: https://helm.elastic.co/icons/elasticsearch.png +maintainers: +- email: helm-charts@elastic.co + name: Elastic +name: elasticsearch +sources: +- https://github.com/elastic/elasticsearch +version: 7.6.1 diff --git a/helm/atlas/charts/elasticsearch/Makefile b/helm/atlas/charts/elasticsearch/Makefile new file mode 100755 index 00000000000..22218a1f62a --- /dev/null +++ b/helm/atlas/charts/elasticsearch/Makefile @@ -0,0 +1 @@ +include ../helpers/common.mk diff --git a/helm/atlas/charts/elasticsearch/README.md 
b/helm/atlas/charts/elasticsearch/README.md new file mode 100755 index 00000000000..cfa17a30dad --- /dev/null +++ b/helm/atlas/charts/elasticsearch/README.md @@ -0,0 +1,367 @@ +# Elasticsearch Helm Chart + +This functionality is in beta and is subject to change. The design and code is less mature than official GA features and is being provided as-is with no warranties. Beta features are not subject to the support SLA of official GA features. + +This helm chart is a lightweight way to configure and run our official [Elasticsearch docker image](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html) + +## Notice + +[7.6.1](https://github.com/elastic/helm-charts/releases/tag/7.6.1) release is introducing a change for Elasticsearch users upgrading from a previous chart version. +Following our recommandations, the change tracked in [#458](https://github.com/elastic/helm-charts/pull/458) is setting CPU request to the same value as CPU limit. + +For users which don't overwrite default values for CPU requests, Elasticsearch pod will now request `1000m` CPU instead of `100m` CPU. This may impact the resources (nodes) required in your Kubernetes cluster to deploy Elasticsearch chart. + +If you wish to come back to former values, you just need to override CPU requests when deploying your Helm Chart. + +- Overriding CPU requests in commandline argument: +``` +helm install --name elasticsearch --set resources.requests.cpu=100m elastic/elasticsearch +``` + +- Overriding CPU requests in your custom `values.yaml` file: +``` +resources: + requests: + cpu: "100m" +``` + +## Requirements + +* [Helm](https://helm.sh/) >=2.8.0 and <3.0.0 (see parent [README](https://github.com/elastic/helm-charts/tree/master/README.md) for more details) +* Kubernetes >=1.8 +* Minimum cluster requirements include the following to run this chart with default settings. All of these settings are configurable. + * Three Kubernetes nodes to respect the default "hard" affinity settings + * 1GB of RAM for the JVM heap + +## Usage notes and getting started + +* This repo includes a number of [example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) configurations which can be used as a reference. They are also used in the automated testing of this chart +* Automated testing of this chart is currently only run against GKE (Google Kubernetes Engine). +* The chart deploys a statefulset and by default will do an automated rolling update of your cluster. It does this by waiting for the cluster health to become green after each instance is updated. If you prefer to update manually you can set [`updateStrategy: OnDelete`](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#on-delete) +* It is important to verify that the JVM heap size in `esJavaOpts` and to set the CPU/Memory `resources` to something suitable for your cluster +* To simplify chart and maintenance each set of node groups is deployed as a separate helm release. Take a look at the [multi](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/multi) example to get an idea for how this works. Without doing this it isn't possible to resize persistent volumes in a statefulset. By setting it up this way it makes it possible to add more nodes with a new storage size then drain the old ones. It also solves the problem of allowing the user to determine which node groups to update first when doing upgrades or changes. 
+* We have designed this chart to be very un-opinionated about how to configure Elasticsearch. It exposes ways to set environment variables and mount secrets inside of the container. Doing this makes it much easier for this chart to support multiple versions with minimal changes. + +## Migration from helm/charts stable + +If you currently have a cluster deployed with the [helm/charts stable](https://github.com/helm/charts/tree/master/stable/elasticsearch) chart you can follow the [migration guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/migration/README.md) + +## Installing + +### Using Helm repository + +* Add the elastic helm charts repo + ``` + helm repo add elastic https://helm.elastic.co + ``` +* Install it + ``` + helm install --name elasticsearch elastic/elasticsearch + ``` + +### Using master branch + +* Clone the git repo + ``` + git clone git@github.com:elastic/helm-charts.git + ``` +* Install it + ``` + helm install --name elasticsearch ./helm-charts/elasticsearch + ``` + +## Compatibility + +This chart is tested with the latest supported versions. The currently tested versions are: + +| 6.x | 7.x | +| ----- | ----- | +| 6.8.7 | 7.6.1 | + +Examples of installing older major versions can be found in the [examples](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) directory. + +While only the latest releases are tested, it is possible to easily install old or new releases by overriding the `imageTag`. To install version `7.6.1` of Elasticsearch it would look like this: + +``` +helm install --name elasticsearch elastic/elasticsearch --set imageTag=7.6.1 +``` + +## Configuration + +| Parameter | Description | Default | +| ---------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------- | +| `clusterName` | This will be used as the Elasticsearch [cluster.name](https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster.name.html) and should be unique per cluster in the namespace | `elasticsearch` | +| `nodeGroup` | This is the name that will be used for each group of nodes in the cluster. The name will be `clusterName-nodeGroup-X`, `nameOverride-nodeGroup-X` if a nameOverride is specified, and `fullnameOverride-X` if a fullnameOverride is specified | `master` | +| `masterService` | Optional. The service name used to connect to the masters. You only need to set this if your master `nodeGroup` is set to something other than `master`. See [Clustering and Node Discovery](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#clustering-and-node-discovery) for more information | `` | +| `roles` | A hash map with the [specific roles](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html) for the node group | `master: true`
`data: true`
`ingest: true` | +| `replicas` | Kubernetes replica count for the statefulset (i.e. how many pods) | `3` | +| `minimumMasterNodes` | The value for [discovery.zen.minimum_master_nodes](https://www.elastic.co/guide/en/elasticsearch/reference/6.7/discovery-settings.html#minimum_master_nodes). Should be set to `(master_eligible_nodes / 2) + 1`. Ignored in Elasticsearch versions >= 7. | `2` | +| `esMajorVersion` | Used to set major version specific configuration. If you are using a custom image and not running the default Elasticsearch version you will need to set this to the version you are running (e.g. `esMajorVersion: 6`) | `""` | +| `esConfig` | Allows you to add any config files in `/usr/share/elasticsearch/config/` such as `elasticsearch.yml` and `log4j2.properties`. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example of the formatting. | `{}` | +| `extraEnvs` | Extra [environment variables](https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#using-environment-variables-inside-of-your-config) which will be appended to the `env:` definition for the container | `[]` | +| `extraVolumes` | Templatable string of additional volumes to be passed to the `tpl` function | `""` | +| `extraVolumeMounts` | Templatable string of additional volumeMounts to be passed to the `tpl` function | `""` | +| `extraContainers` | Templatable string of additional containers to be passed to the `tpl` function | `""` | +| `extraInitContainers` | Templatable string of additional init containers to be passed to the `tpl` function | `""` | +| `secretMounts` | Allows you easily mount a secret as a file inside the statefulset. Useful for mounting certificates and other secrets. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example | `[]` | +| `image` | The Elasticsearch docker image | `docker.elastic.co/elasticsearch/elasticsearch` | +| `imageTag` | The Elasticsearch docker image tag | `7.6.1` | +| `imagePullPolicy` | The Kubernetes [imagePullPolicy](https://kubernetes.io/docs/concepts/containers/images/#updating-images) value | `IfNotPresent` | +| `podAnnotations` | Configurable [annotations](https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/) applied to all Elasticsearch pods | `{}` | +| `labels` | Configurable [label](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) applied to all Elasticsearch pods | `{}` | +| `esJavaOpts` | [Java options](https://www.elastic.co/guide/en/elasticsearch/reference/current/jvm-options.html) for Elasticsearch. This is where you should configure the [jvm heap size](https://www.elastic.co/guide/en/elasticsearch/reference/current/heap-size.html) | `-Xmx1g -Xms1g` | +| `resources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the statefulset | `requests.cpu: 1000m`
`requests.memory: 2Gi`
`limits.cpu: 1000m`
`limits.memory: 2Gi` | +| `initResources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the initContainer in the statefulset | {} | +| `sidecarResources` | Allows you to set the [resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) for the sidecar containers in the statefulset | {} | +| `networkHost` | Value for the [network.host Elasticsearch setting](https://www.elastic.co/guide/en/elasticsearch/reference/current/network.host.html) | `0.0.0.0` | +| `volumeClaimTemplate` | Configuration for the [volumeClaimTemplate for statefulsets](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-storage). You will want to adjust the storage (default `30Gi`) and the `storageClassName` if you are using a different storage class | `accessModes: [ "ReadWriteOnce" ]`
`resources.requests.storage: 30Gi` | +| `persistence.annotations` | Additional persistence annotations for the `volumeClaimTemplate` | `{}` | +| `persistence.enabled` | Enables a persistent volume for Elasticsearch data. Can be disabled for nodes that only have [roles](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-node.html) which don't require persistent data. | `true` | +| `priorityClassName` | The [name of the PriorityClass](https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass). No default is supplied as the PriorityClass must be created first. | `""` | +| `antiAffinityTopologyKey` | The [anti-affinity topology key](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity). By default this will prevent multiple Elasticsearch nodes from running on the same Kubernetes node | `kubernetes.io/hostname` | +| `antiAffinity` | Setting this to hard enforces the [anti-affinity rules](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity). If it is set to soft it will be done "best effort". Other values will be ignored. | `hard` | +| `nodeAffinity` | Value for the [node affinity settings](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature) | `{}` | +| `podManagementPolicy` | By default Kubernetes [deploys statefulsets serially](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies). This deploys them in parallel so that they can discover eachother | `Parallel` | +| `protocol` | The protocol that will be used for the readinessProbe. Change this to `https` if you have `xpack.security.http.ssl.enabled` set | `http` | +| `httpPort` | The http port that Kubernetes will use for the healthchecks and the service. If you change this you will also need to set [http.port](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-http.html#_settings) in `extraEnvs` | `9200` | +| `transportPort` | The transport port that Kubernetes will use for the service. If you change this you will also need to set [transport port configuration](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-transport.html#_transport_settings) in `extraEnvs` | `9300` | +| `service.labels` | Labels to be added to non-headless service | `{}` | +| `service.labelsHeadless` | Labels to be added to headless service | `{}` | +| `service.type` | Type of elasticsearch service. [Service Types](https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types) | `ClusterIP` | +| `service.nodePort` | Custom [nodePort](https://kubernetes.io/docs/concepts/services-networking/service/#nodeport) port that can be set if you are using `service.type: nodePort`. | `` | +| `service.annotations` | Annotations that Kubernetes will use for the service. This will configure load balancer if `service.type` is `LoadBalancer` [Annotations](https://kubernetes.io/docs/concepts/services-networking/service/#ssl-support-on-aws) | `{}` | +| `service.httpPortName` | The name of the http port within the service | `http` | +| `service.transportPortName` | The name of the transport port within the service | `transport` | +| `service.loadBalancerSourceRanges` | The IP ranges that are allowed to access | `[]` | +| `updateStrategy` | The [updateStrategy](https://kubernetes.io/docs/tutorials/stateful-application/basic-stateful-set/#updating-statefulsets) for the statefulset. 
By default Kubernetes will wait for the cluster to be green after upgrading each pod. Setting this to `OnDelete` will allow you to manually delete each pod during upgrades | `RollingUpdate` | +| `maxUnavailable` | The [maxUnavailable](https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget) value for the pod disruption budget. By default this will prevent Kubernetes from having more than 1 unhealthy pod in the node group | `1` | +| `fsGroup (DEPRECATED)` | The Group ID (GID) for [securityContext.fsGroup](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/) so that the Elasticsearch user can read from the persistent volume | `` | +| `podSecurityContext` | Allows you to set the [securityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod) for the pod | `fsGroup: 1000`
`runAsUser: 1000` | +| `securityContext` | Allows you to set the [securityContext](https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container) for the container | `capabilities.drop:[ALL]`
`runAsNonRoot: true`
`runAsUser: 1000` | +| `terminationGracePeriod` | The [terminationGracePeriod](https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods) in seconds used when trying to stop the pod | `120` | +| `sysctlInitContainer.enabled` | Allows you to disable the sysctlInitContainer if you are setting vm.max_map_count with another method | `true` | +| `sysctlVmMaxMapCount` | Sets the [sysctl vm.max_map_count](https://www.elastic.co/guide/en/elasticsearch/reference/current/vm-max-map-count.html#vm-max-map-count) needed for Elasticsearch | `262144` | +| `readinessProbe` | Configuration fields for the [readinessProbe](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/) | `failureThreshold: 3`
`initialDelaySeconds: 10`
`periodSeconds: 10`
`successThreshold: 3`
`timeoutSeconds: 5` | +| `clusterHealthCheckParams` | The [Elasticsearch cluster health status params](https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params) that will be used by readinessProbe command | `wait_for_status=green&timeout=1s` | +| `imagePullSecrets` | Configuration for [imagePullSecrets](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret) so that you can use a private registry for your image | `[]` | +| `nodeSelector` | Configurable [nodeSelector](https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector) so that you can target specific nodes for your Elasticsearch cluster | `{}` | +| `tolerations` | Configurable [tolerations](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/) | `[]` | +| `ingress` | Configurable [ingress](https://kubernetes.io/docs/concepts/services-networking/ingress/) to expose the Elasticsearch service. See [`values.yaml`](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example | `enabled: false` | +| `schedulerName` | Name of the [alternate scheduler](https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/#specify-schedulers-for-pods) | `nil` | +| `masterTerminationFix` | A workaround needed for Elasticsearch < 7.2 to prevent master status being lost during restarts [#63](https://github.com/elastic/helm-charts/issues/63) | `false` | +| `lifecycle` | Allows you to add lifecycle configuration. See [values.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/values.yaml) for an example of the formatting. | `{}` | +| `keystore` | Allows you map Kubernetes secrets into the keystore. See the [config example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/config/values.yaml) and [how to use the keystore](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-use-the-keystore) | `[]` | +| `rbac` | Configuration for creating a role, role binding and service account as part of this helm chart with `create: true`. Also can be used to reference an external service account with `serviceAccountName: "externalServiceAccountName"`. | `create: false`
`serviceAccountName: ""` | +| `podSecurityPolicy` | Configuration for create a pod security policy with minimal permissions to run this Helm chart with `create: true`. Also can be used to reference an external pod security policy with `name: "externalPodSecurityPolicy"` | `create: false`
`name: ""` | +| `nameOverride` | Overrides the clusterName when used in the naming of resources | `""` | +| `fullnameOverride` | Overrides the clusterName and nodeGroup when used in the naming of resources. This should only be used when using a single nodeGroup, otherwise you will have name conflicts | `""` | + +## Try it out + +In [examples/](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples) you will find some example configurations. These examples are used for the automated testing of this helm chart + +### Default + +To deploy a cluster with all default values and run the integration tests + +``` +cd examples/default +make +``` + +### Multi + +A cluster with dedicated node types + +``` +cd examples/multi +make +``` + +### Security + +A cluster with node to node security and https enabled. This example uses autogenerated certificates and password, for a production deployment you want to generate SSL certificates following the [official docs](https://www.elastic.co/guide/en/elasticsearch/reference/current/configuring-tls.html#node-certificates). + +* Generate the certificates and install Elasticsearch + ``` + cd examples/security + make + + # Run a curl command to interact with the cluster + kubectl exec -ti security-master-0 -- sh -c 'curl -u $ELASTIC_USERNAME:$ELASTIC_PASSWORD -k https://localhost:9200/_cluster/health?pretty' + ``` + +### FAQ + +#### How to install plugins? + +The [recommended](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#_c_customized_image) way to install plugins into our docker images is to create a custom docker image. + +The Dockerfile would look something like: + +``` +ARG elasticsearch_version +FROM docker.elastic.co/elasticsearch/elasticsearch:${elasticsearch_version} + +RUN bin/elasticsearch-plugin install --batch repository-gcs +``` + +And then updating the `image` in values to point to your custom image. + +There are a couple reasons we recommend this. + +1. Tying the availability of Elasticsearch to the download service to install plugins is not a great idea or something that we recommend. Especially in Kubernetes where it is normal and expected for a container to be moved to another host at random times. +2. Mutating the state of a running docker image (by installing plugins) goes against best practices of containers and immutable infrastructure. + +#### How to use the keystore? + + +##### Basic example + +Create the secret, the key name needs to be the keystore key path. In this example we will create a secret from a file and from a literal string. + +``` +kubectl create secret generic encryption_key --from-file=xpack.watcher.encryption_key=./watcher_encryption_key +kubectl create secret generic slack_hook --from-literal=xpack.notification.slack.account.monitoring.secure_url='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +To add these secrets to the keystore: +``` +keystore: + - secretName: encryption_key + - secretName: slack_hook +``` + +##### Multiple keys + +All keys in the secret will be added to the keystore. 
To create the previous example in one secret you could also do: + +``` +kubectl create secret generic keystore_secrets --from-file=xpack.watcher.encryption_key=./watcher_encryption_key --from-literal=xpack.notification.slack.account.monitoring.secure_url='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +``` +keystore: + - secretName: keystore_secrets +``` + +##### Custom paths and keys + +If you are using these secrets for other applications (besides the Elasticsearch keystore) then it is also possible to specify the keystore path and which keys you want to add. Everything specified under each `keystore` item will be passed through to the `volumeMounts` section for [mounting the secret](https://kubernetes.io/docs/concepts/configuration/secret/#using-secrets). In this example we will only add the `slack_hook` key from a secret that also has other keys. Our secret looks like this: + +``` +kubectl create secret generic slack_secrets --from-literal=slack_channel='#general' --from-literal=slack_hook='https://hooks.slack.com/services/asdasdasd/asdasdas/asdasd' +``` + +We only want to add the `slack_hook` key to the keystore at path `xpack.notification.slack.account.monitoring.secure_url`. + +``` +keystore: + - secretName: slack_secrets + items: + - key: slack_hook + path: xpack.notification.slack.account.monitoring.secure_url +``` + +You can also take a look at the [config example](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/config/) which is used as part of the automated testing pipeline. + +#### How to enable snapshotting? + +1. Install your [snapshot plugin](https://www.elastic.co/guide/en/elasticsearch/plugins/current/repository.html) into a custom docker image following the [how to install plugins guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-install-plugins) +2. Add any required secrets or credentials into an Elasticsearch keystore following the [how to use the keystore guide](https://github.com/elastic/helm-charts/tree/master/elasticsearch/README.md#how-to-use-the-keystore) +3. Configure the [snapshot repository](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-snapshots.html) as you normally would. +4. To automate snapshots you can use a tool like [curator](https://www.elastic.co/guide/en/elasticsearch/client/curator/current/snapshot.html). In the future there are plans to have Elasticsearch manage automated snapshots with [Snapshot Lifecycle Management](https://github.com/elastic/elasticsearch/issues/38461). + +### Local development environments + +This chart is designed to run on production scale Kubernetes clusters with multiple nodes, lots of memory and persistent storage. For that reason it can be a bit tricky to run them against local Kubernetes environments such as minikube. Below are some examples of how to get this working locally. + +#### Minikube + +This chart also works successfully on [minikube](https://kubernetes.io/docs/setup/minikube/) in addition to typical hosted Kubernetes environments. +An example `values.yaml` file for minikube is provided under `examples/`. + +In order to properly support the required persistent volume claims for the Elasticsearch `StatefulSet`, the `default-storageclass` and `storage-provisioner` minikube addons must be enabled. 
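+The commands below enable those addons and then deploy the chart using the example values provided under `examples/minikube`: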
+ +``` +minikube addons enable default-storageclass +minikube addons enable storage-provisioner +cd examples/minikube +make +``` + +Note that if `helm` or `kubectl` timeouts occur, you may consider creating a minikube VM with more CPU cores or memory allocated. + +#### Docker for Mac - Kubernetes + +It is also possible to run this chart with the built in Kubernetes cluster that comes with [docker-for-mac](https://docs.docker.com/docker-for-mac/kubernetes/). + +``` +cd examples/docker-for-mac +make +``` + +#### KIND - Kubernetes + +It is also possible to run this chart using a Kubernetes [KIND (Kubernetes in Docker)](https://github.com/kubernetes-sigs/kind) cluster: + +``` +cd examples/kubernetes-kind +make +``` + +#### MicroK8S + +It is also possible to run this chart using [MicroK8S](https://microk8s.io): + +``` +microk8s.enable dns +microk8s.enable helm +microk8s.enable storage +cd examples/microk8s +make +``` + +## Clustering and Node Discovery + +This chart facilitates Elasticsearch node discovery and services by creating two `Service` definitions in Kubernetes, one with the name `$clusterName-$nodeGroup` and another named `$clusterName-$nodeGroup-headless`. +Only `Ready` pods are a part of the `$clusterName-$nodeGroup` service, while all pods (`Ready` or not) are a part of `$clusterName-$nodeGroup-headless`. + +If your group of master nodes has the default `nodeGroup: master` then you can just add new groups of nodes with a different `nodeGroup` and they will automatically discover the correct master. If your master nodes have a different `nodeGroup` name then you will need to set `masterService` to `$clusterName-$masterNodeGroup`. + +The chart value for `masterService` is used to populate `discovery.zen.ping.unicast.hosts`, which Elasticsearch nodes will use to contact master nodes and form a cluster. +Therefore, to add a group of nodes to an existing cluster, setting `masterService` to the desired `Service` name of the related cluster is sufficient. + +For an example of deploying both a group master nodes and data nodes using multiple releases of this chart, see the accompanying values files in `examples/multi`. + +## Testing + +This chart uses [pytest](https://docs.pytest.org/en/latest/) to test the templating logic. The dependencies for testing can be installed from the [`requirements.txt`](https://github.com/elastic/helm-charts/tree/master/requirements.txt) in the parent directory. + +``` +pip install -r ../requirements.txt +make pytest +``` + +You can also use `helm template` to look at the YAML being generated + +``` +make template +``` + +It is possible to run all of the tests and linting inside of a docker container + +``` +make test +``` + +## Integration Testing + +Integration tests are run using [goss](https://github.com/aelsabbahy/goss/blob/master/docs/manual.md) which is a serverspec like tool written in golang. See [goss.yaml](https://github.com/elastic/helm-charts/tree/master/elasticsearch/examples/default/test/goss.yaml) for an example of what the tests look like. + +To run the goss tests against the default example: + +``` +cd examples/default +make goss +``` diff --git a/helm/atlas/charts/elasticsearch/templates/NOTES.txt b/helm/atlas/charts/elasticsearch/templates/NOTES.txt new file mode 100755 index 00000000000..73edf425af7 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/NOTES.txt @@ -0,0 +1,4 @@ +1. Watch all cluster members come up. + $ kubectl get pods --namespace={{ .Release.Namespace }} -l app={{ template "elasticsearch.uname" . }} -w +2. 
Test cluster health using Helm test. + $ helm test {{ .Release.Name }} diff --git a/helm/atlas/charts/elasticsearch/templates/_helpers.tpl b/helm/atlas/charts/elasticsearch/templates/_helpers.tpl new file mode 100755 index 00000000000..b8c971dcc64 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/_helpers.tpl @@ -0,0 +1,87 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "elasticsearch.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +*/}} +{{- define "elasticsearch.fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "elasticsearch.uname" -}} +{{- if empty .Values.fullnameOverride -}} +{{- if empty .Values.nameOverride -}} +{{ .Values.clusterName }}-{{ .Values.nodeGroup }} +{{- else -}} +{{ .Values.nameOverride }}-{{ .Values.nodeGroup }} +{{- end -}} +{{- else -}} +{{ .Values.fullnameOverride }} +{{- end -}} +{{- end -}} + +{{- define "elasticsearch.masterService" -}} +{{- if empty .Values.masterService -}} +{{- if empty .Values.fullnameOverride -}} +{{- if empty .Values.nameOverride -}} +{{ .Values.clusterName }}-master +{{- else -}} +{{ .Values.nameOverride }}-master +{{- end -}} +{{- else -}} +{{ .Values.fullnameOverride }} +{{- end -}} +{{- else -}} +{{ .Values.masterService }} +{{- end -}} +{{- end -}} + +{{- define "elasticsearch.endpoints" -}} +{{- $replicas := int (toString (.Values.replicas)) }} +{{- $uname := printf "%s-%s" .Values.clusterName .Values.nodeGroup }} + {{- range $i, $e := untilStep 0 $replicas 1 -}} +{{ $uname }}-{{ $i }}, + {{- end -}} +{{- end -}} + +{{- define "elasticsearch.esMajorVersion" -}} +{{- if .Values.esMajorVersion -}} +{{ .Values.esMajorVersion }} +{{- else -}} +{{- $version := int (index (.Values.imageTag | splitList ".") 0) -}} + {{- if and (contains "docker.elastic.co/elasticsearch/elasticsearch" .Values.image) (not (eq $version 0)) -}} +{{ $version }} + {{- else -}} +6 + {{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for statefulset. +*/}} +{{- define "elasticsearch.statefulset.apiVersion" -}} +{{- if semverCompare "<1.9-0" .Capabilities.KubeVersion.GitVersion -}} +{{- print "apps/v1beta2" -}} +{{- else -}} +{{- print "apps/v1" -}} +{{- end -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for ingress. +*/}} +{{- define "elasticsearch.ingress.apiVersion" -}} +{{- if semverCompare "<1.14-0" .Capabilities.KubeVersion.GitVersion -}} +{{- print "extensions/v1beta1" -}} +{{- else -}} +{{- print "networking.k8s.io/v1beta1" -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/backup-cronjob.yaml b/helm/atlas/charts/elasticsearch/templates/backup-cronjob.yaml new file mode 100644 index 00000000000..f2f1725b412 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/backup-cronjob.yaml @@ -0,0 +1,41 @@ +{{- if .Values.backup.enabled -}} +apiVersion: batch/v1beta1 +kind: CronJob +metadata: + name: {{ template "elasticsearch.uname" . }}-backup + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . 
}}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + schedule: "{{ .Values.backup.schedule }}" + jobTemplate: + spec: + template: + spec: + {{- with .Values.nodeSelector }} + nodeSelector: + {{ toYaml . }} + {{- end }} + containers: + - name: es-backup + {{- if and .Values.multiarch.enabled .Values.multiarch.image.backup }} + image: {{ .Values.multiarch.image.backup }} + {{- else }} + image: {{ .Values.backup.image }} + {{- end }} + args: + - /bin/sh + - -c + - | + now="$(date +'%d%m%Y')" + curl -X PUT "http://atlas-elasticsearch-master.atlas.svc.cluster.local:9200/_snapshot/atlan_s3_repository/atlan_nightly_backup_$now?wait_for_completion=true&pretty" + restartPolicy: OnFailure +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/configmap.yaml b/helm/atlas/charts/elasticsearch/templates/configmap.yaml new file mode 100755 index 00000000000..e05ba3615fc --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/configmap.yaml @@ -0,0 +1,17 @@ +{{- if .Values.esConfig }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "elasticsearch.uname" . }}-config + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +data: +{{- range $path, $config := .Values.esConfig }} + {{ $path }}: | +{{ $config | indent 4 -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/es-regional-configmap.yaml b/helm/atlas/charts/elasticsearch/templates/es-regional-configmap.yaml new file mode 100644 index 00000000000..028403a4fd1 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/es-regional-configmap.yaml @@ -0,0 +1,37 @@ +{{- if .Values.s3_regional_endpoint }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "elasticsearch.uname" . }}-plugin-install-regional + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +data: + plugin-install.sh: | + #!/bin/bash + + echo "[+] Configuring plugins for elasticsearch - with regional endpoint" + + # Delete temporary keystore if present + rm -rf /usr/share/elasticsearch/config/elasticsearch.keystore.tmp + + # Configure S3 repository + # S3_BUCKET_NAME=atlan-devops-local, for testing purpose + # S3_BUCKET_PATH=atlan/infra/elasticsearch + curl -X PUT "localhost:9200/_snapshot/atlan_s3_repository?pretty" -H 'Content-Type: application/json' -d' + { + "type": "s3", + "settings": { + "bucket": "'"$S3_BUCKET_NAME"'", + "base_path": "'"$S3_BUCKET_PATH"'", + "role_arn": "'"$S3_BUCKET_ROLE_ARN"'", + "region": "'"$S3_BUCKET_REGION"'", + "endpoint" : "{{ .Values.s3_regional_endpoint }}", + "compress": "true" + } + } + ' +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/ingress.yaml b/helm/atlas/charts/elasticsearch/templates/ingress.yaml new file mode 100755 index 00000000000..d1a04a0aa66 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/ingress.yaml @@ -0,0 +1,55 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "elasticsearch.uname" . 
-}} +{{- $servicePort := .Values.httpPort -}} +{{- $ingressPath := .Values.ingress.path -}} +{{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: networking.k8s.io/v1 +{{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}} +apiVersion: networking.k8s.io/v1beta1 +{{- else -}} +apiVersion: extensions/v1beta1 +{{- end }} +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ .Values.Namespace }} + labels: + app: {{ .Chart.Name }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: +{{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} +{{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ . }} + http: + paths: + - path: {{ $ingressPath }} + {{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }} + pathType: ImplementationSpecific + {{- end }} + backend: + {{- if $.Capabilities.APIVersions.Has "networking.k8s.io/v1" }} + service: + name: {{ $fullName }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: {{ $fullName }} + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml b/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml new file mode 100755 index 00000000000..a273a2fd667 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml @@ -0,0 +1,17 @@ +--- +{{- if .Values.minAvailable }} +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: policy/v1 +{{- else -}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: "{{ template "elasticsearch.uname" . }}-pdb" + namespace: {{ .Values.Namespace }} +spec: + minAvailable: {{ .Values.minAvailable }} + selector: + matchLabels: + app: "{{ template "elasticsearch.uname" . }}" +{{- end }} diff --git a/helm/atlas/charts/elasticsearch/templates/podsecuritypolicy.yaml b/helm/atlas/charts/elasticsearch/templates/podsecuritypolicy.yaml new file mode 100755 index 00000000000..d1a1bed4796 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/podsecuritypolicy.yaml @@ -0,0 +1,15 @@ +{{- if .Values.podSecurityPolicy.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ default $fullName .Values.podSecurityPolicy.name | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +spec: +{{ toYaml .Values.podSecurityPolicy.spec | indent 2 }} +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/role.yaml b/helm/atlas/charts/elasticsearch/templates/role.yaml new file mode 100755 index 00000000000..1f205dde2b1 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/role.yaml @@ -0,0 +1,26 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . 
-}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +rules: + - apiGroups: + - extensions + resources: + - podsecuritypolicies + resourceNames: + {{- if eq .Values.podSecurityPolicy.name "" }} + - {{ $fullName | quote }} + {{- else }} + - {{ .Values.podSecurityPolicy.name | quote }} + {{- end }} + verbs: + - use +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/rolebinding.yaml b/helm/atlas/charts/elasticsearch/templates/rolebinding.yaml new file mode 100755 index 00000000000..598403795b5 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/rolebinding.yaml @@ -0,0 +1,25 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +subjects: + - kind: ServiceAccount + {{- if eq .Values.rbac.serviceAccountName "" }} + name: {{ $fullName | quote }} + {{- else }} + name: {{ .Values.rbac.serviceAccountName | quote }} + {{- end }} + namespace: {{ .Release.Namespace | quote }} +roleRef: + kind: Role + name: {{ $fullName | quote }} + apiGroup: rbac.authorization.k8s.io +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/service.yaml b/helm/atlas/charts/elasticsearch/templates/service.yaml new file mode 100755 index 00000000000..cccf541aad0 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/service.yaml @@ -0,0 +1,74 @@ +--- +kind: Service +apiVersion: v1 +metadata: +{{- if eq .Values.nodeGroup "master" }} + name: {{ template "elasticsearch.masterService" . }} + namespace: {{ .Values.Namespace }} +{{- else }} + name: {{ template "elasticsearch.uname" . }} + namespace: {{ .Values.Namespace }} +{{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +{{- if .Values.service.labels }} +{{ toYaml .Values.service.labels | indent 4}} +{{- end }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +spec: + type: {{ .Values.service.type }} + selector: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + ports: + - name: {{ .Values.service.httpPortName | default "http" }} + protocol: TCP + port: {{ .Values.httpPort }} +{{- if .Values.service.nodePort }} + nodePort: {{ .Values.service.nodePort }} +{{- end }} + - name: {{ .Values.service.transportPortName | default "transport" }} + protocol: TCP + port: {{ .Values.transportPort }} +{{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml . | indent 4 }} +{{- end }} +--- +kind: Service +apiVersion: v1 +metadata: +{{- if eq .Values.nodeGroup "master" }} + name: {{ template "elasticsearch.masterService" . }}-headless + namespace: {{ .Values.Namespace }} +{{- else }} + name: {{ template "elasticsearch.uname" . 
}}-headless + namespace: {{ .Values.Namespace }} +{{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" +{{- if .Values.service.labelsHeadless }} +{{ toYaml .Values.service.labelsHeadless | indent 4 }} +{{- end }} + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" +spec: + clusterIP: None # This is needed for statefulset hostnames like elasticsearch-0 to resolve + # Create endpoints also if the related pod isn't ready + publishNotReadyAddresses: true + selector: + app: "{{ template "elasticsearch.uname" . }}" + ports: + - name: {{ .Values.service.httpPortName | default "http" }} + port: {{ .Values.httpPort }} + - name: {{ .Values.service.transportPortName | default "transport" }} + port: {{ .Values.transportPort }} diff --git a/helm/atlas/charts/elasticsearch/templates/serviceaccount.yaml b/helm/atlas/charts/elasticsearch/templates/serviceaccount.yaml new file mode 100755 index 00000000000..7fd3023a383 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/serviceaccount.yaml @@ -0,0 +1,18 @@ +{{- if .Values.rbac.create -}} +{{- $fullName := include "elasticsearch.uname" . -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + {{- if eq .Values.rbac.serviceAccountName "" }} + name: {{ $fullName | quote }} + namespace: {{ .Values.Namespace }} + {{- else }} + name: {{ .Values.rbac.serviceAccountName | quote }} + namespace: {{ .Values.Namespace }} + {{- end }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + app: {{ $fullName | quote }} +{{- end -}} diff --git a/helm/atlas/charts/elasticsearch/templates/statefulset.yaml b/helm/atlas/charts/elasticsearch/templates/statefulset.yaml new file mode 100755 index 00000000000..f02977199d3 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/statefulset.yaml @@ -0,0 +1,449 @@ +--- +{{- $isAWSCloud := eq .Values.global.cloud "aws" }} +apiVersion: {{ template "elasticsearch.statefulset.apiVersion" . }} +kind: StatefulSet +metadata: + name: {{ template "elasticsearch.uname" . }} + namespace: {{ .Values.Namespace }} + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . }}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + esMajorVersion: "{{ include "elasticsearch.esMajorVersion" . }}" +spec: + serviceName: {{ template "elasticsearch.uname" . }}-headless + selector: + matchLabels: + app: "{{ template "elasticsearch.uname" . }}" + replicas: {{ .Values.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: {{ template "elasticsearch.uname" . }} + {{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 8 }} + {{- end }} + spec: +{{ toYaml .Values.volumeClaimTemplate | indent 6 }} + {{- end }} + template: + metadata: + name: "{{ template "elasticsearch.uname" . }}" + labels: + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + chart: "{{ .Chart.Name }}" + app: "{{ template "elasticsearch.uname" . 
}}" + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + # backup.velero.io/backup-volumes: {{ template "elasticsearch.uname" . }} + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{/* This forces a restart if the configmap has changed */}} + {{- if .Values.esConfig }} + configchecksum: {{ include (print .Template.BasePath "/configmap.yaml") . | sha256sum | trunc 63 }} + {{- end }} + spec: + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + securityContext: +{{ toYaml .Values.podSecurityContext | indent 8 }} + {{- if .Values.fsGroup }} + fsGroup: {{ .Values.fsGroup }} # Deprecated value, please use .Values.podSecurityContext.fsGroup + {{- end }} + {{- if .Values.rbac.create }} + serviceAccountName: "{{ template "elasticsearch.uname" . }}" + {{- else if not (eq .Values.rbac.serviceAccountName "") }} + serviceAccountName: {{ .Values.rbac.serviceAccountName | quote }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 6 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + affinity: + {{- end }} + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- if eq .Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: "{{ template "elasticsearch.uname" .}}" + topologyKey: {{ .Values.antiAffinityTopologyKey }} + {{- else if eq .Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: {{ .Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "elasticsearch.uname" . }}" + {{- end }} + {{- end }} + nodeAffinity: + {{- if eq .Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + volumes: + {{- if .Values.s3_regional_endpoint }} + - name: plugin-install-regional + configMap: + name: {{ template "elasticsearch.uname" . }}-plugin-install-regional + defaultMode: 344 + items: + - key: plugin-install.sh + path: plugin-install.sh + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.esConfig }} + - name: esconfig + configMap: + name: {{ template "elasticsearch.uname" . 
}}-config + {{- end }} + - name: elasticsearch-synonym-config + configMap: + name: elasticsearch-synonym-config +{{- if .Values.keystore }} + - name: keystore + emptyDir: {} + {{- range .Values.keystore }} + - name: keystore-{{ .secretName }} + secret: {{ toYaml . | nindent 12 }} + {{- end }} +{{ end }} +{{- if .Values.extraVolumes }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . | indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end }} + initContainers: + {{- if .Values.sysctlInitContainer.enabled }} + - name: configure-sysctl + securityContext: + runAsUser: 0 + privileged: true + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: ["sysctl", "-w", "vm.max_map_count={{ .Values.sysctlVmMaxMapCount}}"] + resources: +{{ toYaml .Values.initResources | indent 10 }} + {{- end }} +{{ if .Values.keystore }} + - name: keystore + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: + - sh + - -c + - | + #!/usr/bin/env bash + set -euo pipefail + + for i in /tmp/keystoreSecrets/*/*; do + key=$(basename $i) + echo "Adding file $i to keystore key $key" + elasticsearch-keystore add-file "$key" "$i" + done + + # Add the bootstrap password since otherwise the Elasticsearch entrypoint tries to do this on startup + if [ ! -z ${ELASTIC_PASSWORD+x} ]; then + echo 'Adding env $ELASTIC_PASSWORD to keystore as key bootstrap.password' + echo "$ELASTIC_PASSWORD" | elasticsearch-keystore add -x bootstrap.password + fi + + cp -a /usr/share/elasticsearch/config/elasticsearch.keystore /tmp/keystore/ + env: {{ toYaml .Values.extraEnvs | nindent 10 }} + resources: {{ toYaml .Values.initResources | nindent 10 }} + volumeMounts: + - name: keystore + mountPath: /tmp/keystore + {{- range .Values.keystore }} + - name: keystore-{{ .secretName }} + mountPath: /tmp/keystoreSecrets/{{ .secretName }} + {{- end }} +{{ end }} + {{- if .Values.extraInitContainers }} +{{ tpl .Values.extraInitContainers . | indent 6 }} + {{- end }} + containers: + - name: "{{ template "elasticsearch.name" . 
}}" + securityContext: +{{ toYaml .Values.securityContext | indent 10 }} + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + # If the node is starting up wait for the cluster to be ready (request params: '{{ .Values.clusterHealthCheckParams }}' ) + # Once it has started only check that the node itself is responding + START_FILE=/tmp/.es_start_file + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ .Values.protocol }}://127.0.0.1:{{ .Values.httpPort }}${path} + } + + if [ -f "${START_FILE}" ]; then + echo 'Elasticsearch is already running, lets check the node is healthy and there are master nodes available' + http "/_cluster/health?timeout={{ .Values.healthCheckProbeTimeout }}" + else + echo 'Waiting for elasticsearch cluster to become ready (request params: "{{ .Values.clusterHealthCheckParams }}" )' + if http "/_cluster/health?{{ .Values.clusterHealthCheckParams }}" ; then + touch ${START_FILE} + exit 0 + else + echo 'Cluster is not yet ready (request params: "{{ .Values.clusterHealthCheckParams }}" )' + exit 1 + fi + fi + ports: + - name: http + containerPort: {{ .Values.httpPort }} + - name: transport + containerPort: {{ .Values.transportPort }} + resources: +{{- if eq .Values.global.Tier_Type "Enterprise" }} +{{ toYaml .Values.resources | indent 10 }} +{{- else if eq .Values.global.Tier_Type "Basic" }} +{{ toYaml .Values.resources_basic | indent 10 }} +{{- else if eq .Values.global.Tier_Type "Standard" }} +{{ toYaml .Values.resources_standard | indent 10 }} +{{- else }} +{{ toYaml .Values.resources | indent 10 }} +{{- end }} + {{- if .Values.multitenant }} + envFrom: {{ toYaml .Values.envFrom | nindent 10 }} + {{- end }} + env: + - name: node.name + valueFrom: + fieldRef: + fieldPath: metadata.name + {{- if eq .Values.roles.master "true" }} + {{- if ge (int (include "elasticsearch.esMajorVersion" .)) 7 }} + - name: cluster.initial_master_nodes + value: "{{ template "elasticsearch.endpoints" . }}" + {{- else }} + - name: discovery.zen.minimum_master_nodes + value: "{{ .Values.minimumMasterNodes }}" + {{- end }} + {{- end }} + {{- if lt (int (include "elasticsearch.esMajorVersion" .)) 7 }} + - name: discovery.zen.ping.unicast.hosts + value: "{{ template "elasticsearch.masterService" . }}-headless" + {{- else }} + - name: discovery.seed_hosts + value: "{{ template "elasticsearch.masterService" . }}-headless" + {{- end }} + - name: cluster.name + value: "{{ .Values.clusterName }}" + - name: cluster.max_shards_per_node + value: "{{ .Values.maxShardsPerNode }}" + - name: network.host + value: "{{ .Values.networkHost }}" + - name: ES_JAVA_OPTS + value: {{ .Values.esJavaOpts | quote }} + {{- range $role, $enabled := .Values.roles }} + - name: node.{{ $role }} + value: "{{ $enabled }}" + {{- end }} +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} +{{- end }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: "{{ template "elasticsearch.uname" . 
}}" + mountPath: /usr/share/elasticsearch/data + {{- end }} +{{ if .Values.keystore }} + - name: keystore + mountPath: /usr/share/elasticsearch/config/elasticsearch.keystore + subPath: elasticsearch.keystore +{{ end }} + - name: elasticsearch-synonym-config + mountPath: /usr/share/elasticsearch/config/synonym.txt + subPath: synonym.txt + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.esConfig }} + - name: esconfig + mountPath: /usr/share/elasticsearch/config/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- if .Values.s3_regional_endpoint }} + - name: plugin-install-regional + mountPath: /usr/share/elasticsearch/plugin-install.sh + subPath: plugin-install.sh + {{- end }} + {{- if .Values.extraVolumeMounts }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . | indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} + {{- end }} + {{- end }} + {{- if .Values.masterTerminationFix }} + {{- if eq .Values.roles.master "true" }} + # This sidecar will prevent slow master re-election + # https://github.com/elastic/helm-charts/issues/63 + - name: elasticsearch-master-graceful-termination-handler + {{- if and $isAWSCloud .Values.multiarch.enabled .Values.multiarch.image.elasticsearch }} + image: {{ .Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + {{- end }} + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + command: + - "sh" + - -c + - | + #!/usr/bin/env bash + set -eo pipefail + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ .Values.protocol }}://{{ template "elasticsearch.masterService" . }}:{{ .Values.httpPort }}${path} + } + + cleanup () { + while true ; do + local master="$(http "/_cat/master?h=node" || echo "")" + if [[ $master == "{{ template "elasticsearch.masterService" . }}"* && $master != "${NODE_NAME}" ]]; then + echo "This node is not master." + break + fi + echo "This node is still master, waiting gracefully for it to step down" + sleep 1 + done + + exit 0 + } + + trap cleanup SIGTERM + + sleep infinity & + wait $! + resources: +{{ toYaml .Values.sidecarResources | indent 10 }} + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + {{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} + {{- end }} + {{- end }} + {{- end }} +{{- if .Values.lifecycle }} + lifecycle: +{{ toYaml .Values.lifecycle | indent 10 }} +{{- end }} + {{- if .Values.extraContainers }} + # Currently some extra blocks accept strings + # to continue with backwards compatibility this is being kept + # whilst also allowing for yaml to be specified too. + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . 
| indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} diff --git a/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml b/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml new file mode 100644 index 00000000000..26a184d6c0f --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml @@ -0,0 +1,28 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: elasticsearch-synonym-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + synonym.txt: | + customer, consumer + hr, hour + country, nation + delete, archive + updated, modified, altered + lat, latitude + long, longitude + profit, loss, revenue + customer, consumer, buyer + id, userid, user_id, guid + qty, quantity + cus_id, customer_id, customerid + amt, amount + review, rating + cost, price + home for data teams, atlan diff --git a/helm/atlas/charts/elasticsearch/values.yaml b/helm/atlas/charts/elasticsearch/values.yaml new file mode 100755 index 00000000000..6f3709b749d --- /dev/null +++ b/helm/atlas/charts/elasticsearch/values.yaml @@ -0,0 +1,319 @@ +--- + +multiarch: + enabled: false + image: {} + +clusterName: "atlas-elasticsearch" +nodeGroup: "master" + +# The service that non master groups will try to connect to when joining the cluster +# This should be set to clusterName + "-" + nodeGroup for your master group +global: + Tier_Type: "" +masterService: "" + +# Elasticsearch roles that will be applied to this nodeGroup +# These will be set as environment variables. E.g. node.master=true +roles: + master: "true" + ingest: "true" + data: "true" + +replicas: 3 +esMajorVersion: 7 +minimumMasterNodes: 1 +# JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage +# This is simpler and more reliable than manual calculation +esJavaOpts: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + +# Allows you to add any config files in /usr/share/elasticsearch/config/ +# such as elasticsearch.yml and log4j2.properties +esConfig: + elasticsearch.yml: | + http.max_content_length: 2147483647b + ingest.geoip.downloader.enabled: false + xpack.security.enabled: false + + log4j2.properties: | + -Dlog4j2.formatMsgNoLookups=true + +# Extra environment variables to append to this nodeGroup +# This will be appended to the current 'env:' key. 
You can use any of the kubernetes env
+# syntax here
+# Env variable for elasticsearch
+envFrom: []
+extraEnvs:
+  # - name: S3_BUCKET_NAME
+  #   value: ''
+  - name: S3_BUCKET_PATH
+    value: 'backup/elasticsearch'
+  # - name: S3_BUCKET_ROLE_ARN
+  #   value: ''
+  # - name: S3_BUCKET_REGION
+  #   value: ''
+# - name: MY_ENVIRONMENT_VAR
+#   value: the_value_goes_here
+
+# A list of secrets and their paths to mount inside the pod
+# This is useful for mounting certificates for security and for mounting
+# the X-Pack license
+secretMounts: []
+# - name: elastic-certificates
+#   secretName: elastic-certificates
+#   path: /usr/share/elasticsearch/config/certs
+
+image: "ghcr.io/atlanhq/elasticsearch-atlan-v2"
+imageTag: "7.17.4"
+imagePullPolicy: "IfNotPresent"
+
+resources:
+  requests:
+    cpu: "500m"
+    memory: "6Gi"
+  limits:
+    cpu: "2000m"
+    memory: "7Gi"
+
+resources_basic:
+  requests:
+    memory: "50Mi"
+  limits:
+    memory: "4Gi"
+
+resources_standard:
+  requests:
+    memory: "50Mi"
+  limits:
+    memory: "5Gi"
+
+podAnnotations: {}
+  # iam.amazonaws.com/role: es-cluster
+
+# additional labels
+labels: {}
+
+
+initResources: {}
+
+sidecarResources: {}
+
+networkHost: "0.0.0.0"
+
+maxShardsPerNode: "4000"
+
+volumeClaimTemplate:
+  accessModes: [ "ReadWriteOnce" ]
+  resources:
+    requests:
+      storage: 20Gi
+
+rbac:
+  create: false
+  serviceAccountName: ""
+
+podSecurityPolicy:
+  create: false
+  name: ""
+  spec:
+    privileged: true
+    fsGroup:
+      rule: RunAsAny
+    runAsUser:
+      rule: RunAsAny
+    seLinux:
+      rule: RunAsAny
+    supplementalGroups:
+      rule: RunAsAny
+    volumes:
+      - secret
+      - configMap
+      - persistentVolumeClaim
+
+persistence:
+  enabled: true
+  annotations: {}
+
+extraVolumes:
+  - name: varlog
+    emptyDir: {}
+
+extraVolumeMounts:
+  - name: varlog
+    mountPath: /usr/share/elasticsearch/logs
+
+extraContainers:
+  - name: atlas-es-gclog-sidecar
+    image: ghcr.io/atlanhq/busybox:1.32-multiarch
+    args: [/bin/sh, -c, 'sleep 60 && tail -n+1 -F /usr/share/elasticsearch/logs/gc.log']
+    volumeMounts:
+      - name: varlog
+        mountPath: /usr/share/elasticsearch/logs/
+
+# This is the PriorityClass settings as defined in
+# https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass
+priorityClassName: ""
+
+# By default this will make sure two pods don't end up on the same node
+# Changing this to a region would allow you to spread pods across regions
+antiAffinityTopologyKey: "kubernetes.io/hostname"
+
+# Hard means that by default pods will only be scheduled if there are enough nodes for them
+# and that they will never end up on the same node. Setting this to soft will do this "best effort"
+antiAffinity: "hard"
+
+# This is the node affinity settings as defined in
+# https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#node-affinity-beta-feature
+nodeAffinity:
+  preferredDuringSchedulingIgnoredDuringExecution:
+    - weight: 1
+      preference:
+        matchExpressions:
+          - key: lifecycle #Azure
+            operator: In
+            values:
+              - ondemand
+    - weight: 1
+      preference:
+        matchExpressions:
+          - key: cloud.google.com/gke-provisioning #GCP
+            operator: In
+            values:
+              - standard
+
+  # requiredDuringSchedulingIgnoredDuringExecution:
+  #   nodeSelectorTerms:
+  #     - matchExpressions:
+  #         - key: nodegroup
+  #           operator: NotIn
+  #           values:
+  #             - atlan-spark
+  #         - key: lifecycle
+  #           operator: In
+  #           values:
+  #             - None
+
+# The default is to deploy all pods serially. By setting this to parallel all pods are started at
+# the same time when bootstrapping the cluster
+podManagementPolicy: "Parallel"
+
+protocol: http
+httpPort: 9200
+transportPort: 9300
+
+service:
+  labels: {}
+  labelsHeadless: {}
+  type: ClusterIP
+  nodePort: ""
+  annotations: {}
+  httpPortName: http
+  transportPortName: transport
+  loadBalancerSourceRanges: []
+
+updateStrategy: RollingUpdate
+
+# This is the max unavailable setting for the pod disruption budget
+# The default value of 1 will make sure that kubernetes won't allow more than 1
+# of your pods to be unavailable during maintenance
+minAvailable: 2
+
+podSecurityContext:
+  fsGroup: 1000
+  runAsUser: 1000
+
+# The following value is deprecated,
+# please use the above podSecurityContext.fsGroup instead
+fsGroup: ""
+
+securityContext:
+  capabilities:
+    drop:
+      - ALL
+  # readOnlyRootFilesystem: true
+  runAsNonRoot: true
+  runAsUser: 1000
+
+# How long to wait for elasticsearch to stop gracefully
+terminationGracePeriod: 120
+
+sysctlVmMaxMapCount: 262144
+
+readinessProbe:
+  failureThreshold: 6
+  initialDelaySeconds: 30
+  periodSeconds: 10
+  successThreshold: 3
+  timeoutSeconds: 15
+
+custom_deployment:
+  enabled: false
+  instance_type:
+    - m6a.2xlarge
+
+# https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params wait_for_status
+clusterHealthCheckParams: "wait_for_status=yellow&timeout=5s"
+
+# Timeout for readiness probe health check when ES is already running
+healthCheckProbeTimeout: "1s"
+
+## Use an alternate scheduler.
+## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/
+##
+schedulerName: ""
+
+imagePullSecrets: []
+nodeSelector: {}
+tolerations: []
+
+# Enabling this will publicly expose your Elasticsearch instance.
+# Only enable this if you have security enabled on your cluster +ingress: + enabled: false + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + path: / + hosts: + - chart-example.local + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +nameOverride: "" +fullnameOverride: "" + +# https://github.com/elastic/helm-charts/issues/63 +masterTerminationFix: false + +lifecycle: {} + # preStop: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + # postStart: + # exec: + # command: + # - bash + # - -c + # - | + # #!/bin/bash + # # Add a template to adjust number of shards/replicas + # TEMPLATE_NAME=my_template + # INDEX_PATTERN="logstash-*" + # SHARD_COUNT=8 + # REPLICA_COUNT=1 + # ES_URL=http://localhost:9200 + # while [[ "$(curl -s -o /dev/null -w '%{http_code}\n' $ES_URL)" != "200" ]]; do sleep 1; done + # curl -XPUT "$ES_URL/_template/$TEMPLATE_NAME" -H 'Content-Type: application/json' -d'{"index_patterns":['\""$INDEX_PATTERN"\"'],"settings":{"number_of_shards":'$SHARD_COUNT',"number_of_replicas":'$REPLICA_COUNT'}}' + +sysctlInitContainer: + enabled: true + +keystore: [] + +backup: + enabled: false + image: ghcr.io/atlanhq/alpine-curl-atlan-v2:3.21.0 + #imageTag: latest + schedule: '0 3 * * *' diff --git a/helm/atlas/charts/logstash/.helmignore b/helm/atlas/charts/logstash/.helmignore new file mode 100644 index 00000000000..e12c0b4b918 --- /dev/null +++ b/helm/atlas/charts/logstash/.helmignore @@ -0,0 +1,2 @@ +tests/ +.pytest_cache/ diff --git a/helm/atlas/charts/logstash/Chart.yaml b/helm/atlas/charts/logstash/Chart.yaml new file mode 100644 index 00000000000..17c4756aab1 --- /dev/null +++ b/helm/atlas/charts/logstash/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +description: Official Elastic helm chart for Logstash +home: https://github.com/elastic/helm-charts +maintainers: + - email: helm-charts@elastic.co + name: Elastic +name: logstash +version: 9.1.2 +appVersion: 9.1.2 +sources: + - https://github.com/elastic/logstash +icon: https://helm.elastic.co/icons/logstash.png diff --git a/helm/atlas/charts/logstash/Makefile b/helm/atlas/charts/logstash/Makefile new file mode 100644 index 00000000000..22218a1f62a --- /dev/null +++ b/helm/atlas/charts/logstash/Makefile @@ -0,0 +1 @@ +include ../helpers/common.mk diff --git a/helm/atlas/charts/logstash/README.md b/helm/atlas/charts/logstash/README.md new file mode 100644 index 00000000000..bf6c3070913 --- /dev/null +++ b/helm/atlas/charts/logstash/README.md @@ -0,0 +1,244 @@ +# Logstash Helm Chart + +[![Build Status](https://img.shields.io/jenkins/s/https/devops-ci.elastic.co/job/elastic+helm-charts+main.svg)](https://devops-ci.elastic.co/job/elastic+helm-charts+main/) [![Artifact HUB](https://img.shields.io/endpoint?url=https://artifacthub.io/badge/repository/elastic)](https://artifacthub.io/packages/search?repo=elastic) + +This Helm chart is a lightweight way to configure and run our official +[Logstash Docker image][]. + +> **Warning** +> When it comes to running the Elastic on Kubernetes infrastructure, we +> recommend [Elastic Cloud on Kubernetes][] (ECK) as the best way to run and manage +> the Elastic Stack. +> +> ECK offers many operational benefits for both our basic-tier and our +> enterprise-tier customers, such as spinning up cluster nodes that were lost on +> failed infrastructure, seamless upgrades, rolling cluster changes, and much +> much more. 
+> +> With the release of the Elastic Stack Helm charts for Elastic version 8.5.1, +> we are handing over the ongoing maintenance of our Elastic Stack Helm charts +> to the community and contributors. This repository will finally be archived +> after 6 months time. Elastic Stacks deployed on Kubernetes through Helm charts +> will still be fully supported under EOL limitations. +> +> Since we want to provide an even better experience for our customers by +> running the Elastic Stack on Kubernetes, we will continue maintaining the +> Helm charts applicable to ECK Custom Resources. These charts can be found in +> the [ECK repository][eck-charts]. +> +> Helm charts will currently be maintained for ECK Enterprise-tier customers, +> however, we encourage the community to engage with the existing Helm charts +> for the Elastic Stack and continue supporting their ongoing maintenance. +> +> See for more details. + + + + + +- [Requirements](#requirements) +- [Installing](#installing) + - [Install a released version using the Helm repository](#install-a-released-version-using-the-helm-repository) + - [Install a development version using the main branch](#install-a-development-version-using-the-main-branch) +- [Upgrading](#upgrading) +- [Usage notes](#usage-notes) +- [Configuration](#configuration) +- [FAQ](#faq) + - [How to install OSS version of Logstash?](#how-to-install-oss-version-of-logstash) + - [How to install plugins?](#how-to-install-plugins) +- [Contributing](#contributing) + + + + + + +## Requirements + +See [supported configurations][] for more details. + +## Installing + +### Install a released version using the Helm repository + +* Add the Elastic Helm charts repo: +`helm repo add elastic https://helm.elastic.co` + +* Install it: `helm install logstash elastic/logstash` + +### Install a development version using the main branch + +* Clone the git repo: `git clone git@github.com:elastic/helm-charts.git` + +* Install it: `helm install logstash ./helm-charts/logstash --set imageTag=8.5.1` + + +## Upgrading + +Please always check [CHANGELOG.md][] and [BREAKING_CHANGES.md][] before +upgrading to a new chart version. + + +## Usage notes + +* This repo includes several [examples][] of configurations that can be used +as a reference. They are also used in the automated testing of this chart +* Automated testing of this chart is currently only run against GKE (Google +Kubernetes Engine). +* The chart deploys a StatefulSet and by default will do an automated rolling +update of your cluster. It does this by waiting for the cluster health to become +green after each instance is updated. If you prefer to update manually you can +set `OnDelete` [updateStrategy][]. +* It is important to verify that the JVM heap size in `logstashJavaOpts` and to +set the CPU/Memory `resources` to something suitable for your cluster. +* We have designed this chart to be very un-opinionated about how to configure +Logstash. It exposes ways to set environment variables and mount secrets inside +of the container. Doing this makes it much easier for this chart to support +multiple versions with minimal changes. +* `logstash.yml` configuration files can be set either by a ConfigMap using +`logstashConfig` in `values.yml` or by environment variables using `extraEnvs` +in `values.yml` , however Logstash Docker image can't mix both methods as +defining settings with environment variables causes `logstash.yml` to be +modified in place while using ConfigMap bind-mount the same file (more details +in this [note][]). 
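  For reference, a minimal `values.yml` sketch using the ConfigMap approach described above might look like the following (keys shown are illustrative, not a recommended configuration):

  ```yaml
  logstashConfig:
    logstash.yml: |
      # Keep the HTTP API reachable so the default probes keep working
      http.host: "0.0.0.0"
  ```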
+* When overriding `logstash.yml`, `http.host: 0.0.0.0` should always be included +to make default probes work. If restricting HTTP API to 127.0.0.1 is required by +using `http.host: 127.0.0.1`, default probes should be disabled or overridden +(see [values.yaml][] for the good syntax). +* An ingress is provided that can be used to expose the HTTP port. This can be +useful for the [http input plugin][], for instance. + + +## Configuration + +| Parameter | Description | Default | +|---------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------| +| `antiAffinityTopologyKey` | The [anti-affinity][] topology key]. By default this will prevent multiple Logstash nodes from running on the same Kubernetes node | `kubernetes.io/hostname` | +| `antiAffinity` | Setting this to hard enforces the [anti-affinity][] rules. If it is set to soft it will be done "best effort". Other values will be ignored | `hard` | +| `envFrom` | Templatable string to be passed to the [environment from variables][] which will be appended to the `envFrom:` definition for the container | `[]` | +| `extraContainers` | Templatable string of additional containers to be passed to the `tpl` function | `[]` | +| `extraEnvs` | Extra [environment variables][] which will be appended to the `env:` definition for the container | `[]` | +| `extraInitContainers` | Templatable string of additional `initContainers` to be passed to the `tpl` function | `[]` | +| `extraPorts` | An array of extra ports to open on the pod | `[]` | +| `extraVolumeMounts` | Templatable string of additional `volumeMounts` to be passed to the `tpl` function | `[]` | +| `extraVolumes` | Templatable string of additional `volumes` to be passed to the `tpl` function | `[]` | +| `fullnameOverride` | Overrides the full name of the resources. If not set the name will default to " `.Release.Name` - `.Values.nameOverride or .Chart.Name` " | `""` | +| `hostAliases` | Configurable [hostAliases][] | `[]` | +| `httpPort` | The http port that Kubernetes will use for the healthchecks and the service | `9600` | +| `imagePullPolicy` | The Kubernetes [imagePullPolicy][] value | `IfNotPresent` | +| `imagePullSecrets` | Configuration for [imagePullSecrets][] so that you can use a private registry for your image | `[]` | +| `imageTag` | The Logstash Docker image tag | `8.5.1` | +| `image` | The Logstash Docker image | `docker.elastic.co/logstash/logstash` | +| `labels` | Configurable [labels][] applied to all Logstash pods | `{}` | +| `ingress` | Configurable [ingress][] for external access to Logstash HTTP port. | see [values.yaml][] | +| `lifecycle` | Allows you to add lifecycle configuration. See [values.yaml][] for an example of the formatting | `{}` | +| `livenessProbe` | Configuration fields for the liveness [probe][] | see [values.yaml][] | +| `logstashConfig` | Allows you to add any config files in `/usr/share/logstash/config/` such as `logstash.yml` and `log4j2.properties` See [values.yaml][] for an example of the formatting | `{}` | +| `logstashJavaOpts` | Java options for Logstash. 
This is where you should configure the JVM heap size | `-Xmx1g -Xms1g` | +| `logstashPipeline` | Allows you to add any pipeline files in `/usr/share/logstash/pipeline/` | `{}` | +| `logstashPatternDir` | Allows you to define a custom directory to store pattern files | `/usr/share/logstash/patterns/` | +| `logstashPattern` | Allows you to add any pattern files in `logstashPatternDir` | `{}` | +| `maxUnavailable` | The [maxUnavailable][] value for the pod disruption budget. By default this will prevent Kubernetes from having more than 1 unhealthy pod in the node group | `1` | +| `nameOverride` | Overrides the chart name for resources. If not set the name will default to `.Chart.Name` | `""` | +| `nodeAffinity` | Value for the [node affinity settings][] | `{}` | +| `podAffinity` | Value for the [pod affinity settings][] | `{}` | +| `nodeSelector` | Configurable [nodeSelector][] so that you can target specific nodes for your Logstash cluster | `{}` | +| `persistence` | Enables a persistent volume for Logstash data | see [values.yaml][] | +| `podAnnotations` | Configurable [annotations][] applied to all Logstash pods | `{}` | +| `podManagementPolicy` | By default Kubernetes [deploys StatefulSets serially][]. This deploys them in parallel so that they can discover each other | `Parallel` | +| `podSecurityContext` | Allows you to set the [securityContext][] for the pod | see [values.yaml][] | +| `podSecurityPolicy` | Configuration for create a pod security policy with minimal permissions to run this Helm chart with `create: true` Also can be used to reference an external pod security policy with `name: "externalPodSecurityPolicy"` | see [values.yaml][] | +| `priorityClassName` | The name of the [PriorityClass][]. No default is supplied as the PriorityClass must be created first | `""` | +| `rbac` | Configuration for creating a role, role binding and service account as part of this Helm chart with `create: true` Also can be used to reference an external service account with `serviceAccountName: "externalServiceAccountName"` | see [values.yaml][] | +| `readinessProbe` | Configuration fields for the readiness [probe][] | see [values.yaml][] | +| `replicas` | Kubernetes replica count for the StatefulSet (i.e. how many pods) | `1` | +| `resources` | Allows you to set the [resources][] for the StatefulSet | see [values.yaml][] | +| `schedulerName` | Name of the [alternate scheduler][] | `""` | +| `secrets` | Allows you easily create a secret from as variables or file. For add secrets from file, add suffix `.filepath` to the key of secret key. The value will be encoded to base64. Useful for store certificates and other secrets. | See [values.yaml][] | +| `secretMounts` | Allows you easily mount a secret as a file inside the StatefulSet. Useful for mounting certificates and other secrets. See [values.yaml][] for an example | `[]` | +| `securityContext` | Allows you to set the [securityContext][] for the container | see [values.yaml][] | +| `service` | Configurable [service][] to expose the Logstash service. | see [values.yaml][] | +| `terminationGracePeriod` | The [terminationGracePeriod][] in seconds used when trying to stop the pod | `120` | +| `tolerations` | Configurable [tolerations][] | `[]` | +| `updateStrategy` | The [updateStrategy][] for the StatefulSet. By default Kubernetes will wait for the cluster to be green after upgrading each pod. 
Setting this to `OnDelete` will allow you to manually delete each pod during upgrades | `RollingUpdate` | +| `volumeClaimTemplate` | Configuration for the [volumeClaimTemplate for StatefulSets][]. You will want to adjust the storage (default `30Gi` ) and the `storageClassName` if you are using a different storage class | see [values.yaml][] | + + +## FAQ + +### How to install OSS version of Logstash? + +Deploying OSS version of Logstash can be done by setting `image` value to +[Logstash OSS Docker image][] + +An example of Logstash deployment using OSS version can be found in +[examples/oss][]. + +### How to install plugins? + +The recommended way to install plugins into our Docker images is to create a +[custom Docker image][]. + +The Dockerfile would look something like: + +``` +ARG logstash_version +FROM docker.elastic.co/logstash/logstash:${logstash_version} +RUN bin/logstash-plugin install logstash-output-kafka +``` + +And then updating the `image` in values to point to your custom image. + +There are a couple reasons we recommend this: + +1. Tying the availability of Logstash to the download service to install plugins +is not a great idea or something that we recommend. Especially in Kubernetes +where it is normal and expected for a container to be moved to another host at +random times. +2. Mutating the state of a running Docker image (by installing plugins) goes +against best practices of containers and immutable infrastructure. + + +## Contributing + +Please check [CONTRIBUTING.md][] before any contribution or for any questions +about our development and testing process. + +[alternate scheduler]: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/#specify-schedulers-for-pods +[annotations]: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +[anti-affinity]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity +[BREAKING_CHANGES.md]: https://github.com/elastic/helm-charts/blob/main/BREAKING_CHANGES.md +[CHANGELOG.md]: https://github.com/elastic/helm-charts/blob/main/CHANGELOG.md +[CONTRIBUTING.md]: https://github.com/elastic/helm-charts/blob/main/CONTRIBUTING.md +[custom docker image]: https://www.elastic.co/guide/en/logstash/current/docker-config.html#_custom_images +[deploys statefulsets serially]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#pod-management-policies +[eck-charts]: https://github.com/elastic/cloud-on-k8s/tree/master/deploy +[elastic cloud on kubernetes]: https://github.com/elastic/cloud-on-k8s +[environment from variables]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-pod-configmap/#configure-all-key-value-pairs-in-a-configmap-as-container-environment-variables +[environment variables]: https://kubernetes.io/docs/tasks/inject-data-application/define-environment-variable-container/#using-environment-variables-inside-of-your-config +[examples]: https://github.com/elastic/helm-charts/tree/main/logstash/examples +[examples/oss]: https://github.com/elastic/helm-charts/tree/main/logstash/examples/oss +[hostAliases]: https://kubernetes.io/docs/concepts/services-networking/add-entries-to-pod-etc-hosts-with-host-aliases/ +[http input plugin]: https://www.elastic.co/guide/en/logstash/current/plugins-inputs-http.html +[imagePullPolicy]: https://kubernetes.io/docs/concepts/containers/images/#updating-images +[imagePullSecrets]: 
https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-pod-that-uses-your-secret +[ingress]: https://kubernetes.io/docs/concepts/services-networking/ingress/ +[labels]: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +[logstash docker image]: https://www.elastic.co/guide/en/logstash/current/docker.html +[logstash oss docker image]: https://www.docker.elastic.co/r/logstash/logstash-oss +[maxUnavailable]: https://kubernetes.io/docs/tasks/run-application/configure-pdb/#specifying-a-poddisruptionbudget +[node affinity settings]: https://kubernetes.io/docs/tasks/configure-pod-container/assign-pods-nodes-using-node-affinity/ +[nodeSelector]: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#nodeselector +[note]: https://www.elastic.co/guide/en/logstash/current/docker-config.html#docker-env-config +[pod affinity settings]: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity +[priorityClass]: https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +[probe]: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-probes/ +[resources]: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ +[securityContext]: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod +[service]: https://kubernetes.io/docs/concepts/services-networking/service/ +[supported configurations]: https://github.com/elastic/helm-charts/tree/main/README.md#supported-configurations +[terminationGracePeriod]: https://kubernetes.io/docs/concepts/workloads/pods/pod/#termination-of-pods +[tolerations]: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ +[updateStrategy]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/ +[values.yaml]: https://github.com/elastic/helm-charts/tree/main/logstash/values.yaml +[volumeClaimTemplate for statefulsets]: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#stable-storage diff --git a/helm/atlas/charts/logstash/templates/NOTES.txt b/helm/atlas/charts/logstash/templates/NOTES.txt new file mode 100755 index 00000000000..215c0e927e8 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/NOTES.txt @@ -0,0 +1,2 @@ +1. Watch all cluster members come up. + $ kubectl get pods --namespace={{ .Release.Namespace }} -l app={{ template "logstash.fullname" . }} -w diff --git a/helm/atlas/charts/logstash/templates/_helpers.tpl b/helm/atlas/charts/logstash/templates/_helpers.tpl new file mode 100755 index 00000000000..bea7112aefa --- /dev/null +++ b/helm/atlas/charts/logstash/templates/_helpers.tpl @@ -0,0 +1,45 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "logstash.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+*/}} +{{- define "logstash.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} + +{{/* +Use the fullname if the serviceAccount value is not set +*/}} +{{- define "logstash.serviceAccount" -}} +{{- .Values.rbac.serviceAccountName | default (include "logstash.fullname" .) -}} +{{- end -}} + +{{/* +Create the namespace name +*/}} +{{- define "logstash.namespace" -}} +{{- if .Values.namespaceOverride -}} +{{- .Values.namespaceOverride -}} +{{- else -}} +{{- .Release.Namespace -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label +*/}} +{{- define "logstash.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} diff --git a/helm/atlas/charts/logstash/templates/configmap-config.yaml b/helm/atlas/charts/logstash/templates/configmap-config.yaml new file mode 100644 index 00000000000..b4c930300a1 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/configmap-config.yaml @@ -0,0 +1,20 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.logstashConfig }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-config + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashConfig }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/logstash/templates/configmap-metricbeat.yaml b/helm/atlas/charts/logstash/templates/configmap-metricbeat.yaml new file mode 100644 index 00000000000..fe16b54250b --- /dev/null +++ b/helm/atlas/charts/logstash/templates/configmap-metricbeat.yaml @@ -0,0 +1,69 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.extraContainers }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-logstash-metricbeat-config + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ template "logstash.chart" . 
}}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: + metricbeat.yml: | + metricbeat.config.modules: + path: ${path.config}/modules.d/*.yml + reload.enabled: true + reload.period: 10s + + metricbeat.modules: + # Monitor Logstash process only (primary focus) + - module: logstash + metricsets: + - node + - node_stats + period: 10s + hosts: ["localhost:9600"] + xpack.enabled: true + + # System metrics disabled due to missing /hostfs mounts + # - module: system + # metricsets: + # - cpu + # - memory + # - process_summary + # period: 10s + + # Monitor JVM metrics from Logstash (commented out - using logstash module instead) + # - module: jolokia + # metricsets: ["jmx"] + # period: 10s + # hosts: ["localhost:9600"] + # namespace: "logstash_jvm" + + output.elasticsearch: + hosts: ["${ELASTIC_HOSTS}"] + index: "metricbeat-logstash-%{+yyyy.MM.dd}" + template.enabled: false + + # Disable template management and data streams completely + setup.template.enabled: false + setup.ilm.enabled: false + + processors: + # Disabled all kubernetes/docker processors to avoid permission issues + # - add_docker_metadata: ~ + # - add_kubernetes_metadata: + # in_cluster: true + - add_fields: + target: kubernetes + fields: + pod_name: ${POD_NAME} + namespace: {{ .Release.Namespace }} + component: logstash + + logging.level: info + logging.to_stderr: true +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/configmap-pattern.yaml b/helm/atlas/charts/logstash/templates/configmap-pattern.yaml new file mode 100644 index 00000000000..25d2cfdf2bc --- /dev/null +++ b/helm/atlas/charts/logstash/templates/configmap-pattern.yaml @@ -0,0 +1,20 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.logstashPattern }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-pattern + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashPattern }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/logstash/templates/configmap-pipeline.yaml b/helm/atlas/charts/logstash/templates/configmap-pipeline.yaml new file mode 100644 index 00000000000..30d2d931917 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/configmap-pipeline.yaml @@ -0,0 +1,20 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.logstashPipeline }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "logstash.fullname" . }}-pipeline + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +data: +{{- range $path, $config := .Values.logstashPipeline }} + {{ $path }}: | +{{ tpl $config $ | indent 4 -}} +{{- end -}} +{{- end -}} +{{- end -}} diff --git a/helm/atlas/charts/logstash/templates/ingress.yaml b/helm/atlas/charts/logstash/templates/ingress.yaml new file mode 100644 index 00000000000..2fbe4858b19 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/ingress.yaml @@ -0,0 +1,71 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "logstash.fullname" . 
-}} +{{- $httpPort := .Values.httpPort -}} +{{- $ingressPath := .Values.ingress.path -}} +{{- $pathtype := .Values.ingress.pathtype -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ include "logstash.namespace" . }} + labels: + app: {{ $fullName | quote}} + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +{{- with .Values.ingress.annotations }} + annotations: +{{ toYaml . | indent 4 }} +{{- end }} +spec: + {{- if .Values.ingress.className }} + ingressClassName: {{ .Values.ingress.className | quote }} + {{- end }} +{{- if .Values.ingress.tls }} + tls: + {{- if .ingressPath }} + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- else }} +{{ toYaml .Values.ingress.tls | indent 4 }} + {{- end }} +{{- end}} + rules: + {{- range .Values.ingress.hosts }} + {{- /* + TODO: deprecate $ingressPath for Logstash 8.x + */}} + {{- if $ingressPath }} + - host: {{ . }} + http: + paths: + - path: {{ $ingressPath }} + pathType: {{ $pathtype }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ $httpPort }} + {{- else }} + - host: {{ .host }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ $pathtype }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ .servicePort | default $httpPort }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/poddisruptionbudget.yaml b/helm/atlas/charts/logstash/templates/poddisruptionbudget.yaml new file mode 100644 index 00000000000..845af394223 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/poddisruptionbudget.yaml @@ -0,0 +1,23 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.maxUnavailable }} +{{- if .Capabilities.APIVersions.Has "policy/v1" -}} +apiVersion: policy/v1 +{{- else}} +apiVersion: policy/v1beta1 +{{- end }} +kind: PodDisruptionBudget +metadata: + name: "{{ template "logstash.fullname" . }}-pdb" + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +spec: + maxUnavailable: {{ .Values.maxUnavailable }} + selector: + matchLabels: + app: "{{ template "logstash.fullname" . }}" +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/podsecuritypolicy.yaml b/helm/atlas/charts/logstash/templates/podsecuritypolicy.yaml new file mode 100644 index 00000000000..2ea340a95f6 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/podsecuritypolicy.yaml @@ -0,0 +1,16 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.podSecurityPolicy.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: policy/v1beta1 +kind: PodSecurityPolicy +metadata: + name: {{ default $fullName .Values.podSecurityPolicy.name | quote }} + labels: + app: "{{ template "logstash.fullname" . 
}}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +spec: +{{ toYaml .Values.podSecurityPolicy.spec | indent 2 }} +{{- end -}} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/role.yaml b/helm/atlas/charts/logstash/templates/role.yaml new file mode 100644 index 00000000000..4d45dea6d6d --- /dev/null +++ b/helm/atlas/charts/logstash/templates/role.yaml @@ -0,0 +1,28 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: {{ $fullName | quote }} + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +rules: + - apiGroups: + - extensions + resources: + - podsecuritypolicies + resourceNames: + {{- if eq .Values.podSecurityPolicy.name "" }} + - {{ $fullName | quote }} + {{- else }} + - {{ .Values.podSecurityPolicy.name | quote }} + {{- end }} + verbs: + - use +{{- end -}} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/rolebinding.yaml b/helm/atlas/charts/logstash/templates/rolebinding.yaml new file mode 100644 index 00000000000..a5e27b1c1ff --- /dev/null +++ b/helm/atlas/charts/logstash/templates/rolebinding.yaml @@ -0,0 +1,23 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: {{ $fullName | quote }} + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +subjects: + - kind: ServiceAccount + name: "{{ template "logstash.serviceAccount" . }}" + namespace: {{ template "logstash.namespace" . }} +roleRef: + kind: Role + name: {{ $fullName | quote }} + apiGroup: rbac.authorization.k8s.io +{{- end -}} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/secret.yaml b/helm/atlas/charts/logstash/templates/secret.yaml new file mode 100644 index 00000000000..41272be1c51 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/secret.yaml @@ -0,0 +1,30 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.secrets }} +{{- $fullName := include "logstash.fullname" . 
-}} +{{- range .Values.secrets }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ printf "%s-%s" $fullName .name | quote }} + namespace: {{ include "logstash.namespace" $ }} + labels: + app: {{ $fullName | quote }} + chart: {{ $.Chart.Name | quote }} + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +data: +{{- range $key, $val := .value }} + {{- if hasSuffix "filepath" $key }} + {{ $key | replace ".filepath" "" }}: {{ $.Files.Get $val | b64enc | quote }} + {{ else }} + {{ $key }}: {{ $val | b64enc | quote }} + {{- end }} +{{- end }} +type: Opaque +{{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/charts/logstash/templates/service-headless.yaml b/helm/atlas/charts/logstash/templates/service-headless.yaml new file mode 100644 index 00000000000..1c8fad92057 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/service-headless.yaml @@ -0,0 +1,23 @@ +{{- if .Values.global.atlasLogstash.enabled }} +--- +kind: Service +apiVersion: v1 +metadata: + name: "{{ template "logstash.fullname" . }}-headless" + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} +{{- if .Values.labels }} +{{ toYaml .Values.labels | indent 4 }} +{{- end }} +spec: + clusterIP: None + selector: + app: "{{ template "logstash.fullname" . }}" + ports: + - name: http + port: {{ .Values.httpPort }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/service.yaml b/helm/atlas/charts/logstash/templates/service.yaml new file mode 100644 index 00000000000..552c7e62b83 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/service.yaml @@ -0,0 +1,35 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.service }} +--- +kind: Service +apiVersion: v1 +metadata: + name: "{{ template "logstash.fullname" . }}" + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + annotations: +{{ toYaml .Values.service.annotations | indent 4 }} +spec: + type: {{ .Values.service.type }} +{{- if .Values.service.loadBalancerIP }} + loadBalancerIP: {{ .Values.service.loadBalancerIP }} +{{- end }} +{{- with .Values.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: +{{ toYaml . | indent 4 }} +{{- end }} +{{- if .Values.service.externalTrafficPolicy }} + externalTrafficPolicy: {{ .Values.service.externalTrafficPolicy }} +{{- end }} + selector: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + release: {{ .Release.Name | quote }} + ports: +{{ toYaml .Values.service.ports | indent 4 }} +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/serviceaccount.yaml b/helm/atlas/charts/logstash/templates/serviceaccount.yaml new file mode 100644 index 00000000000..4636326a750 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/serviceaccount.yaml @@ -0,0 +1,25 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.rbac.create -}} +{{- $fullName := include "logstash.fullname" . -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "{{ template "logstash.serviceAccount" . }}" + namespace: {{ template "logstash.namespace" . 
}} + annotations: + {{- with .Values.rbac.serviceAccountAnnotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- if .Values.rbac.annotations }} + annotations: + {{- range $key, $value := .Values.rbac.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +{{- end -}} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/servicemonitor.yaml b/helm/atlas/charts/logstash/templates/servicemonitor.yaml new file mode 100644 index 00000000000..7350406b234 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/servicemonitor.yaml @@ -0,0 +1,71 @@ +{{- if .Values.global.atlasLogstash.enabled }} +{{- if .Values.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ template "logstash.fullname" . }}-metrics + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ template "logstash.chart" . }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- if .Values.serviceMonitor.labels }} + {{- toYaml .Values.serviceMonitor.labels | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + app: "{{ template "logstash.fullname" . }}" + endpoints: + # Prometheus exporter metrics (PRIMARY - for dashboard) + - port: metrics # Port 9304 - logstash-exporter + path: /metrics + interval: {{ .Values.serviceMonitor.interval | default "15s" }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout | default "10s" }} + honorLabels: true + relabelings: + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: namespace + - sourceLabels: [__meta_kubernetes_pod_label_app] + targetLabel: app + metricRelabelings: + # Add job label for all metrics + - sourceLabels: [] + targetLabel: job + replacement: "logstash" + # Add clusterName label for dashboard compatibility + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: clusterName + replacement: "$1" + + # Logstash native API metrics (SECONDARY - for additional context) + - port: http # Port 9600 + path: /_node/stats + interval: {{ .Values.serviceMonitor.interval | default "15s" }} + scrapeTimeout: {{ .Values.serviceMonitor.scrapeTimeout | default "10s" }} + honorLabels: true + relabelings: + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: namespace + - sourceLabels: [__meta_kubernetes_pod_label_app] + targetLabel: app + metricRelabelings: + # Add job label for all metrics + - sourceLabels: [] + targetLabel: job + replacement: "logstash-native" # Different job name to avoid conflicts + # Add clusterName label for dashboard compatibility + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: clusterName + replacement: "$1" +{{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/templates/statefulset.yaml b/helm/atlas/charts/logstash/templates/statefulset.yaml new file mode 100644 index 00000000000..b5fe33471e7 --- /dev/null +++ b/helm/atlas/charts/logstash/templates/statefulset.yaml @@ -0,0 +1,240 @@ +{{- if .Values.global.atlasLogstash.enabled }} +--- +apiVersion: apps/v1 +kind: StatefulSet +metadata: + 
name: {{ template "logstash.fullname" . }} + namespace: {{ template "logstash.namespace" . }} + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + serviceName: {{ template "logstash.fullname" . }}-headless + selector: + matchLabels: + app: "{{ template "logstash.fullname" . }}" + release: {{ .Release.Name | quote }} + replicas: {{ .Values.replicas }} + podManagementPolicy: {{ .Values.podManagementPolicy }} + updateStrategy: + type: {{ .Values.updateStrategy }} + {{- if .Values.persistence.enabled }} + volumeClaimTemplates: + - metadata: + name: {{ template "logstash.fullname" . }} + {{- with .Values.persistence.annotations }} + annotations: +{{ toYaml . | indent 8 }} + {{- end }} + spec: +{{ toYaml .Values.volumeClaimTemplate | indent 6 }} + {{- end }} + template: + metadata: + name: "{{ template "logstash.fullname" . }}" + labels: + app: "{{ template "logstash.fullname" . }}" + chart: "{{ .Chart.Name }}" + heritage: {{ .Release.Service | quote }} + release: {{ .Release.Name | quote }} + {{- range $key, $value := .Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- /* This forces a restart if the configmap has changed */}} + {{- if .Values.logstashConfig }} + configchecksum: {{ include (print .Template.BasePath "/configmap-config.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- /* This forces a restart if the configmap has changed */}} + {{- if .Values.logstashPipeline }} + pipelinechecksum: {{ include (print .Template.BasePath "/configmap-pipeline.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- if .Values.logstashPattern }} + patternchecksum: {{ include (print .Template.BasePath "/configmap-pattern.yaml") . | sha256sum | trunc 63 }} + {{- end }} + {{- if .Values.secrets }} + secretschecksum: {{ include (print .Template.BasePath "/secret.yaml") . | sha256sum | trunc 63 }} + {{- end }} + spec: + {{- if .Values.schedulerName }} + schedulerName: "{{ .Values.schedulerName }}" + {{- end }} + securityContext: +{{ toYaml .Values.podSecurityContext | indent 8 }} + {{- if or .Values.rbac.create .Values.rbac.serviceAccountName }} + serviceAccountName: "{{ template "logstash.serviceAccount" . }}" + {{- end }} + {{- with .Values.tolerations }} + tolerations: +{{ toYaml . | indent 6 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity .Values.podAffinity }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + affinity: + {{- end }} + {{- if eq .Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "logstash.fullname" .}}" + topologyKey: {{ .Values.antiAffinityTopologyKey }} + {{- else if eq .Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + topologyKey: {{ .Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - "{{ template "logstash.fullname" . 
}}" + {{- end }} + {{- with .Values.nodeAffinity }} + nodeAffinity: +{{ toYaml . | indent 10 }} + {{- end }} + {{- with .Values.podAffinity }} + podAffinity: +{{ toYaml . | indent 10 }} + {{- end }} + terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} + volumes: + {{- range .Values.secretMounts }} + - name: {{ .name }} + secret: + secretName: {{ .secretName }} + {{- end }} + {{- if .Values.logstashConfig }} + - name: logstashconfig + configMap: + name: {{ template "logstash.fullname" . }}-config + {{- end }} + {{- if .Values.logstashPipeline }} + - name: logstashpipeline + configMap: + name: {{ template "logstash.fullname" . }}-pipeline + {{- end }} + {{- if .Values.logstashPattern }} + - name: logstashpattern + configMap: + name: {{ template "logstash.fullname" . }}-pattern + {{- end }} + {{- if .Values.extraVolumes }} + {{- if eq "string" (printf "%T" .Values.extraVolumes) }} +{{ tpl .Values.extraVolumes . | indent 8 }} + {{- else }} +{{ toYaml .Values.extraVolumes | indent 8 }} + {{- end }} + {{- end }} + {{- if .Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml .Values.imagePullSecrets | indent 8 }} + {{- end}} + {{- if .Values.hostAliases }} + hostAliases: {{ toYaml .Values.hostAliases | nindent 6 }} + {{- end }} + {{- if .Values.extraInitContainers }} + initContainers: + {{- if eq "string" (printf "%T" .Values.extraInitContainers) }} +{{ tpl .Values.extraInitContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraInitContainers | indent 6 }} + {{- end }} + {{- end }} + containers: + - name: "{{ template "logstash.name" . }}" + securityContext: +{{ toYaml .Values.securityContext | indent 10 }} + image: "{{ .Values.image }}:{{ .Values.imageTag }}" + imagePullPolicy: "{{ .Values.imagePullPolicy }}" + livenessProbe: +{{ toYaml .Values.livenessProbe | indent 10 }} + readinessProbe: +{{ toYaml .Values.readinessProbe | indent 10 }} + ports: + - name: http + containerPort: {{ .Values.httpPort }} + {{- if .Values.extraPorts }} + {{- toYaml .Values.extraPorts | nindent 8 }} + {{- end }} + resources: +{{ toYaml .Values.resources | indent 10 }} + env: + - name: LS_JAVA_OPTS + value: "{{ .Values.logstashJavaOpts }}" +{{- if .Values.extraEnvs }} +{{ toYaml .Values.extraEnvs | indent 10 }} +{{- end }} +{{- if .Values.envFrom }} + envFrom: +{{ toYaml .Values.envFrom | indent 10 }} +{{- end }} + volumeMounts: + {{- if .Values.persistence.enabled }} + - name: "{{ template "logstash.fullname" . }}" + mountPath: /usr/share/logstash/data + {{- end }} + {{- range .Values.secretMounts }} + - name: {{ .name }} + mountPath: {{ .path }} + {{- if .subPath }} + subPath: {{ .subPath }} + {{- end }} + {{- end }} + {{- range $path, $config := .Values.logstashConfig }} + - name: logstashconfig + mountPath: /usr/share/logstash/config/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- range $path, $config := .Values.logstashPipeline }} + - name: logstashpipeline + mountPath: /usr/share/logstash/pipeline/{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- if .Values.logstashPattern }} + {{- $logstashPatternDir := .Values.logstashPatternDir -}} + {{- range $path, $config := .Values.logstashPattern }} + - name: logstashpattern + mountPath: {{ $logstashPatternDir }}{{ $path }} + subPath: {{ $path }} + {{- end -}} + {{- end -}} + {{- if .Values.extraVolumeMounts }} + {{- if eq "string" (printf "%T" .Values.extraVolumeMounts) }} +{{ tpl .Values.extraVolumeMounts . 
| indent 10 }} + {{- else }} +{{ toYaml .Values.extraVolumeMounts | indent 10 }} + {{- end }} + {{- end }} +{{- if .Values.lifecycle }} + lifecycle: +{{ toYaml .Values.lifecycle | indent 10 }} +{{- end }} + {{- if .Values.extraContainers }} + {{- if eq "string" (printf "%T" .Values.extraContainers) }} +{{ tpl .Values.extraContainers . | indent 6 }} + {{- else }} +{{ toYaml .Values.extraContainers | indent 6 }} + {{- end }} + {{- end }} +{{- end }} diff --git a/helm/atlas/charts/logstash/values.yaml b/helm/atlas/charts/logstash/values.yaml new file mode 100644 index 00000000000..7cd345caa67 --- /dev/null +++ b/helm/atlas/charts/logstash/values.yaml @@ -0,0 +1,1015 @@ +--- +global: + atlasLogstash: + enabled: false + +replicas: 1 + +# Namespace for Logstash deployment +namespaceOverride: "atlas" + +# Allows you to add any config files in /usr/share/logstash/config/ +# such as logstash.yml and log4j2.properties +# +# Note that when overriding logstash.yml, `api.http.host: "0.0.0.0"` should always be included +# to make default probes work. +logstashConfig: + logstash.yml: | + api.http.host: "0.0.0.0" + api.http.port: 9600 + pipeline.workers: 3 + pipeline.batch.size: 800 + + # Dead Letter Queue configuration + dead_letter_queue.enable: true + dead_letter_queue.max_bytes: 1gb + path.dead_letter_queue: /usr/share/logstash/dlq + # Pipeline metrics + metric.collect: true + # Queue configuration + queue.type: persisted + queue.max_bytes: 2gb + queue.checkpoint.writes: 1024 + pipeline.batch.delay: 50 + path.queue: /usr/share/logstash/data/queue + queue.page_capacity: 64mb + queue.max_events: 0 + + log4j2.properties: | + status = error + name = LogstashPropertiesConfig + + appender.console.type = Console + appender.console.name = plain_console + appender.console.layout.type = PatternLayout + appender.console.layout.pattern = [%d{ISO8601}][%-5p][%-25c] %m%n + + rootLogger.level = info + rootLogger.appenderRef.console.ref = plain_console + +# Allows you to add any pipeline files in /usr/share/logstash/pipeline/ +### ***warn*** there is a hardcoded logstash.conf in the image, override it first +logstashPipeline: + atlas-entities.conf: | + input { + kafka { + bootstrap_servers => "kafka-0.kafka-headless.kafka.svc.cluster.local:9092,kafka-1.kafka-headless.kafka.svc.cluster.local:9092,kafka-2.kafka-headless.kafka.svc.cluster.local:9092" + topics => ["ATLAS_ENTITIES"] + group_id => "logstash-atlas-indexing" + client_id => "logstash-atlas-client" + consumer_threads => 3 + partition_assignment_strategy => "org.apache.kafka.clients.consumer.RoundRobinAssignor" + fetch_min_bytes => 1024 + fetch_max_bytes => 2097152 + codec => "json" + auto_offset_reset => "latest" + enable_auto_commit => true + session_timeout_ms => 30000 + heartbeat_interval_ms => 3000 + } + + # Uncomment for local testing with stdin + # stdin { codec => json_lines } + } + + filter { + # If Kafka codec failed to parse JSON -> drop + if "_jsonparsefailure" in [tags] { + drop { } + } + + # Extract nested message fields to top level + if [message][entity] { + ruby { + code => " + # Copy nested message fields to top level + message = event.get('message') + if message + event.set('entity', message['entity']) if message['entity'] + event.set('operationType', message['operationType']) if message['operationType'] + event.set('eventTime', message['eventTime']) if message['eventTime'] + event.set('mutatedDetails', message['mutatedDetails']) if message['mutatedDetails'] + + # Extract internalAttributes from entity for easy access + if message['entity'] 
&& message['entity']['internalAttributes'] + event.set('internalAttributes', message['entity']['internalAttributes']) + end + end + " + } + } + + # Only process relevant operations + if [operationType] not in ["ENTITY_CREATE", "ENTITY_UPDATE", "ENTITY_DELETE", "CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + drop { } + } + + # If no entity object, drop + if ![entity] { + mutate { + add_tag => [ "no_entity_field" ] + add_field => { + "[@metadata][drop_reason]" => "Missing entity field" + "[@metadata][dropped_at]" => "%{+YYYY.MM.dd HH:mm:ss}" + } + } + drop { } + } + + # Transform Atlas entity to ES document + if [operationType] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + ruby { + code => " + require 'json' + require 'time' + + entity = event.get('entity') + mutated_details = event.get('mutatedDetails') + operation_type = event.get('operationType') + + # Skip if no entity + if entity.nil? + event.cancel + return + end + + # Get attributes based on operation type + attrs = {} + if operation_type == 'ENTITY_UPDATE' + # For UPDATE: Only use changed fields from mutatedDetails.attributes + if !mutated_details.nil? && !mutated_details['attributes'].nil? + attrs = mutated_details['attributes'] + end + else + # For CREATE/DELETE: Use mutatedDetails.attributes or entity.attributes + if !mutated_details.nil? && !mutated_details['attributes'].nil? + attrs = mutated_details['attributes'] + elsif !entity['attributes'].nil? + attrs = entity['attributes'] + end + end + + # Build document based on operation type + if operation_type == 'ENTITY_UPDATE' + # For UPDATE: Build minimal document with only changed fields + essential metadata + document = { + '__modificationTimestamp' => (entity['updateTime'] || (Time.now.to_f * 1000).to_i), + '__modifiedBy' => (entity['updatedBy'] || 'atlas-kafka-sync') + } + + # internalAttributes priority override will happen later after all processing + + # addedRelationshipAttributes processing removed for testing + else + # For CREATE/DELETE: Build complete document with metadata + document = { + '__typeName' => entity['typeName'], + '__guid' => entity['guid'], + '__esDocId' => (entity['docId'] || entity['guid']), + '__state' => (entity['status'] || 'ACTIVE'), + '__timestamp' => (entity['createTime'] || (Time.now.to_f * 1000).to_i), + '__modificationTimestamp' => (entity['updateTime'] || (Time.now.to_f * 1000).to_i), + '__createdBy' => (entity['createdBy'] || 'atlas-kafka-sync'), + '__modifiedBy' => (entity['updatedBy'] || 'atlas-kafka-sync'), + '__superTypeNames' => (entity['superTypeNames'] || []) + } + + # internalAttributes priority override will happen later after all processing + end + + # For non-UPDATE operations: Build additional metadata + if operation_type != 'ENTITY_UPDATE' + # superTypeNames building removed for testing + end + + # Function to check if field should be excluded + def should_exclude_field(key, value, operation_type = nil) + # Always exclude nil values + return true if value.nil? + + # Operation-aware empty value handling (arrays, strings, hashes) + if (value.is_a?(Array) && value.empty?) || value == '' || (value.is_a?(Hash) && value.empty?) + if operation_type == 'ENTITY_UPDATE' + return false # Preserve empty values in updates (semantic meaning) + else + return true # Exclude empty values in create/delete (prevent clutter) + end + end + + # Exclude arrays with relationship objects + if value.is_a?(Array) && !value.empty? 
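+                  # Illustrative shape of such a relationship-style array (hypothetical values):
+                  #   [{ 'guid' => 'abc-123', 'typeName' => 'Table', 'uniqueAttributes' => { 'qualifiedName' => 'db/tbl' } }]
+                  # Only the first element is inspected before the whole array is excluded.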
+ first_item = value[0] + if first_item.is_a?(Hash) && + first_item.key?('guid') && + first_item.key?('typeName') && + first_item.key?('uniqueAttributes') + return true + end + end + + # Exclude complex objects (but keep simple primitives/arrays) + if value.is_a?(Hash) && !value.empty? + return true + end + + false + end + + # Add non-excluded attributes + attrs.each do |key, value| + unless should_exclude_field(key, value, operation_type) + document[key] = value + end + end + + # For non-UPDATE operations: Compute lineage and hierarchy + if operation_type != 'ENTITY_UPDATE' + # relationshipAttributes lineage computation removed for testing + + # Defaults + document['tenantId'] = document['tenantId'] || 'default' + document['name'] = document['name'] || entity['displayText'] + end + + # qualifiedNameHierarchy building removed for testing + + # Custom attributes now handled automatically via internalAttributes + + # FINAL PRIORITY OVERRIDE: internalAttributes values are authoritative + # This happens LAST to ensure they override any entity attribute values + internal_attrs = event.get('internalAttributes') + if internal_attrs && internal_attrs.is_a?(Hash) + internal_attrs.each do |key, value| + document[key] = value # Final override - internalAttributes wins! + end + end + + # Put metadata and transformed fields back into event + # Enhanced docId resolution with ES lookup fallback + if document['__esDocId'] + es_doc_id = document['__esDocId'] + elsif entity['docId'] + es_doc_id = entity['docId'] + else + # FALLBACK: Query ES to find existing document by __guid + # This prevents orphaned GUID documents in UPDATE operations + es_doc_id = entity['guid'] # Default fallback + + # This will be handled by HTTP fallback filter below + # Don't override es_doc_id here - let the fallback logic handle it + end + + event.set('[@metadata][es_doc_id]', es_doc_id) + event.set('[@metadata][needs_guid_lookup]', es_doc_id == entity['guid'] && ['ENTITY_UPDATE', 'ENTITY_DELETE'].include?(operation_type)) + event.set('[@metadata][guid]', entity['guid']) # Store GUID for HTTP lookup + event.set('[@metadata][operation_type]', operation_type) + + # Extract deleteHandler for delete operations + if operation_type == 'ENTITY_DELETE' + delete_handler = entity['deleteHandler'] || 'SOFT' + event.set('[@metadata][delete_handler]', delete_handler) + end + + document.each do |k, v| + event.set(k, v) + end + + # Clean up + event.remove('entity') + event.remove('mutatedDetails') + event.remove('operationType') + event.remove('message') + event.remove('internalAttributes') + + # Remove Kafka message wrapper fields + event.remove('msgCreatedBy') + event.remove('msgSourceIP') + event.remove('msgCreationTime') + event.remove('msgSplitIdx') + event.remove('msgSplitCount') + event.remove('spooled') + event.remove('eventTime') + event.remove('version') + event.remove('source') + event.remove('msgCompressionKind') + + # Remove Logstash metadata + event.remove('@timestamp') + event.remove('@version') + event.remove('event') + " + } + } + + # Classification operations transformation + if [operationType] in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + ruby { + code => " + require 'json' + require 'time' + + entity = event.get('entity') + mutated_details = event.get('mutatedDetails') + operation_type = event.get('operationType') + + # Skip if no entity + if entity.nil? + event.cancel + return + end + + # Skip if no mutatedDetails for classification operations + if mutated_details.nil? 
|| mutated_details.empty? + event.cancel + return + end + + # Ensure mutatedDetails is an array + if !mutated_details.is_a?(Array) + event.cancel + return + end + + # Detect direct vs propagated operation + # Compare entity.guid with mutatedDetails[0].entityGuid + entity_guid = entity['guid'] + first_mutated = mutated_details[0] + + if first_mutated.nil? || !first_mutated.is_a?(Hash) + event.cancel + return + end + + mutated_entity_guid = first_mutated['entityGuid'] + is_direct = (entity_guid == mutated_entity_guid) + + # Build base document with essential metadata + document = { + '__modificationTimestamp' => (entity['updateTime'] || (Time.now.to_f * 1000).to_i), + '__modifiedBy' => (entity['updatedBy'] || 'atlas-kafka-sync') + } + + # PRIORITY OVERRIDE: Use internalAttributes (same pattern as entity operations) + # This contains the authoritative, correctly computed classification fields + internal_attrs = event.get('internalAttributes') + if internal_attrs && internal_attrs.is_a?(Hash) + internal_attrs.each do |key, value| + document[key] = value # internalAttributes wins! (fixes classificationsText bug) + end + event.set('[@metadata][used_internal_attrs]', true) + event.set('[@metadata][internal_attrs_count]', internal_attrs.keys.length) + else + # Fallback logging if internalAttributes is missing (shouldn't happen) + event.set('[@metadata][used_internal_attrs]', false) + event.set('[@metadata][internal_attrs_missing]', true) + end + + # Set ES document ID and metadata (preserve existing logic) + es_doc_id = entity['docId'] || entity['guid'] + event.set('[@metadata][es_doc_id]', es_doc_id) + event.set('[@metadata][guid]', entity['guid']) + event.set('[@metadata][operation_type]', operation_type) + event.set('[@metadata][needs_guid_lookup]', es_doc_id == entity['guid']) + event.set('[@metadata][is_direct]', is_direct) + event.set('[@metadata][is_propagated]', !is_direct) + + # Enhanced debug info (improved from previous version) + event.set('[@metadata][debug_entity_guid]', entity_guid) + event.set('[@metadata][debug_mutated_guid]', mutated_entity_guid) + event.set('[@metadata][debug_operation]', operation_type) + event.set('[@metadata][debug_direct_operation]', is_direct) + + # Add classification-specific debug info + if operation_type == 'CLASSIFICATION_DELETE' + deleted_types = mutated_details.map { |md| md['typeName'] }.compact + event.set('[@metadata][debug_deleted_types]', deleted_types.join(',')) + elsif operation_type == 'CLASSIFICATION_ADD' + added_types = mutated_details.map { |md| md['typeName'] }.compact + event.set('[@metadata][debug_added_types]', added_types.join(',')) + end + + # Log final classification data for debugging + if document['__classificationsText'] + event.set('[@metadata][debug_classificationstext_length]', document['__classificationsText'].length) + end + if document['__traitNames'] + event.set('[@metadata][debug_direct_traits_count]', document['__traitNames'].length) + end + if document['__propagatedTraitNames'] + event.set('[@metadata][debug_propagated_traits_count]', document['__propagatedTraitNames'].length) + end + + # Add all fields to event + document.each do |k, v| + event.set(k, v) + end + + # Clean up original fields (preserve existing cleanup logic) + event.remove('entity') + event.remove('mutatedDetails') + event.remove('operationType') + event.remove('message') + event.remove('internalAttributes') + event.remove('msgCreatedBy') + event.remove('msgSourceIP') + event.remove('msgCreationTime') + event.remove('msgSplitIdx') + 
event.remove('msgSplitCount') + event.remove('spooled') + event.remove('eventTime') + event.remove('version') + event.remove('source') + event.remove('msgCompressionKind') + event.remove('@timestamp') + event.remove('@version') + event.remove('event') + " + } + } + + # ES Lookup Fallback for operations missing docId (UPDATE, DELETE, CLASSIFICATION_ADD, CLASSIFICATION_DELETE, CLASSIFICATION_UPDATE) + if [@metadata][needs_guid_lookup] { + http { + url => "http://atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200/janusgraph_vertex_index/_search" + verb => "POST" + headers => { "Content-Type" => "application/json" } + body_format => "json" + body => { + "query" => { + "term" => { + "__guid" => "%{[@metadata][guid]}" + } + } + "size" => 1 + "_source" => false + } + target_body => "es_lookup_response" + add_field => { "[@metadata][lookup_attempted]" => "true" } + } + + # Process ES lookup response + ruby { + code => " + response = event.get('es_lookup_response') + if response && response['hits'] && response['hits']['total']['value'] > 0 + # Found existing document - use its _id + existing_doc_id = response['hits']['hits'][0]['_id'] + event.set('[@metadata][es_doc_id]', existing_doc_id) + event.set('[@metadata][lookup_success]', true) + else + # No existing document found - keep GUID fallback + event.set('[@metadata][lookup_success]', false) + end + + # Clean up response + event.remove('es_lookup_response') + " + } + } + + # Remove empty/null fields to match Atlas behavior (skip for classification operations) + if [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + ruby { + code => " + # Remove fields with empty/null values (operation-aware) + operation_type = event.get('[@metadata][operation_type]') || 'ENTITY_CREATE' + + event.to_hash.each do |key, value| + should_remove = false + + if value.nil? + should_remove = true # Always remove nil values + elsif (value.is_a?(Array) && value.empty?) || value == '' || (value.is_a?(Hash) && value.empty?) 
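+              # Illustrative case (hypothetical field): an ENTITY_UPDATE carrying description => ''
+              # keeps the empty string so the cleared value reaches ES, while ENTITY_CREATE/DELETE drop it.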
+ # Operation-aware empty value handling (same logic as should_exclude_field) + if operation_type == 'ENTITY_UPDATE' + should_remove = false # Preserve empty values in updates (semantic meaning) + else + should_remove = true # Remove empty values in create/delete (prevent clutter) + end + end + + if should_remove + event.remove(key) + end + end + " + } + } + + # decide ES action metadata + if [@metadata][operation_type] == "ENTITY_CREATE" { + mutate { add_field => { "[@metadata][es_action]" => "create" } } + } else if [@metadata][operation_type] == "ENTITY_UPDATE" { + mutate { add_field => { "[@metadata][es_action]" => "update" } } + } else if [@metadata][operation_type] == "ENTITY_DELETE" { + # Route based on deleteHandler type + if [@metadata][delete_handler] == "SOFT" { + mutate { + add_field => { "[@metadata][es_action]" => "update" } + add_field => { "[@metadata][is_soft_delete]" => "true" } + } + } else if [@metadata][delete_handler] == "HARD" { + mutate { add_field => { "[@metadata][es_action]" => "delete" } } + } else if [@metadata][delete_handler] == "PURGE" { + mutate { add_field => { "[@metadata][es_action]" => "purge" } } + } else { + # DEFAULT: fallback to soft delete for unknown handlers + mutate { + add_field => { "[@metadata][es_action]" => "update" } + add_field => { "[@metadata][is_soft_delete]" => "true" } + } + } + } else if [@metadata][operation_type] in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + mutate { add_field => { "[@metadata][es_action]" => "update" } } + } + + # Removed internal metrics - using Prometheus exporter instead + + # Add comprehensive logging for debugging + if "_grokparsefailure" in [tags] or "_rubyexception" in [tags] { + mutate { + add_field => { + "[@metadata][error_type]" => "transformation_error" + "[@metadata][error_timestamp]" => "%{+YYYY.MM.dd HH:mm:ss}" + } + } + } + } + + output { + # create path + if [@metadata][es_action] == "create" { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "%{[@metadata][es_action]}" + document_id => "%{[@metadata][es_doc_id]}" + timeout => 90 + compression_level => 3 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + } + + # update path - partial document updates + if [@metadata][es_action] == "update" and [@metadata][is_soft_delete] != "true" and [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "update" + document_id => "%{[@metadata][es_doc_id]}" + doc_as_upsert => true + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + } + + # classification operations - partial document updates + if [@metadata][es_action] == "update" and [@metadata][operation_type] in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "update" + document_id => "%{[@metadata][es_doc_id]}" + doc_as_upsert => false + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + } + + # soft-delete via update + inline painless script + if 
[@metadata][es_action] == "update" and [@metadata][is_soft_delete] == "true" { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "update" + document_id => "%{[@metadata][es_doc_id]}" + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + script_lang => "painless" + script => 'ctx._source.__state = "DELETED"; ctx._source.__modificationTimestamp = System.currentTimeMillis();' + script_type => "inline" + scripted_upsert => false + ilm_enabled => false + manage_template => false + } + } + + # hard-delete - physically remove document from ES + if [@metadata][es_action] == "delete" { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "delete" + document_id => "%{[@metadata][es_doc_id]}" + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + } + + # purge-delete - remove from main index and log for audit + if [@metadata][es_action] == "purge" { + # Delete from main index + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "delete" + document_id => "%{[@metadata][es_doc_id]}" + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + + # Log PURGE operation for audit trail + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "logstash-purge-audit-%{+YYYY.MM.dd}" + document_type => "_doc" + manage_template => false + } + } + + # Error logging + if "_grokparsefailure" in [tags] or "_rubyexception" in [tags] or "no_entity_field" in [tags] { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "logstash-errors-%{+YYYY.MM.dd}" + document_type => "_doc" + manage_template => false + } + + # Also log to stdout for immediate visibility + stdout { + codec => json_lines + } + } + + # Removed metrics output - using Prometheus exporter for monitoring + } + +# Allows you to add any pattern files in your custom pattern dir +logstashPatternDir: "/usr/share/logstash/patterns/" +logstashPattern: {} +# pattern.conf: | +# DPKG_VERSION [-+~<>\.0-9a-zA-Z]+ + +# Extra environment variables to append to this nodeGroup +# This will be appended to the current 'env:' key. You can use any of the kubernetes env +# syntax here +extraEnvs: [] +# - name: MY_ENVIRONMENT_VAR +# value: the_value_goes_here + +# Allows you to load environment variables from kubernetes secret or config map +envFrom: [] +# - secretRef: +# name: env-secret +# - configMapRef: +# name: config-map + +# Add sensitive data to k8s secrets +secrets: [] +# - name: "env" +# value: +# ELASTICSEARCH_PASSWORD: "LS1CRUdJTiBgUFJJVkFURSB" +# api_key: ui2CsdUadTiBasRJRkl9tvNnw +# - name: "tls" +# value: +# ca.crt: | +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# LS0tLS1CRUdJT0K +# cert.crt: "LS0tLS1CRUdJTiBlRJRklDQVRFLS0tLS0K" +# cert.key.filepath: "secrets.crt" # The path to file should be relative to the `values.yaml` file. 
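+# For reference, each secret declared above is created as "<fullname>-<name>" (see templates/secret.yaml);
+# it can then be mounted via secretMounts below. Hypothetical example, assuming the chart fullname
+# renders as "atlas-logstash":
+# secretMounts:
+#   - name: tls
+#     secretName: atlas-logstash-tls
+#     path: /usr/share/logstash/config/certs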
+ +# A list of secrets and their paths to mount inside the pod +secretMounts: [] + +hostAliases: [] +#- ip: "127.0.0.1" +# hostnames: +# - "foo.local" +# - "bar.local" + +image: "ghcr.io/atlanhq/logstash" +imageTag: "9.1.2-multiarch" +imagePullPolicy: "IfNotPresent" +imagePullSecrets: [] + +podAnnotations: + # Prometheus annotations for pod metrics scraping + prometheus.io/scrape: "true" + prometheus.io/port: "9304" + prometheus.io/path: "/metrics" + co.elastic.logs/module: logstash + co.elastic.logs/fileset.stdout: access + co.elastic.logs/fileset.stderr: error + # Pipeline status annotations + atlan.com/pipeline: "kafka-to-elasticsearch" + atlan.com/component: "logstash" + +# additionals labels +labels: + # Prometheus labels for service discovery + app.kubernetes.io/component: "data-pipeline" + app.kubernetes.io/part-of: "atlas" + atlan.com/monitoring: "enabled" + prometheus/scrape: "true" + +logstashJavaOpts: "-Xmx2g -Xms2g" + +resources: + requests: + cpu: "200m" + memory: "2560Mi" + limits: + cpu: "2000m" + memory: "3072Mi" + +volumeClaimTemplate: + accessModes: ["ReadWriteOnce"] + resources: + requests: + storage: 10Gi + +rbac: + create: true + serviceAccountAnnotations: {} + serviceAccountName: "" + annotations: + {} + #annotation1: "value1" + #annotation2: "value2" + #annotation3: "value3" + +podSecurityPolicy: + create: false + name: "" + spec: + privileged: false + fsGroup: + rule: RunAsAny + runAsUser: + rule: RunAsAny + seLinux: + rule: RunAsAny + supplementalGroups: + rule: RunAsAny + volumes: + - secret + - configMap + - persistentVolumeClaim + +persistence: + enabled: true + annotations: {} + +extraVolumes: + # Dead letter queue for failed messages + - name: dlq + emptyDir: {} + + # Metrics data directory - DISABLED (not using metricbeat) + # - name: metrics-data + # emptyDir: {} + + # Metricbeat configuration (created by template) - DISABLED (not using metricbeat) + # - name: metricbeat-config + # configMap: + # name: atlas-logstash-metricbeat-config + # defaultMode: 0644 + +extraVolumeMounts: + [] + # - name: extras + # mountPath: /usr/share/extras + # readOnly: true + +extraContainers: + # Metricbeat sidecar for detailed metrics collection - DISABLED (not using the data) + # - name: metricbeat + # image: docker.elastic.co/beats/metricbeat:8.11.0 + # args: + # - "-c" + # - "/etc/metricbeat.yml" + # - "-e" + # + # env: + # - name: ELASTIC_HOSTS + # value: "atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200" + # - name: POD_NAME + # valueFrom: + # fieldRef: + # fieldPath: metadata.name + # resources: + # requests: + # cpu: "50m" + # memory: "128Mi" + # limits: + # cpu: "200m" + # memory: "256Mi" + # volumeMounts: + # - name: metricbeat-config + # mountPath: /etc/metricbeat.yml + # subPath: metricbeat.yml + # readOnly: true + # - name: metrics-data + # mountPath: /usr/share/metricbeat/data + + # Prometheus exporter sidecar + - name: logstash-exporter + image: ghcr.io/atlanhq/prometheus-logstash-exporter:0.7.15-multiarch + args: + - "-logstash.host=localhost" + - "-logstash.port=9600" + - "-web.listen-address=:9304" + ports: + - containerPort: 9304 + name: metrics + protocol: TCP + resources: + requests: + cpu: "20m" + memory: "64Mi" + limits: + cpu: "100m" + memory: "128Mi" + +extraInitContainers: + [] + # - name: do-something + # image: busybox + # command: ['do', 'something'] + +# This is the PriorityClass settings as defined in +# https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#priorityclass +priorityClassName: "" + +# By default this 
will make sure two pods don't end up on the same node +# Changing this to a region would allow you to spread pods across regions +antiAffinityTopologyKey: "kubernetes.io/hostname" + +# Hard means that by default pods will only be scheduled if there are enough nodes for them +# and that they will never end up on the same node. Setting this to soft will do this "best effort" +antiAffinity: "hard" + +# This is the node affinity settings as defined in +# https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#node-affinity +nodeAffinity: {} + +# This is inter-pod affinity settings as defined in +# https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity +podAffinity: {} + +# The default is to deploy all pods serially. By setting this to parallel all pods are started at +# the same time when bootstrapping the cluster +podManagementPolicy: "Parallel" + +httpPort: 9600 + +# Custom ports to add to logstash +extraPorts: + [] + # - name: beats + # containerPort: 5001 + +updateStrategy: RollingUpdate + +# This is the max unavailable setting for the pod disruption budget +# The default value of 1 will make sure that kubernetes won't allow more than 1 +# of your pods to be unavailable during maintenance +maxUnavailable: 1 + +podSecurityContext: + fsGroup: 1000 + runAsUser: 1000 + +securityContext: + capabilities: + drop: + - ALL + # readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 1000 + +# How long to wait for logstash to stop gracefully +terminationGracePeriod: 60 + +# Probes +# Default probes are using `httpGet` which requires that `api.http.host: "0.0.0.0"` is part of +# `logstash.yml`. If needed probes can be disabled or overridden using the following syntaxes: +# +# disable livenessProbe +# livenessProbe: null +# +# replace httpGet default readinessProbe by some exec probe +# readinessProbe: +# httpGet: null +# exec: +# command: +# - curl +# - localhost:9600 + +livenessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 300 + periodSeconds: 10 + timeoutSeconds: 5 + failureThreshold: 3 + successThreshold: 1 + +readinessProbe: + httpGet: + path: / + port: http + initialDelaySeconds: 30 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + successThreshold: 1 + +## Use an alternate scheduler. 
+## ref: https://kubernetes.io/docs/tasks/administer-cluster/configure-multiple-schedulers/ +## +schedulerName: "" + +nodeSelector: {} +tolerations: [] + +nameOverride: "" +fullnameOverride: "" + +lifecycle: + {} + # preStop: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + # postStart: + # exec: + # command: ["/bin/sh", "-c", "echo Hello from the postStart handler > /usr/share/message"] + +service: + annotations: {} + type: ClusterIP + loadBalancerIP: "" + ports: + - name: beats + port: 5044 + protocol: TCP + targetPort: 5044 + - name: http + port: 9600 + protocol: TCP + targetPort: 9600 + - name: metrics + port: 9304 + protocol: TCP + targetPort: 9304 + +# ServiceMonitor for Prometheus scraping +serviceMonitor: + enabled: true + interval: "15s" + scrapeTimeout: "10s" + labels: + prometheus: kube-prometheus + release: prometheus-operator + app.kubernetes.io/name: logstash + annotations: + prometheus.io/scrape: "true" + +ingress: + enabled: false + annotations: + {} + # kubernetes.io/tls-acme: "true" + className: "nginx" + pathtype: ImplementationSpecific + hosts: + - host: logstash-example.local + paths: + - path: /beats + servicePort: 5044 + - path: /http + servicePort: 8080 + tls: [] + # - secretName: logstash-example-tls + # hosts: + # - logstash-example.local diff --git a/helm/atlas/templates/NOTES.txt b/helm/atlas/templates/NOTES.txt new file mode 100644 index 00000000000..6e602e9cb99 --- /dev/null +++ b/helm/atlas/templates/NOTES.txt @@ -0,0 +1,19 @@ +1. Get the application URL by running these commands: +{{- if .Values.atlas.ingress.enabled }} +{{- range .Values.atlas.ingress.hosts }} + http://{{ . }} +{{- end }} +{{- else if contains "NodePort" .Values.atlas.service.type }} + export NODE_IP=$(kubectl get nodes --namespace {{ .Values.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.atlas.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get svc -w {{ template "fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Values.Namespace }} {{ template "fullname" . }} -o jsonpath='{.status.loadBalancer.ingress[0].ip}') + echo http://$SERVICE_IP:{{ .Values.atlas.service.port }} +{{- else if contains "ClusterIP" .Values.atlas.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Values.Namespace }} -l "app={{ template "name" . }},release={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + kubectl port-forward $POD_NAME 8080:{{ .Values.atlas.service.port }} + echo "Visit http://127.0.0.1:8080 to use your application" + echo "Default username/password is admin/admin" +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/_helpers.tpl b/helm/atlas/templates/_helpers.tpl new file mode 100644 index 00000000000..2f1b146cddc --- /dev/null +++ b/helm/atlas/templates/_helpers.tpl @@ -0,0 +1,47 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
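+For example, with release name "atlas" and chart name "atlas" this helper renders "atlas-atlas" (illustrative values only).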
+*/}} +{{- define "fullname" -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Return the appropriate apiVersion for ingress. +*/}} +{{- define "atlas.ingress.apiVersion" -}} + {{- if and (.Capabilities.APIVersions.Has "networking.k8s.io/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} + {{- print "networking.k8s.io/v1" -}} + {{- else if .Capabilities.APIVersions.Has "networking.k8s.io/v1beta1" -}} + {{- print "networking.k8s.io/v1beta1" -}} + {{- else -}} + {{- print "extensions/v1beta1" -}} + {{- end -}} +{{- end -}} +{{/* +Return if ingress is stable. +*/}} +{{- define "atlas.ingress.isStable" -}} + {{- eq (include "atlas.ingress.apiVersion" .) "networking.k8s.io/v1" -}} +{{- end -}} +{{/* +Return if ingress supports ingressClassName. +*/}} +{{- define "atlas.ingress.supportsIngressClassName" -}} + {{- or (eq (include "atlas.ingress.isStable" .) "true") (and (eq (include "atlas.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}} +{{- end -}} +{{/* +Return if ingress supports pathType. +*/}} +{{- define "atlas.ingress.supportsPathType" -}} + {{- or (eq (include "atlas.ingress.isStable" .) "true") (and (eq (include "atlas.ingress.apiVersion" .) "networking.k8s.io/v1beta1") (semverCompare ">= 1.18-0" .Capabilities.KubeVersion.Version)) -}} +{{- end -}} \ No newline at end of file diff --git a/helm/atlas/templates/atlas-audit-index-configmap.yaml b/helm/atlas/templates/atlas-audit-index-configmap.yaml new file mode 100644 index 00000000000..d22fc45a33b --- /dev/null +++ b/helm/atlas/templates/atlas-audit-index-configmap.yaml @@ -0,0 +1,162 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-audit-index + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-audit.sh: | + curl -X PUT "atlas-elasticsearch-master.atlas.svc.cluster.local:9200/_template/atlas.audit?pretty" -H 'Content-Type: application/json' -d' + { + "index_patterns": ["atlas.audit*"], + "settings": { + "number_of_shards" : 3, + "number_of_replicas" : 1, + "analysis": { + "analyzer": { + "resource_path": { + "tokenizer": "resource_hierarchy" + } + }, + "tokenizer": { + "resource_hierarchy": { + "type": "path_hierarchy", + "delimiter": "/" + } + } + } + }, + "mappings": { + "properties": { + "all": { + "type": "text" + }, + "Action": { + "type": "keyword" + }, + "glossaryQualifiedName": { + "type": "keyword" + }, + "Level": { + "type": "keyword" + }, + "Type": { + "type": "keyword" + }, + "access": { + "type": "keyword", + "copy_to": [ + "all" + ] + }, + "action": { + "type": "keyword" + }, + "agent": { + "type": "keyword" + }, + "agentHost": { + "type": "keyword" + }, + "cliIP": { + "type": "keyword" + }, + "cluster_name": { + "type": "keyword" + }, + "datetime": { + "type": "text" + }, + "enforcer": { + "type": "keyword" + }, + "event_count": { + "type": "integer" + }, + "event_dur_ms": { + "type": "integer" + }, + "evtTime": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss.SSS" + }, + "id": { + "type": "keyword" + }, + "logType": { + "type": "keyword" + }, + "logtimestamp": { + "type": "date", + "format": "yyyy-MM-dd HH:mm:ss,SSS" + }, + "policy": { + "type": "integer" + }, + "policy_version": { + "type": "integer" + }, + "qualifiedName": { + "type": "keyword" + }, + "qualifiedNameText": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "qnComponents": { + "type": "nested" + }, + "repo": { + "type": "keyword" + }, + "repoType": { + "type": "keyword" + }, + "reqUser": { + "type": "keyword", + "copy_to": [ + "all" + ] + }, + "resType": { + "type": "keyword" + }, + "resource": { + "type": "text", + "copy_to": [ + "all" + ] + }, + "result": { + "type": "keyword" + }, + "seq_num": { + "type": "integer" + }, + "tags": { + "type": "text" + } + } + } + } + ' + + + + + + + + + + + + + + diff --git a/helm/atlas/templates/atlas-logback-config-configmap.yaml b/helm/atlas/templates/atlas-logback-config-configmap.yaml new file mode 100644 index 00000000000..99e892156bc --- /dev/null +++ b/helm/atlas/templates/atlas-logback-config-configmap.yaml @@ -0,0 +1,249 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-logback-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-logback.xml: | + + + + + + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + true + * + true + true + + + + + + filter + atlas-audit + + + + logback: %d %-5p - atlas-audit - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-auth-audit + + + + logback: %d %-5p - atlas-auth-audit - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-application + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-perf + + + + logback: %d %-5p - atlas-perf - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-ranger + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-metrics + + + + logback: %d %-5p - atlas-metrics - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + filter + atlas-tasks + + + + logback: %d %-5p - atlas-application - [%t:%X{context-key}] - X-Atlan-Request-Id:[%X{X-Atlan-Request-Id}] - traceId:[%X{trace_id}] ~ %m \(%C{1}:%L\)%n + + + + INFO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/helm/atlas/templates/configmap-init-container-script.yaml b/helm/atlas/templates/configmap-init-container-script.yaml new file mode 100644 index 00000000000..d055710013a --- /dev/null +++ b/helm/atlas/templates/configmap-init-container-script.yaml @@ -0,0 +1,120 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-init-container-script + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-init-container.sh: | + #!/bin/sh + + echo "[+] Checking for Cassandra and Elasticsearch" + /tmp/atlas-init.sh + + echo "[+] Copying Config File" + cp /tmp/configfile/atlas-application.properties /tmp/newconfigfile/atlas-application.properties + + echo "[+] Checking for Keycloak" + until printf "." 
&& curl -s $KEYCLOAK_ADDRESS/realms/master; do + sleep 2; + done; + echo 'Keycloak OK ✓' + + # Check for Keycloak realms and clients + echo "[+] Checking for Keycloak realms and clients" + # Set variables + KEYCLOAK_SERVER=$KEYCLOAK_ADDRESS + KEYCLOAK_REALM="master" + KEYCLOAK_USERNAME="batman" + KEYCLOAK_PASSWORD=$RANGER_PASSWORD + CLIENT_NAME="atlan-backend" + REALM_NAME="default" + REALM_EXISTS=false + CLIENT_EXISTS=false + + until [ "$REALM_EXISTS" = true ] && [ "$CLIENT_EXISTS" = true ] + do + # Fetch token + TOKEN_RESPONSE=$(curl -s -X POST "${KEYCLOAK_SERVER}/realms/${KEYCLOAK_REALM}/protocol/openid-connect/token" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=${KEYCLOAK_USERNAME}" \ + -d "password=${KEYCLOAK_PASSWORD}" \ + -d 'grant_type=password' \ + -d 'client_id=admin-cli') + + ACCESS_TOKEN=$(echo $TOKEN_RESPONSE | jq -r .access_token) + + if [ "$ACCESS_TOKEN" == "null" ]; then + echo "Error: Unable to fetch access token. Please check your server, realm, username, and password." + sleep 5 + continue + fi + + # Check if realm exists + REALM_RESPONSE=$(curl -s -X GET "${KEYCLOAK_SERVER}/admin/realms/${REALM_NAME}" \ + -H "Authorization: Bearer $ACCESS_TOKEN") + + REALM_ERROR=$(echo $REALM_RESPONSE | jq -r '.error // empty') + + if [ "$REALM_RESPONSE" == "" ] || [ "$REALM_ERROR" != "" ]; then + echo "Realm ${REALM_NAME} does not exist or could not be retrieved. Retrying in 5 seconds..." + REALM_EXISTS=false + sleep 5 + continue + else + echo "Realm ${REALM_NAME} exists" + REALM_EXISTS=true + fi + + # Check if client exists + CLIENT_ID_RESPONSE=$(curl -s -X GET "${KEYCLOAK_SERVER}/admin/realms/${REALM_NAME}/clients?clientId=${CLIENT_NAME}" \ + -H "Authorization: Bearer $ACCESS_TOKEN") + + CLIENT_ID=$(echo $CLIENT_ID_RESPONSE | jq -r '.[0].id') + + if [ "$CLIENT_ID_RESPONSE" == "" ] || [ "$CLIENT_ID" == "null" ]; then + echo "Client ${CLIENT_NAME} does not exist in realm ${REALM_NAME} or could not be retrieved. Retrying in 5 seconds..." + CLIENT_EXISTS=false + sleep 5 + continue + else + echo "Client ${CLIENT_NAME} exists in realm ${REALM_NAME}" + CLIENT_EXISTS=true + fi + done + + echo "[+] Checking for Cache Invalidation Proxy" + until printf "." && curl -s http://cinv.atlas.svc.cluster.local:5000/health; do + sleep 2; + done; + echo '[+] Cache Invalidation Proxy OK ✓' + + echo "[+] Checking for Zookeeper" + while true + do + sleep 5 + echo "[+] checking for zookeeper service" + leader=`echo stat | nc zookeeper 2181 | grep leader |wc -l` + echo $leader + if [ $leader -eq 1 ]; then echo "Zookeeper cluster up"; break; fi + done + echo 'Zookeeper OK ✓' + + echo "[+] Checking for Kafka" + host='kafka-headless.kafka.svc.cluster.local' + port=9092 + while true; do + if nc -z -w 1 "$host" "$port"; then + echo "(+) Kafka OK" + break + else + echo "(-) Kafka is either down or in the process of a restart" + sleep 10 + fi + done + echo "[+] Atlas audit indexing" + /scripts/atlas-audit.sh \ No newline at end of file diff --git a/helm/atlas/templates/configmap-init-script.yaml b/helm/atlas/templates/configmap-init-script.yaml new file mode 100644 index 00000000000..f0bf0994dd6 --- /dev/null +++ b/helm/atlas/templates/configmap-init-script.yaml @@ -0,0 +1,259 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-init-script + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-init.sh: | + #!/bin/sh + CASSANDRA_HOST="atlas-cassandra" + ES_HOST="atlas-elasticsearch-master" + ES_LOGGING="logging-master.logging.svc.cluster.local" + + ### Check cassandra health + echo "Checking if Cassandra is up and running ..." + retries=0 + # Try to connect on Cassandra CQLSH port 9042 + nc -z $CASSANDRA_HOST 9042 + cassandra_status=$? + + while [[ $retries -lt 10 && $cassandra_status != 0 ]]; do + echo "Cassandra doesn't reply to requests on ports 7199 and/or 9042. Sleeping for a while and trying again... retry ${retries}" + + # Sleep for a while + sleep 2s + + nc -z $CASSANDRA_HOST 9042 + cassandra_status=$? + + let "retries++" + done + + if [ $cassandra_status -ne 0 ]; then + echo "/!\ ERROR: Cassandra check has ended with errors" + exit 1 + else + echo "Cassandra check completed successfully --- OK" + fi + + ### Check elastic search health + + es_status=0 + retries=0 + while [[ $retries -lt 10 && $es_status == 0 ]]; do + echo "Checking if ElasticSearch is up and running ..." + + cluster_color=`curl -XGET -s http://$ES_HOST:9200/_cluster/health | \ + python3 -c 'import sys, json; print(json.dumps(json.load(sys.stdin)["status"], sort_keys=True, indent=4))'` + + echo "Cluster is ${cluster_color}" + + if [ "$cluster_color" != "\"green\"" ] && [ "$cluster_color" != "\"yellow\"" ] ; then + echo "Elasticsearch $ES_HOST is not up, retrying in 2 secs ..." + + # Sleep for a while + sleep 2s + else + es_status=1 + break + fi + + let "retries++" + done + + if [ $es_status -ne 0 ] ; then + echo "Elasticsearch check completed successfully --- OK" + else + echo "/!\ ERROR: Elasticsearch check has ended with errors" + exit 1 + fi + + # Check logging health and create index with mapping + + es_log_status=0 + retries=0 + while [[ $retries -lt 10 && $es_log_status == 0 ]]; do + echo "Checking if Logging ElasticSearch is up and running ..." + + cluster_color=`curl -XGET -s http://$ES_HOST:9200/_cluster/health | \ + python3 -c 'import sys, json; print(json.dumps(json.load(sys.stdin)["status"], sort_keys=True, indent=4))'` + + echo "Logging Cluster is ${cluster_color}" + + if [ "$cluster_color" != "\"green\"" ] && [ "$cluster_color" != "\"yellow\"" ] ; then + echo "Elasticsearch $ES_HOST is not up, retrying in 2 secs ..." + + # Sleep for a while + sleep 2s + else + es_log_status=1 + # create index + echo "Creating Index ..." 
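+          # Descriptive note (added comment): the PUT request below creates the ranger-audit index
+          # with explicit field mappings (mostly keyword/long fields) plus a custom analyzer
+          # (pattern tokenizer + lowercase filter), so the "resource" field is also searchable
+          # as analyzed text via resource.text.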
+ curl -kv -X PUT "http://$ES_HOST:9200/ranger-audit" \ + --header 'Content-Type: application/json' \ + --data-raw '{ + "mappings": { + "properties": { + "expire_at": { + "type": "date", + "store": true, + "doc_values": true + }, + "ttl": { + "type": "text", + "store": true + }, + "version": { + "type": "long", + "store": true, + "index": false + }, + "access": { + "type": "keyword" + }, + "action": { + "type": "keyword" + }, + "agent": { + "type": "keyword" + }, + "agentHost": { + "type": "keyword" + }, + "cliIP": { + "type": "keyword" + }, + "cliType": { + "type": "keyword" + }, + "cluster": { + "type": "keyword" + }, + "reqContext": { + "type": "keyword" + }, + "enforcer": { + "type": "keyword" + }, + "event_count": { + "type": "long", + "doc_values": true + }, + "event_dur_ms": { + "type": "long", + "doc_values": true + }, + "evtTime": { + "type": "date", + "doc_values": true + }, + "id": { + "type": "keyword", + "store": true + }, + "logType": { + "type": "keyword" + }, + "policy": { + "type": "long", + "doc_values": true + }, + "proxyUsers": { + "type": "keyword" + }, + "reason": { + "type": "text" + }, + "repo": { + "type": "keyword" + }, + "repoType": { + "type": "integer", + "doc_values": true + }, + "req_caller_id": { + "type": "keyword" + }, + "req_self_id": { + "type": "keyword" + }, + "reqData": { + "type": "text" + }, + "reqUser": { + "type": "keyword" + }, + "reqEntityGuid": { + "type": "keyword" + }, + "resType": { + "type": "keyword" + }, + "resource": { + "type": "keyword", + "fields": { + "text": { + "type": "text", + "analyzer": "atlan_ranger_text_analyzer" + } + } + }, + "result": { + "type": "integer" + }, + "seq_num": { + "type": "long", + "doc_values": true + }, + "sess": { + "type": "keyword" + }, + "tags": { + "type": "keyword" + }, + "tags_str": { + "type": "text" + }, + "text": { + "type": "text" + }, + "zoneName": { + "type": "keyword" + }, + "policyVersion": { + "type": "long" + } + } + }, + "settings": { + "index": { + "analysis": { + "analyzer": { + "atlan_ranger_text_analyzer": { + "filter": [ + "lowercase" + ], + "type": "custom", + "tokenizer": "atlan_ranger_tokenizer" + } + }, + "tokenizer": { + "atlan_ranger_tokenizer": { + "pattern": "( |_|-|'\''|/|@)", + "type": "pattern" + } + } + } + } + } + }' + break + fi + + let "retries++" + done diff --git a/helm/atlas/templates/configmap.yaml b/helm/atlas/templates/configmap.yaml new file mode 100644 index 00000000000..815408e7f55 --- /dev/null +++ b/helm/atlas/templates/configmap.yaml @@ -0,0 +1,503 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + atlas-application.properties: | + # + # Licensed to the Apache Software Foundation (ASF) under one + # or more contributor license agreements. See the NOTICE file + # distributed with this work for additional information + # regarding copyright ownership. The ASF licenses this file + # to you under the Apache License, Version 2.0 (the + # "License"); you may not use this file except in compliance + # with the License. You may obtain a copy of the License at + # + # http://www.apache.org/licenses/LICENSE-2.0 + # + # Unless required by applicable law or agreed to in writing, software + # distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ # See the License for the specific language governing permissions and + # limitations under the License. + # + + ######### Graph Database Configs ######### + + # Graph Database + + #Configures the graph database to use. Defaults to JanusGraph + #atlas.graphdb.backend=org.apache.atlas.repository.graphdb.janus.AtlasJanusGraphDatabase + + # Graph Storage + # Set atlas.graph.storage.backend to the correct value for your desired storage + # backend. Possible values: + # + # hbase + # cassandra + # embeddedcassandra - Should only be set by building Atlas with -Pdist,embedded-cassandra-solr + # berkeleyje + # + # See the configuration documentation for more information about configuring the various storage backends. + # + # atlas.graph.storage.backend=hbase2 + # atlas.graph.storage.hbase.table=apache_atlas_janus + + + #Hbase + #For standalone mode , specify localhost + #for distributed mode, specify zookeeper quorum here + # atlas.graph.storage.hostname= + # atlas.graph.storage.hbase.regions-per-server=1 + # atlas.graph.storage.lock.wait-time=10000 + + #In order to use Cassandra as a backend, comment out the hbase specific properties above, and uncomment the + #the following properties + #atlas.graph.storage.clustername= + #atlas.graph.storage.port= + atlas.graph.storage.backend=cql + atlas.graph.storage.hostname={{ .Chart.Name }}-cassandra + atlas.graph.storage.cql.keyspace={{ .Chart.Name }} + atlas.graph.storage.cql.replication-factor={{ .Values.cassandra.config.cluster_size }} + atlas.graph.storage.clustername={{ .Values.cassandra.config.cluster_name }} + atlas.graph.storage.port={{ .Values.cassandra.config.ports.cql }} + atlas.graph.query.fast-property=true + atlas.graph.query.batch=true + atlas.graph.storage.cql.remote-core-connections-per-host=5 + atlas.graph.storage.cql.remote-max-connections-per-host=5 + atlas.graph.storage.cql.request-timeout=5000 + atlas.graph.graph.replace-instance-if-exists=true + # Gremlin Query Optimizer + # + # Enables rewriting gremlin queries to maximize performance. This flag is provided as + # a possible way to work around any defects that are found in the optimizer until they + # are resolved. + #atlas.query.gremlinOptimizerEnabled=true + + # Delete handler + # + # This allows the default behavior of doing "soft" deletes to be changed. + # + # Allowed Values: + # org.apache.atlas.repository.store.graph.v1.SoftDeleteHandlerV1 - all deletes are "soft" deletes + # org.apache.atlas.repository.store.graph.v1.HardDeleteHandlerV1 - all deletes are "hard" deletes + # + atlas.DeleteHandlerV1.impl=org.apache.atlas.repository.store.graph.v1.SoftDeleteHandlerV1 + + # This allows delete-type selection per REST API call + # Ref: http://mail-archives.apache.org/mod_mbox/atlas-dev/201811.mbox/%3CJIRA.13169850.1530632244000.352730.1542268860569@Atlassian.JIRA%3E + atlas.rest.enable.delete.type.override=true + + # Entity audit repository + # + # This allows the default behavior of logging entity changes to hbase to be changed. 
+ # + # Allowed Values: + # org.apache.atlas.repository.audit.HBaseBasedAuditRepository - log entity changes to hbase + # org.apache.atlas.repository.audit.CassandraBasedAuditRepository - log entity changes to cassandra + # org.apache.atlas.repository.audit.NoopEntityAuditRepository - disable the audit repository + # + atlas.EntityAuditRepository.impl=org.apache.atlas.repository.audit.NoopEntityAuditRepository + atlas.EntityAuditRepository.keyspace=atlas_audit + atlas.EntityAuditRepository.replicationFactor={{ .Values.cassandra.config.cluster_size }} + atlas.entity.audit.differential=true + atlas.EntityAuditRepositorySearch.impl=org.apache.atlas.repository.audit.ESBasedAuditRepository + # if Cassandra is used as a backend for audit from the above property, uncomment and set the following + # properties appropriately. If using the embedded cassandra profile, these properties can remain + # commented out. + # atlas.EntityAuditRepository.keyspace=atlas_audit + # atlas.EntityAuditRepository.replicationFactor=1 + + ######### Atlas Entity Attribute configs ######### + atlas.entities.attribute.allowed.large.attributes={{ .Values.atlas.config.entities_allowed_large_attributes }} + + # Graph Search Index + atlas.graph.index.search.backend=elasticsearch + + #Solr + #Solr cloud mode properties + # atlas.graph.index.search.solr.mode=cloud + # atlas.graph.index.search.solr.zookeeper-url= + # atlas.graph.index.search.solr.zookeeper-connect-timeout=60000 + # atlas.graph.index.search.solr.zookeeper-session-timeout=60000 + # atlas.graph.index.search.solr.wait-searcher=true + #Solr http mode properties + #atlas.graph.index.search.solr.mode=http + #atlas.graph.index.search.solr.http-urls=http://localhost:8983/solr + + # ElasticSearch support (Tech Preview) + # Comment out above solr configuration, and uncomment the following two lines. Additionally, make sure the + # hostname field is set to a comma delimited set of elasticsearch master nodes, or an ELB that fronts the masters. 
+ # + # Elasticsearch does not provide authentication out of the box, but does provide an option with the X-Pack product + # https://www.elastic.co/products/x-pack/security + # + # Alternatively, the JanusGraph documentation provides some tips on how to secure Elasticsearch without additional + # plugins: http://docs.janusgraph.org/latest/elasticsearch.html + + atlas.graph.index.search.hostname=atlas-elasticsearch-master:9200 + atlas.graph.index.search.elasticsearch.client-only=true + atlas.graph.index.search.elasticsearch.retry_on_conflict=5 + atlas.graph.index.search.max-result-set-size=1000 + atlas.index.audit.elasticsearch.total_field_limit=10000 + atlas.index.audit.elasticsearch.refresh_interval: 1s + + + # Solr-specific configuration property + # atlas.graph.index.search.max-result-set-size=150 + + ######### Notification Configs ######### + atlas.kafka.bootstrap.servers=kafka-0.kafka-headless.kafka.svc.cluster.local:9092,kafka-1.kafka-headless.kafka.svc.cluster.local:9092,kafka-2.kafka-headless.kafka.svc.cluster.local:9092 + + atlas.kafka.zookeeper.session.timeout.ms=60000 + atlas.kafka.zookeeper.connection.timeout.ms=30000 + atlas.kafka.zookeeper.sync.time.ms=20 + atlas.kafka.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 + + atlas.kafka.auto.commit.interval.ms=1000 + atlas.kafka.hook.group.id=atlas + atlas.kafka.enable.auto.commit=false + atlas.kafka.auto.offset.reset=earliest + atlas.kafka.session.timeout.ms=30000 + atlas.kafka.offsets.topic.replication.factor=2 + atlas.kafka.poll.timeout.ms=2000 + + atlas.notification.create.topics=true + atlas.notification.replicas=3 + atlas.notification.topics=ATLAS_HOOK,ATLAS_ENTITIES + atlas.notification.log.failed.messages=true + atlas.notification.failed.messages.filename=atlas_hook_failed_messages.log + atlas.notification.consumer.retry.interval=3000 + atlas.notification.hook.retry.interval=3000 + # Enable for Kerberized Kafka clusters + #atlas.notification.kafka.service.principal=kafka/_HOST@EXAMPLE.COM + #atlas.notification.kafka.keytab.location=/etc/security/keytabs/kafka.service.keytab + + ## Server port configuration + #atlas.server.http.port=21000 + #atlas.server.https.port=21443 + + ######### Security Properties ######### + + # SSL config + atlas.enableTLS=false + + #truststore.file=/path/to/truststore.jks + #cert.stores.credential.provider.path=jceks://file/path/to/credentialstore.jceks + + #following only required for 2-way SSL + #keystore.file=/path/to/keystore.jks + + # Authentication config + + atlas.authentication.method.kerberos=false + atlas.authentication.method.file=false + + atlas.authentication.method.keycloak=true + atlas.authentication.method.keycloak.file=${sys:atlas.home}/conf/keycloak.json + atlas.authentication.method.keycloak.ugi-groups=false + atlas.authentication.method.keycloak.groups_claim=groups + + #### ldap.type= LDAP or AD + atlas.authentication.method.ldap.type=none + + #### user credentials file + atlas.authentication.method.file.filename=${sys:atlas.home}/conf/users-credentials.properties + + ### groups from UGI + #atlas.authentication.method.ldap.ugi-groups=true + + ######## LDAP properties ######### + #atlas.authentication.method.ldap.url=ldap://:389 + #atlas.authentication.method.ldap.userDNpattern=uid={0},ou=People,dc=example,dc=com + #atlas.authentication.method.ldap.groupSearchBase=dc=example,dc=com + 
#atlas.authentication.method.ldap.groupSearchFilter=(member=uid={0},ou=Users,dc=example,dc=com) + #atlas.authentication.method.ldap.groupRoleAttribute=cn + #atlas.authentication.method.ldap.base.dn=dc=example,dc=com + #atlas.authentication.method.ldap.bind.dn=cn=Manager,dc=example,dc=com + #atlas.authentication.method.ldap.bind.password= + #atlas.authentication.method.ldap.referral=ignore + #atlas.authentication.method.ldap.user.searchfilter=(uid={0}) + #atlas.authentication.method.ldap.default.role= + + + ######### Active directory properties ####### + #atlas.authentication.method.ldap.ad.domain=example.com + #atlas.authentication.method.ldap.ad.url=ldap://:389 + #atlas.authentication.method.ldap.ad.base.dn=(sAMAccountName={0}) + #atlas.authentication.method.ldap.ad.bind.dn=CN=team,CN=Users,DC=example,DC=com + #atlas.authentication.method.ldap.ad.bind.password= + #atlas.authentication.method.ldap.ad.referral=ignore + #atlas.authentication.method.ldap.ad.user.searchfilter=(sAMAccountName={0}) + #atlas.authentication.method.ldap.ad.default.role= + + ######### JAAS Configuration ######## + + #atlas.jaas.KafkaClient.loginModuleName = com.sun.security.auth.module.Krb5LoginModule + #atlas.jaas.KafkaClient.loginModuleControlFlag = required + #atlas.jaas.KafkaClient.option.useKeyTab = true + #atlas.jaas.KafkaClient.option.storeKey = true + #atlas.jaas.KafkaClient.option.serviceName = kafka + #atlas.jaas.KafkaClient.option.keyTab = /etc/security/keytabs/atlas.service.keytab + #atlas.jaas.KafkaClient.option.principal = atlas/_HOST@EXAMPLE.COM + + ######### Server Properties ######### + atlas.rest.address=http://localhost:21000 + # If enabled and set to true, this will run setup steps when the server starts + #atlas.server.run.setup.on.start=false + + ######### Entity Audit Configs ######### + # atlas.audit.hbase.tablename=apache_atlas_entity_audit + # atlas.audigit.zookeeper.session.timeout.ms=1000 + # atlas.audit.hbase.zookeeper.quorum=localhost:2181 + + ######### High Availability Configuration ######## + {{- if eq .Values.atlas_ha false }} + atlas.server.ha.enabled=false + {{- else if eq .Values.atlas_ha true }} + atlas.server.ha.enabled=true + {{- else }} + atlas.server.ha.enabled=false + {{- end }} + atlas.server.type.cache-refresher=http://cinv.atlas.svc.cluster.local:5000/cinv + atlas.server.type.cache-refresher-health=http://cinv.atlas.svc.cluster.local:5000/health + #### Enabled the configs below as per need if HA is enabled ##### + {{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} + atlas.server.ids=id1,id2 + atlas.server.address.id1=atlas-0.atlas-service-atlas.atlas.svc.cluster.local:21000 + atlas.server.address.id2=atlas-1.atlas-service-atlas.atlas.svc.cluster.local:21000 + {{- end }} + atlas.server.ha.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 + atlas.server.ha.zookeeper.retry.sleeptime.ms=10000 + atlas.server.ha.zookeeper.num.retries=18 + atlas.server.ha.zookeeper.session.timeout.ms=20000 + ## if ACLs need to be set on the created nodes, uncomment these lines and set the values ## + #atlas.server.ha.zookeeper.acl=: + #atlas.server.ha.zookeeper.auth=: + + ######### Atlas Authorization ######### + #atlas.authorizer.impl=none + + {{- if eq .Values.atlas_auth true }} + atlas.authorizer.impl=atlas + {{- else }} + atlas.authorizer.impl=org.apache.ranger.authorization.atlas.authorizer.RangerAtlasAuthorizer + {{- end }} + + 
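+    # Authorizer feature flags below are templated from .Values.atlas.authorizer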
atlas.authorizer.enable.delta_based_refresh={{ .Values.atlas.authorizer.enable_delta_based_refresh }} + atlas.authorizer.enable.abac={{ .Values.atlas.authorizer.enable_abac }} + + #atlas.authorizer.simple.authz.policy.file=atlas-simple-authz-policy.json + + ######### Atlas User service ######### + atlas.user-service-url=http://heracles-service.heracles.svc.cluster.local + + ######### Type Cache Implementation ######## + # A type cache class which implements + # org.apache.atlas.typesystem.types.cache.TypeCache. + # The default implementation is org.apache.atlas.typesystem.types.cache.DefaultTypeCache which is a local in-memory type cache. + #atlas.TypeCache.impl= + + ######### Performance Configs ######### + atlas.graph.storage.lock.retries=5 + + ######### Redis Cache Configs ######### + {{- if eq .Values.atlas.cache.enabled false }} + atlas.graph.cache.db-cache=false + {{- else if eq .Values.atlas.cache.enabled true }} + atlas.graph.cache.db-cache=true + atlas.graph.metrics.merge-stores=false + atlas.graph.cache.cache-type=redis + atlas.graph.cache.db-cache-expiry-time=86400000 + atlas.graph.cache.cache-keyspace-prefix=atlas + atlas.graph.cache.redis-db-id=1 + atlas.graph.cache.redis-client-name=atlas-metastore + atlas.graph.cache.redis-cache-size=100000 + atlas.graph.cache.redis-cache-server-mode=sentinel + atlas.graph.cache.redis-cache-server-url=redis://{{ .Values.atlas.redis.host }}:{{ .Values.atlas.redis.port }} + atlas.graph.cache.redis-cache-sentinel-urls={{ .Values.atlas.redis.sentinel_urls }} + atlas.graph.cache.redis-cache-lock-watchdog-ms=300000 + atlas.graph.cache.redis-cache-username={{ .Values.atlas.redis.username }} + atlas.graph.cache.redis-cache-password={{ .Values.atlas.redis.password }} + atlas.graph.cache.redis-cache-mastername={{ .Values.atlas.redis.master_name }} + atlas.graph.cache.redis-cache-connectTimeout=2000 + {{- end }} + + atlas.webserver.minthreads=40 + # Maximum number of threads in the atlas web server + atlas.webserver.maxthreads=400 + # Keepalive time in secs for the thread pool of the atlas web server + atlas.webserver.keepalivetimesecs=30 + # Queue size for the requests(when max threads are busy) for the atlas web server + atlas.webserver.queuesize=200 + ######### CSRF Configs ######### + atlas.rest-csrf.enabled=false + #atlas.rest-csrf.browser-useragents-regex=^Mozilla.*,^Opera.*,^Chrome.* + #atlas.rest-csrf.methods-to-ignore=GET,OPTIONS,HEAD,TRACE + #atlas.rest-csrf.custom-header=X-XSRF-HEADER + + ############ KNOX Configs ################ + #atlas.sso.knox.browser.useragent=Mozilla,Chrome,Opera + #atlas.sso.knox.enabled=true + #atlas.sso.knox.providerurl=https://:8443/gateway/knoxsso/api/v1/websso + #atlas.sso.knox.publicKey= + + ############ Atlas Metric/Stats configs ################ + # Format: atlas.metric.query.. + atlas.metric.query.cache.ttlInSecs=900 + #atlas.metric.query.general.typeCount= + #atlas.metric.query.general.typeUnusedCount= + #atlas.metric.query.general.entityCount= + #atlas.metric.query.general.tagCount= + #atlas.metric.query.general.entityDeleted= + # + #atlas.metric.query.entity.typeEntities= + #atlas.metric.query.entity.entityTagged= + # + #atlas.metric.query.tags.entityTags= + + ######### Compiled Query Cache Configuration ######### + + # The size of the compiled query cache. Older queries will be evicted from the cache + # when we reach the capacity. + + #atlas.CompiledQueryCache.capacity=1000 + + # Allows notifications when items are evicted from the compiled query + # cache because it has become full. 
A warning will be issued when + # the specified number of evictions have occurred. If the eviction + # warning threshold <= 0, no eviction warnings will be issued. + + #atlas.CompiledQueryCache.evictionWarningThrottle=0 + + + ######### Full Text Search Configuration ######### + + #Set to false to disable full text search. + #atlas.search.fulltext.enable=true + + + ########## Atlas Discovery ############# + atlas.objectId.support.entity.attributes=true + atlas.objectId.entity.attributes=AtlasGlossary:name,AtlasGlossaryTerm:name,AtlasGlossaryCategory:name + + + ######### Gremlin Search Configuration ######### + + #Set to false to disable gremlin search. + atlas.search.gremlin.enable=false + + + ########## Add http headers ########### + + #atlas.headers.Access-Control-Allow-Origin=* + #atlas.headers.Access-Control-Allow-Methods=GET,OPTIONS,HEAD,PUT,POST,DELETE + #atlas.headers.Access-Control-Allow-Headers=* + #atlas.headers.= + + + ########## Slack Notification ############# + atlas.notifications.slackWebhook={{ .Values.atlas.notification.slackWebhook }} + + {{ if .Values.atlas.redis.enabled }} + {{ printf "\n" }} + ########## Add query metastore ########### + atlan.cache.redis.host={{ .Values.atlas.redis.host }} + atlan.cache.redis.port={{ .Values.atlas.redis.port }} + atlan.cache.redis.password={{ .Values.atlas.redis.password }} + atlas.cache.redis.maxConnections={{ .Values.atlas.redis.maxConnections }} + atlas.cache.redis.timeout={{ .Values.atlas.redis.timeout }} + atlan.EntityCacheListener.impl=org.apache.atlas.repository.cache.EntityCacheListenerV2 + atlan.QueryCacheRepository.impl=org.apache.atlas.repository.cache.AtlanQueryCacheRepository + {{ printf "\n" }} + + {{ end }} + + + ########## Atlas Monitoring ############# + + atlas.graph.metrics.enabled = true + atlas.graph.metrics.jmx.enabled = true + atlas.statsd.enable = true + + + ########## Atlas deferred-actions (background tasks) ############# + + atlas.tasks.enabled = true + + + ########## Ranger Credentials + + atlas.ranger.username = admin + atlas.ranger.password = {{ .Values.atlas.ranger.RANGER_PASSWORD }} + atlas.ranger.base.url = {{ .Values.atlas.ranger.RANGER_SERVICE_URL }} + + ####### Redis credentials ####### + ###### allowed values for redis implementation ########## + # org.apache.atlas.service.redis.RedisServiceImpl - connects to sentinel cluster, for prod. + # org.apache.atlas.service.redis.RedisServiceLocalImpl - connects to local redis cluster, for local dev. + # org.apache.atlas.service.redis.NoRedisServiceImpl - default, dummy redis implementation. 
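+    # Redis/Sentinel connection settings below (URLs, credentials, master name) are templated from .Values.atlas.redis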
+ atlas.redis.service.impl = org.apache.atlas.service.redis.RedisServiceImpl + atlas.redis.url = redis://{{ .Values.atlas.redis.host }}:{{ .Values.atlas.redis.port }} + atlas.redis.sentinel.urls = {{ .Values.atlas.redis.sentinel_urls }} + atlas.redis.username = {{ .Values.atlas.redis.username }} + atlas.redis.password = {{ .Values.atlas.redis.password }} + atlas.redis.master_name = {{ .Values.atlas.redis.master_name }} + atlas.redis.lock.wait_time.ms=15000 + # Renew lock for every 10mins + atlas.redis.lock.watchdog_timeout.ms=600000 + + atlas.jetty.request.buffer.size=32768 + + # valid uri patterns to collect metrics + atlas.metrics.uri_patterns=/api/(meta|atlas/v2)/glossary/terms/[^/]+/assignedEntities,/api/(meta|atlas/v2)/lineage/[^/]+,/api/(meta|atlas/v2)/lineage/list,/api/(meta|atlas/v2)/entity/accessors,/api/(meta|atlas/v2)/entity/auditSearch,/api/(meta|atlas/v2)/entity/bulk,/api/(meta|atlas/v2)/entity/bulk/setClassifications,/api/(meta|atlas/v2)/entity/bulk/uniqueAttribute,/api/(meta|atlas/v2)/entity/evaluator,/api/(meta|atlas/v2)/entity/guid/[^/]+,/api/(meta|atlas/v2)/entity/guid/[^/]+/businessmetadata,/api/(meta|atlas/v2)/entity/uniqueAttribute/type/[^/]+,/api/(meta|atlas/v2)/search/indexsearch,/api/(meta|atlas/v2)/entity/repairhaslineage,/api/(meta|atlas/v2)/types/typedef/name/[^/]+,/api/(meta|atlas/v2)/types/typedefs,/api/atlas/admin/metrics/prometheus,/api/atlas/admin/pushMetricsToStatsd,/api/atlas/v2/auth/download/policies/[^/]+,/api/atlas/v2/auth/download/roles/[^/]+,/api/atlas/v2/auth/download/users/[^/]+,/api/meta/entity/uniqueAttribute/type/[^/]+,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/groups,/auth/admin/realms/[^/]+/groups/[^/]+/role-mappings/realm,/auth/admin/realms/[^/]+/roles,/auth/admin/realms/[^/]+/roles-by-id/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+/composites,/auth/admin/realms/[^/]+/roles/[^/]+/groups,/auth/admin/realms/[^/]+/roles/[^/]+/users,/auth/admin/realms/[^/]+/users,/auth/admin/realms/[^/]+/users/[^/]+/groups,/auth/admin/realms/[^/]+/users/[^/]+/role-mappings/realm,/auth/realms/[^/]+/protocol/openid-connect/token,/auth/realms/[^/]+/protocol/openid-connect/token/introspect,/users/mappings,/roles/mappings,/api/(meta|atlas/v2)/business-policy/[^/]+/unlink-business-policy,/api/(meta|atlas/v2)/business-policy/link-business-policy,/api/(meta|atlas/v2)/direct/search,/api/(meta|atlas/v2)/attribute/update + + atlas.metrics.method_level.enable=true + atlas.metrics.method_patterns=policiesPrefetchFailed,processTermAssignments,elasticSearchQuery,elasticQueryTimeout,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,scrubEntityHeader,getDiffResult + {{ if .Values.atlas.janusgraph.atomic_mutation }} + ### Atomic batch related configs ### + atlas.graph.storage.cql.atomic-batch-mutate={{ .Values.atlas.janusgraph.atomic_mutation }} + atlas.graph.storage.buffer-size={{ .Values.atlas.janusgraph.janusgraph_tx_buffer_size }} + {{ end }} + 
+ ######### Canary-Release ######### + atlas.canary.keycloak.token-introspection = {{ .Values.atlas.keycloak.token_introspection }} + atlas.keycloak.introspection.use.cache = {{ .Values.atlas.keycloak.introspection_cache }} + + ######### Atlas Indexsearch configs ######### + atlas.indexsearch.enable.api.limit={{ .Values.atlas.indexsearch.enable_api_limit }} + atlas.indexsearch.query.size.max.limit={{ .Values.atlas.indexsearch.query_size_max_limit }} + atlas.indexsearch.async.enable={{ .Values.atlas.indexsearch.enable_async }} + atlas.indexsearch.async.search.keep.alive.time.in.seconds={{ .Values.atlas.indexsearch.request_timeout_in_secs }} + atlas.indexsearch.enable.janus.optimization={{ .Values.atlas.indexsearch.enable_janus_optimization }} + atlas.indexsearch.enable.janus.optimization.for.relationship={{ .Values.atlas.indexsearch.enable_janus_optimization_for_relationship }} + atlas.indexsearch.enable.janus.optimization.extended={{ .Values.atlas.indexsearch.enable_janus_optimization_extended }} + atlas.indexsearch.enable.janus.optimization.for.classifications={{ .Values.atlas.indexsearch.enable_janus_optimization_for_classifications }} + atlas.indexsearch.enable.janus.optimization.for.lineage={{ .Values.atlas.indexsearch.enable_janus_optimization_for_lineage }} + atlas.jg.super.vertex.edge.count={{ .Values.atlas.jg.super_vertex_edge_count }} + atlas.jg.super.vertex.edge.timeout={{ .Values.atlas.jg.super_vertex_edge_timeout }} + + ######### Atlas Bulk API configs ######### + atlas.bulk.api.max.entities.allowed={{ .Values.atlas.bulk.max_entities_allowed }} + atlas.bulk.api.enable.janus.optimization={{ .Values.atlas.bulk.enable_janus_optimization }} + + ######### Atlas Lineage configs ######### + atlas.lineage.optimised.calculation={{ .Values.atlas.lineage.optimised_calculation }} + atlas.lineage.enable.connection.lineage={{ .Values.atlas.lineage.enable_connection_lineage }} + ######### Atlas Distributed Task configs ######### + atlas.distributed.task.enabled={{ .Values.atlas.distributed_task.enabled }} + {{- if eq .Values.atlas.distributed_task.enabled true }} + atlas.relationship.cleanup.supported.asset.types={{ .Values.atlas.distributed_task.cleanup_supported_asset_types }} + atlas.relationship.cleanup.supported.relationship.labels={{ .Values.atlas.distributed_task.cleanup_supported_relationship_labels }} + {{- end }} + + ######### Atlas Typedefs update configs ######### + atlas.types.update.async.enable={{ .Values.atlas.types_update.async_enable }} + atlas.types.update.thread.count={{ .Values.atlas.types_update.thread_count }} + diff --git a/helm/atlas/templates/create-atlas-keycloak-config-cm.yaml b/helm/atlas/templates/create-atlas-keycloak-config-cm.yaml new file mode 100644 index 00000000000..742778777e8 --- /dev/null +++ b/helm/atlas/templates/create-atlas-keycloak-config-cm.yaml @@ -0,0 +1,32 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: create-atlas-keycloak-config-cm + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + create-atlas-keycloak-config.sh: | + cat <<EOF > /opt/apache-atlas/conf/keycloak.json + { + "realm": "KEYCLOAK_REALM", + "auth-server-url": "AUTH_SERVER_URL", + "ssl-required": "none", + "resource": "KEYCLOAK_CLIENT_ID", + "public-client": false, + "confidential-port": 80, + "principal-attribute": "preferred_username", + "autodetect-bearer-only": true, + "credentials": { + "secret": "KEYCLOAK_CLIENT_SECRET" + } + } + EOF + sed -i "s|KEYCLOAK_REALM|$KEYCLOAK_REALM|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|AUTH_SERVER_URL|$AUTH_SERVER_URL|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|KEYCLOAK_CLIENT_ID|$KEYCLOAK_CLIENT_ID|g" "/opt/apache-atlas/conf/keycloak.json" + sed -i "s|KEYCLOAK_CLIENT_SECRET|$KEYCLOAK_CLIENT_SECRET|g" "/opt/apache-atlas/conf/keycloak.json" + echo "Keycloak Config Created" diff --git a/helm/atlas/templates/deployment.yaml b/helm/atlas/templates/deployment.yaml new file mode 100644 index 00000000000..4f6539c0950 --- /dev/null +++ b/helm/atlas/templates/deployment.yaml @@ -0,0 +1,270 @@ +{{- if and (.Values.deploy) (.Values.deploy.enabled) }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: atlas + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "atlas-config,atlas-logback-config,atlas-audit-index,atlas-keycloak-config,atlas-init-script,atlas-init-container-script,rate-limit-nginx-config" + secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-keycloak-config" +spec: + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + {{- if not (and (.Values.hpa) (.Values.hpa.enabled)) }} + replicas: {{ .Values.atlas.replicaCount }} + {{- end }} + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + annotations: +{{ toYaml .Values.atlas.podAnnotations | indent 8 }} + spec: + {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} + affinity: + nodeAffinity: + {{- if eq .Values.atlas.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.atlas.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + {{- if eq .Values.atlas.podAntiAffinity true }} + podAntiAffinity: + {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} + {{- if or .Values.atlas.tolerations $multiarchEnabled }} + tolerations: + {{- if .Values.atlas.tolerations }} + {{ toYaml .Values.atlas.tolerations | nindent 8 }} + {{- end }} + {{- if $multiarchEnabled }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- end }} + initContainers: + {{- if .Values.atlas.initContainers }} + {{- toYaml .Values.atlas.initContainers | nindent 8 }} + {{- end }} + serviceAccountName: cinv-sa + containers: + - name: {{ .Chart.Name }}-main + command: [ + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] + image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" + imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} + ports: + - containerPort: {{ .Values.atlas.service.targetPort }} + env: + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: Namespace + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_SERVICE_NAME + value: atlas + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://$(HOST_IP):4317 + - name: OTEL_RESOURCE_ATTRIBUTES + value: >- + k8s.pod.name=$(K8S_POD_NAME),k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs + {{- toYaml .Values.atlas.env | nindent 12 }} + {{- if eq .Values.albTenant true }} + - name: ALB_ENABLED + value: "true" + {{- end }} + envFrom: + - secretRef: + name: atlas-keycloak-config + {{- if .Values.multitenant }} + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name + {{- end }} + resources: + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if eq $tierType "Enterprise" }} + {{ toYaml .Values.atlas.resources | nindent 12 }} + {{- else if eq $tierType "Basic" }} + {{ toYaml .Values.atlas.resources_basic | nindent 12 }} + {{- else if eq $tierType "Standard" }} + {{ toYaml .Values.atlas.resources_standard | nindent 12 }} + {{- else }} + {{- toYaml .Values.atlas.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: atlas-config + mountPath: /opt/apache-atlas/conf/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-logback-config + mountPath: /opt/apache-atlas/conf/atlas-logback.xml + subPath: atlas-logback.xml + - name: create-atlas-keycloak-config + mountPath: /create-atlas-keycloak-config.sh + subPath: create-atlas-keycloak-config.sh + - name: atlas-logs + mountPath: /opt/apache-atlas/logs + {{- if .Values.atlas.lifecycle }} + lifecycle: + {{- toYaml .Values.atlas.lifecycle | nindent 12 }} + {{- end }} + {{- if .Values.atlas.livenessProbe }} + livenessProbe: + {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.readinessProbe }} + readinessProbe: + {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegrafd + imagePullPolicy: IfNotPresent + {{- if and .Values.multiarch.enabled 
.Values.multiarch.image.telegrafd }} + image: {{ .Values.multiarch.image.telegrafd }} + {{- else }} + image: ghcr.io/atlanhq/telegraf:1.24.3 + {{- end }} + resources: + {{- toYaml .Values.atlas.telegraf.resources | nindent 12 }} + volumeMounts: + - name: telegraf-conf + mountPath: /etc/telegraf/ + - name: nginx-log-volume + mountPath: /var/log/nginx + readOnly: true + ports: + - name: telegrafd + containerPort: 9273 + {{- end }} + {{- if .Values.nginx.enabled }} + - name: nginx-ratelimit + image: ghcr.io/atlanhq/nginx-vts-atlan-v2:1.27.5.1-multiarch + ports: + - containerPort: 8080 + protocol: TCP + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-log-volume + mountPath: /var/log/nginx + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} + {{- end }} + volumes: + - name: atlas-logs + emptyDir: {} + - name: atlas-config + configMap: + name: atlas-config + - name: atlas-logback-config + configMap: + name: atlas-logback-config + - name: create-atlas-keycloak-config + configMap: + name: create-atlas-keycloak-config-cm + defaultMode: 0755 + - name: atlas-init-script + configMap: + name: atlas-init-script + defaultMode: 0755 + - name: atlas-init-container-script + configMap: + name: atlas-init-container-script + defaultMode: 0755 + - name: atlas-audit-index + configMap: + name: atlas-audit-index + defaultMode: 0755 + - name: atlas-config-map-rw-vol + emptyDir: {} + {{- if .Values.nginx.enabled }} + - name: nginx-log-volume + emptyDir: {} + - name: nginx-config + configMap: + name: rate-limit-nginx-config + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegraf-conf + configMap: + name: atlas-telegrafd + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/frontend-service.yaml b/helm/atlas/templates/frontend-service.yaml new file mode 100644 index 00000000000..afda5b1af74 --- /dev/null +++ b/helm/atlas/templates/frontend-service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: atlas-ui-service + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: {{ .Values.atlas.service.type }} + ports: + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + selector: + app: {{ template "name" . }} + release: {{ .Release.Name }} \ No newline at end of file diff --git a/helm/atlas/templates/healthcheck-ingress.yaml b/helm/atlas/templates/healthcheck-ingress.yaml new file mode 100644 index 00000000000..b9483b10a39 --- /dev/null +++ b/helm/atlas/templates/healthcheck-ingress.yaml @@ -0,0 +1,77 @@ +{{- if .Values.atlas.healthcheckIngress.enabled -}} +{{- $ingressApiIsStable := eq (include "atlas.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "atlas.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "atlas.ingress.supportsPathType" .) 
"true" -}} +{{- $servicePort := .Values.atlas.service.port -}} +{{- $ingressPath := .Values.atlas.healthcheckIngress.path -}} +{{- $ingressPathType := .Values.atlas.healthcheckIngress.pathType -}} +{{- $extraPaths := .Values.atlas.healthcheckIngress.extraPaths -}} +apiVersion: {{ include "atlas.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: atlas-healthcheck-atlas + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- if .Values.atlas.healthcheckIngress.annotations }} + annotations: + {{- range $key, $value := .Values.atlas.healthcheckIngress.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.atlas.secondaryIngress.ingressClassName }} + ingressClassName: {{ .Values.atlas.secondaryIngress.ingressClassName }} + {{- end -}} +{{- if .Values.atlas.secondaryIngress.tls }} + tls: +{{ tpl (toYaml .Values.atlas.secondaryIngress.tls) $ | indent 4 }} +{{- end }} + rules: + {{- if .Values.atlas.secondaryIngress.hosts }} + {{- range .Values.atlas.secondaryIngress.hosts }} + - host: {{ tpl . $}} + http: + paths: +{{- if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- if $ingressPath }} + path: {{ $ingressPath }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/hpa.yaml b/helm/atlas/templates/hpa.yaml new file mode 100644 index 00000000000..e3a420fe2d0 --- /dev/null +++ b/helm/atlas/templates/hpa.yaml @@ -0,0 +1,33 @@ +{{- if and (.Values.deploy) (.Values.deploy.enabled) (.Values.hpa) (.Values.hpa.enabled) -}} +{{- if and (.Capabilities.APIVersions.Has "autoscaling/v2") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: autoscaling/v2 +{{- else -}} +apiVersion: autoscaling/v2beta2 +{{- end }} +kind: HorizontalPodAutoscaler +metadata: + name: {{ .Values.hpa.name }} + namespace: {{ .Values.Namespace | default "default" }} + labels: +{{ toYaml .Values.hpa.labels | indent 8 }} +spec: + minReplicas: {{ .Values.atlas.replicaCount }} + maxReplicas: {{ add (int .Values.atlas.replicaCount) 2 }} + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ .Values.hpa.name }} + metrics: + - type: Resource + resource: + name: memory + target: + averageUtilization: {{ .Values.hpa.memory.averageUtilization }} + type: Utilization + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.hpa.cpu.averageUtilization }} +{{- end }} diff --git a/helm/atlas/templates/keycloak-config-secret.yaml b/helm/atlas/templates/keycloak-config-secret.yaml new file mode 100644 index 00000000000..e98532d4bd4 --- /dev/null +++ 
b/helm/atlas/templates/keycloak-config-secret.yaml @@ -0,0 +1,16 @@ +{{- if .Values.atlas.secrets }} +apiVersion: v1 +kind: Secret +metadata: + name: atlas-keycloak-config + namespace: {{ .Values.Namespace | default "default" }} +type: Opaque +stringData: + KEYCLOAK_REALM: {{ .Values.atlas.secrets.KEYCLOAK_REALM }} + AUTH_SERVER_URL: {{ .Values.atlas.secrets.AUTH_SERVER_URL }} + KEYCLOAK_CLIENT_ID: {{ .Values.atlas.secrets.KEYCLOAK_CLIENT_ID }} + {{- if .Values.multitenant }} + {{ else }} + KEYCLOAK_CLIENT_SECRET: {{ .Values.atlas.secrets.KEYCLOAK_CLIENT_SECRET }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/nginx-cm.yaml b/helm/atlas/templates/nginx-cm.yaml new file mode 100644 index 00000000000..7573cf8f5a4 --- /dev/null +++ b/helm/atlas/templates/nginx-cm.yaml @@ -0,0 +1,324 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: rate-limit-nginx-config + namespace: {{ .Values.Namespace }} +data: + nginx.conf: |- + # Load the dynamic VTS module (must be at the top level) + load_module /etc/nginx/modules/ngx_http_vhost_traffic_status_module.so; + + user nginx; + worker_processes auto; + error_log /dev/stderr warn; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Set a higher limit for the client request body size to prevent 413 errors + client_max_body_size {{ .Values.nginx.clientMaxBodySize | default "512m" }}; + client_body_buffer_size {{ .Values.nginx.clientBodyBufferSize | default "512k" }}; + + # Increased timeout for client sending request body to 10 minutes + client_body_timeout {{ .Values.nginx.clientBodyTimeout | default "600s" }}; + + # Effective Atlan request id: use incoming header if present, else nginx's $request_id + map $http_x_atlan_request_id $atlan_request_id { + default $http_x_atlan_request_id; + "" $request_id; + } + + # Map for the general API rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $effective_agent_id_key { + default {{ .Values.nginx.default.header }}; # Default to rate limiting by agent_id + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit) + } + + # Map for the /api/meta/entity/bulk path-based rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $bulk_path_key { + default {{ .Values.nginx.bulk.header }}; # Default to rate limiting by agent_id + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit) + } + + # Map for the /api/meta/search/indexsearch path-based rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $indexsearch_path_key { + default {{ .Values.nginx.indexsearch.header }}; # Default to rate limiting by agent_id for these paths + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit for this path) + } + + log_format ratelimit_json escape=json '{' + '"time_local": "$time_local", ' + '"remote_addr": "$remote_addr", ' + '"remote_user": "$remote_user", ' + '"request_method": "$request_method", ' + '"request_uri": "$request_uri", ' + '"request_protocol": "$server_protocol", ' + '"request_length": "$request_length", ' + '"status": "$status", ' + '"body_bytes_sent": "$body_bytes_sent", ' + '"bytes_sent": "$bytes_sent", ' + '"http_referer": "$http_referer", ' + '"http_user_agent": "$http_user_agent", ' + '"http_x_forwarded_for": "$http_x_forwarded_for", ' + '"http_x_atlan_client_origin": "$http_x_atlan_client_origin", ' + '"http_x_atlan_agent": 
"$http_x_atlan_agent", ' + '"http_x_atlan_agent_id": "$http_x_atlan_agent_id", ' + '"http_x_atlan_package_name": "$http_x_atlan_package_name", ' + '"http_x_atlan_agent_workflow_id": "$http_x_atlan_agent_workflow_id", ' + '"http_x_atlan_via_ui": "$http_x_atlan_via_ui", ' + '"http_x_atlan_request_id": "$atlan_request_id", ' + '"http_x_atlan_google_sheets_id": "$http_x_atlan_google_sheets_id", ' + '"http_x_atlan_microsoft_excel_id": "$http_x_atlan_microsoft_excel_id", ' + '"http_x_atlan_task_guid": "$http_x_atlan_task_guid", ' + '"http_x_atlan_route": "$http_x_atlan_route", ' + '"effective_agent_id_key_used": "$effective_agent_id_key", ' + '"bulk_path_key_used": "$bulk_path_key", ' + '"indexsearch_path_key_used": "$indexsearch_path_key", ' + '"limit_req_status": "$limit_req_status", ' + '"request_time": "$request_time", ' + '"upstream_addr": "$upstream_addr", ' + '"upstream_status": "$upstream_status", ' + '"upstream_response_time": "$upstream_response_time", ' + '"upstream_connect_time": "$upstream_connect_time", ' + '"upstream_header_time": "$upstream_header_time"' + '}'; + + access_log /dev/stdout ratelimit_json; # Logging to container standard output + + # Define rate limit zones + limit_req_zone $effective_agent_id_key zone=agent_id_zone:{{ .Values.nginx.default.zoneMemory | default "10m" }} rate={{ .Values.nginx.default.rate | default "500r" }}/{{ .Values.nginx.default.rateUnit | default "m" }}; + limit_req_zone $indexsearch_path_key zone=indexsearch_limit_zone:{{ .Values.nginx.indexsearch.zoneMemory | default "10m" }} rate={{ .Values.nginx.indexsearch.rate | default "500r" }}/{{ .Values.nginx.indexsearch.rateUnit | default "m" }}; + limit_req_zone $bulk_path_key zone=entity_bulk_limit_zone:{{ .Values.nginx.bulk.zoneMemory | default "10m" }} rate={{ .Values.nginx.bulk.rate | default "200r" }}/{{ .Values.nginx.bulk.rateUnit | default "m" }}; + + # Map for VTS filtering by URL group (path-based categorization) + map $request_uri $url_group { + "~*^/api/atlas/v2/search/indexsearch" "api_atlas_v2_indexsearch"; + "~*^/api/meta/search/indexsearch" "api_meta_indexsearch"; + "~*^/api/atlas/v2/entity/bulk" "api_atlas_v2_entitybulk"; + "~*^/api/meta/entity/bulk" "api_meta_entitybulk"; + "~*^/api/meta/entity/auditSearch" "api_meta_entity_auditsearch"; + "~*^/api/atlas/v2/entity/auditSearch" "api_atlas_v2_entity_auditsearch"; + "~*^/api/meta/types/typedefs" "api_meta_types_typedefs"; + "~*^/api/atlas/v2/types/typedefs" "api_atlas_v2_types_typedefs"; + "~*^/api/meta/lineage/list" "api_meta_lineage_list"; + "~*^/api/atlas/v2/lineage/list" "api_atlas_v2_lineage_list"; + "~*^/api/atlas" "api_atlas_other"; + "~*^/api/meta" "api_meta_other"; + default "other_paths"; + } + + # VTS Module Configuration + vhost_traffic_status_zone shared:vhost_traffic_status:32m; + # Filter by exact status code and then by URL group + vhost_traffic_status_filter_by_set_key $status $url_group; + + # Server for Nginx stub_status (internal) + server { + listen 127.0.0.1:8081; + server_name localhost; + location /nginx_status { + stub_status; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + } + + # Server for Nginx VTS (internal) + server { + listen 127.0.0.1:8082; + server_name localhost; + location /vts_status { + vhost_traffic_status_display; + vhost_traffic_status_display_format prometheus; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + location /vts_status_html { + vhost_traffic_status_display; + 
vhost_traffic_status_display_format html; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + } + + # Main application server + server { + listen 8080; + set $atlas_upstream http://127.0.0.1:21000; + + # Common proxy headers to be included in relevant locations + # Standard headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Custom Atlan headers + proxy_set_header x-atlan-agent $http_x_atlan_agent; + proxy_set_header x-atlan-agent-id $http_x_atlan_agent_id; + proxy_set_header x-atlan-package-name $http_x_atlan_package_name; + proxy_set_header x-atlan-agent-workflow-id $http_x_atlan_agent_workflow_id; + proxy_set_header x-atlan-via-ui $http_x_atlan_via_ui; + proxy_set_header x-atlan-request-id $atlan_request_id; + proxy_set_header x-atlan-google-sheets-id $http_x_atlan_google_sheets_id; + proxy_set_header x-atlan-microsoft-excel-id $http_x_atlan_microsoft_excel_id; + proxy_set_header x-atlan-task-guid $http_x_atlan_task_guid; + proxy_set_header x-atlan-client-origin $http_x_atlan_client_origin; + proxy_set_header x-atlan-route $http_x_atlan_route; + + # Most specific locations first + location = /api/meta/search/indexsearch { + limit_req zone=indexsearch_limit_zone burst={{ .Values.nginx.indexsearch.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; # This is an outgoing header to client + } + + location = /api/atlas/v2/search/indexsearch { # Exact match for this specific path + limit_req zone=indexsearch_limit_zone burst={{ .Values.nginx.indexsearch.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; # If needed + } + + location = /api/meta/entity/bulk { # Exact match for this specific path + limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.bulk.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/entity/bulk { + limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header 
http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # New locations for additional metrics - applying general API rate limit + location = /api/meta/entity/auditSearch { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/entity/auditSearch { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/meta/types/typedefs { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/types/typedefs { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/meta/lineage/list { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/lineage/list { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # General API prefixes (will catch remaining /api/meta/* and /api/atlas/*) + location /api/meta/ { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; # General API limit + 
limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location /api/atlas/ { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; # General API limit + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # Default location for all other requests (UI, etc.) + location / { + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + } + } + } diff --git a/helm/atlas/templates/pdb.yaml b/helm/atlas/templates/pdb.yaml new file mode 100644 index 00000000000..81248258ecd --- /dev/null +++ b/helm/atlas/templates/pdb.yaml @@ -0,0 +1,23 @@ +{{- if .Values.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: atlas + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + {{- if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} +{{- end }} diff --git a/helm/atlas/templates/podmonitor.yaml b/helm/atlas/templates/podmonitor.yaml new file mode 100644 index 00000000000..95f980f8d49 --- /dev/null +++ b/helm/atlas/templates/podmonitor.yaml @@ -0,0 +1,37 @@ +{{- if .Values.atlas.podMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: atlas-podmonitor + {{- if .Values.atlas.podMonitor.namespace }} + namespace: {{ .Values.atlas.podMonitor.namespace }} + {{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- toYaml .Values.atlas.podMonitor.labels | nindent 4 }} +spec: + podMetricsEndpoints: + - interval: {{ .Values.atlas.podMonitor.interval }} + {{- if .Values.atlas.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.atlas.podMonitor.scrapeTimeout }} + {{- end }} + honorLabels: true + port: telegrafd + path: /metrics + scheme: {{ .Values.atlas.podMonitor.scheme }} + {{- if .Values.atlas.podMonitor.relabelings }} + relabelings: + {{- toYaml .Values.atlas.podMonitor.relabelings | nindent 4 }} + {{- end }} + jobLabel: "atlas-podMonitor-job" + selector: + matchLabels: + app: {{ template "name" . 
}} + release: {{ .Release.Name }} + namespaceSelector: + matchNames: + - atlas +{{- end }} diff --git a/helm/atlas/templates/primary-ingress.yaml b/helm/atlas/templates/primary-ingress.yaml new file mode 100644 index 00000000000..dc7f21280df --- /dev/null +++ b/helm/atlas/templates/primary-ingress.yaml @@ -0,0 +1,81 @@ +{{- if .Values.atlas.ingress.enabled -}} +{{- if eq .Values.atlas.Deployment_Type "Development" -}} +{{- $ingressApiIsStable := eq (include "atlas.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "atlas.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "atlas.ingress.supportsPathType" .) "true" -}} +{{- $servicePort := .Values.atlas.service.port -}} +{{- $ingressPath := .Values.atlas.ingress.path -}} +{{- $ingressPathType := .Values.atlas.ingress.pathType -}} +{{- $extraPaths := .Values.atlas.ingress.extraPaths -}} +apiVersion: {{ include "atlas.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: atlas-atlas + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- if .Values.atlas.ingress.annotations }} + annotations: + {{- range $key, $value := .Values.atlas.ingress.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.atlas.ingress.ingressClassName }} + ingressClassName: {{ .Values.atlas.ingress.ingressClassName }} + {{- end -}} +{{- if .Values.atlas.ingress.tls }} + tls: + - hosts: + - {{ .Values.atlas.ingress.tls.host }} + secretName: {{ .Values.atlas.ingress.tls.secretName }} +{{- end }} + rules: + {{- if .Values.atlas.ingress.hosts }} + {{- range .Values.atlas.ingress.hosts }} + - host: {{ tpl . $}} + http: + paths: +{{- if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-ui-service + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-ui-service + servicePort: {{ $servicePort }} + {{- end }} + {{- if $ingressPath }} + path: {{ $ingressPath }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/ratelimited-service.yaml b/helm/atlas/templates/ratelimited-service.yaml new file mode 100644 index 00000000000..5c6e3cd290d --- /dev/null +++ b/helm/atlas/templates/ratelimited-service.yaml @@ -0,0 +1,32 @@ +{{- if not .Values.global.atlasNginx.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: atlas-ratelimited + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: ClusterIP + ports: + {{- if and .Values.nginx.enabled .Values.nginx.ratelimit.enabled }} + - port: 80 + targetPort: 8080 + protocol: TCP + name: {{ .Values.atlas.service.portName }} + {{- else }} + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + {{- end }} + selector: + app: {{ template "name" . }} + release: {{ .Release.Name }} + internalTrafficPolicy: Cluster + ipFamilyPolicy: SingleStack + sessionAffinity: None +{{- end }} diff --git a/helm/atlas/templates/secondary-ingress.yaml b/helm/atlas/templates/secondary-ingress.yaml new file mode 100644 index 00000000000..2989681601e --- /dev/null +++ b/helm/atlas/templates/secondary-ingress.yaml @@ -0,0 +1,172 @@ +{{- if .Values.atlas.secondaryIngress.enabled -}} +{{- if eq .Values.albTenant true }} +{{- $ingressApiIsStable := eq (include "atlas.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "atlas.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "atlas.ingress.supportsPathType" .) "true" -}} +{{- $servicePort := .Values.atlas.service.port -}} +{{- $ingressPath := .Values.atlas.secondaryIngress.path -}} +{{- $ingressPathType := .Values.atlas.secondaryIngress.pathType -}} +{{- $extraPaths := .Values.atlas.secondaryIngress.extraPaths -}} +apiVersion: {{ include "atlas.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: atlas-atlas-secondary + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- if .Values.atlas.secondaryIngress.annotations }} + annotations: + {{- range $key, $value := .Values.atlas.secondaryIngress.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.atlas.secondaryIngress.ingressClassName }} + ingressClassName: {{ .Values.atlas.secondaryIngress.ingressClassName }} + {{- end -}} +{{- if .Values.atlas.secondaryIngress.tls }} + tls: +{{ tpl (toYaml .Values.atlas.secondaryIngress.tls) $ | indent 4 }} +{{- end }} + rules: + {{- if .Values.atlas.secondaryIngress.hosts }} + {{- range .Values.atlas.secondaryIngress.hosts }} + - host: {{ tpl . 
$}} + http: + paths: +{{- if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + {{- if eq $.Values.global.atlasNginx.enabled true }} + name: atlas-ratelimited + {{- else }} + name: atlas-service-atlas + {{- end }} + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-service-atlas + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + service: + name: redirect-to-home + port: + name: use-annotation + path: /api/meta/admin/ + pathType: Prefix + - backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-service-atlas + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-service-atlas + servicePort: {{ $servicePort }} + {{- end }} + {{- if $ingressPath }} + path: {{ $ingressPath }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- else }} +{{- $ingressApiIsStable := eq (include "atlas.ingress.isStable" .) "true" -}} +{{- $ingressSupportsIngressClassName := eq (include "atlas.ingress.supportsIngressClassName" .) "true" -}} +{{- $ingressSupportsPathType := eq (include "atlas.ingress.supportsPathType" .) "true" -}} +{{- $servicePort := .Values.atlas.service.port -}} +{{- $ingressPath := .Values.atlas.secondaryIngress.path -}} +{{- $ingressPathType := .Values.atlas.secondaryIngress.pathType -}} +{{- $extraPaths := .Values.atlas.secondaryIngress.extraPaths -}} +apiVersion: {{ include "atlas.ingress.apiVersion" . }} +kind: Ingress +metadata: + name: atlas-atlas-secondary + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +{{- if eq .Values.global.cloud "azure" }} + annotations: + {{- range $key, $value := .Values.atlas.secondaryIngress.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} +{{- else }} + annotations: + kubernetes.io/ingress.class: "kong" + konghq.com/strip-path: "true" + konghq.com/preserve-host: "true" + konghq.com/plugins: keycloak-jwt, xss +{{- end }} +spec: + {{- if and $ingressSupportsIngressClassName .Values.atlas.secondaryIngress.ingressClassName }} + ingressClassName: {{ .Values.atlas.secondaryIngress.ingressClassName }} + {{- end -}} +{{- if .Values.atlas.secondaryIngress.tls }} + tls: +{{ tpl (toYaml .Values.atlas.secondaryIngress.tls) $ | indent 4 }} +{{- end }} + rules: + {{- if .Values.atlas.secondaryIngress.hosts }} + {{- range .Values.atlas.secondaryIngress.hosts }} + - host: {{ tpl . 
$}} + http: + paths: +{{- if $extraPaths }} +{{ toYaml $extraPaths | indent 10 }} +{{- end }} + - path: {{ $ingressPath }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-service-atlas + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-service-atlas + servicePort: {{ $servicePort }} + {{- end }} + {{- end }} + {{- else }} + - http: + paths: + - backend: + {{- if $ingressApiIsStable }} + service: + name: atlas-service-atlas + port: + number: {{ $servicePort }} + {{- else }} + serviceName: atlas-service-atlas + servicePort: {{ $servicePort }} + {{- end }} + {{- if $ingressPath }} + path: {{ $ingressPath }} + {{- end }} + {{- if $ingressSupportsPathType }} + pathType: {{ $ingressPathType }} + {{- end }} + {{- end -}} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/service.yaml b/helm/atlas/templates/service.yaml new file mode 100644 index 00000000000..3ace207d0da --- /dev/null +++ b/helm/atlas/templates/service.yaml @@ -0,0 +1,47 @@ +apiVersion: v1 +kind: Service +metadata: + name: atlas-service-atlas + namespace: {{ .Values.Namespace }} + annotations: + {{- if eq .Values.global.cloud "gcp" }} + cloud.google.com/neg: '{"exposed_ports": {"80":{"name": "{{ .Values.global.tenantName }}-atlas-80-neg-http"}}}' + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- else if eq .Values.global.cloud "azure" }} + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- else }} + konghq.com/path: /api/atlas/v2/ + konghq.com/plugins: svc-rate-limit + {{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + {{- if eq .Values.albTenant true }} + type: {{ .Values.atlas.service.type }} + {{- else if eq .Values.global.cloud "gcp" }} + type: ClusterIP + {{- else if eq .Values.global.cloud "azure" }} + type: ClusterIP + {{- else }} + type: ClusterIP + {{- end }} + ports: + {{- if and .Values.nginx.enabled .Values.nginx.ratelimit.default_atlas_service }} + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + {{- else }} + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + {{- end }} + selector: + app: {{ template "name" . }} + release: {{ .Release.Name }} diff --git a/helm/atlas/templates/statefulset.yaml b/helm/atlas/templates/statefulset.yaml new file mode 100644 index 00000000000..28e2d9fb439 --- /dev/null +++ b/helm/atlas/templates/statefulset.yaml @@ -0,0 +1,253 @@ +{{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: atlas + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + annotations: + configmap.reloader.stakater.com/reload: "atlas-config,atlas-logback-config,atlas-audit-index,atlas-keycloak-config,atlas-init-script,atlas-init-container-script,rate-limit-nginx-config" + secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-keycloak-config" +spec: + selector: + matchLabels: + app: {{ template "name" . 
}} + release: {{ .Release.Name }} + replicas: {{ .Values.atlas.replicaCount }} + serviceName: "atlas-service-atlas" + template: + metadata: + labels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + annotations: +{{ toYaml .Values.atlas.podAnnotations | indent 8 }} + spec: + {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} + affinity: + nodeAffinity: + {{- if eq .Values.atlas.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} + - matchExpressions: + - key: purpose + operator: In + values: + - search + - key: karpenter.sh/capacity-type + operator: In + values: + - on-demand + {{- else }} + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range .Values.atlas.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- end }} + {{- if eq .Values.atlas.podAntiAffinity true }} + podAntiAffinity: + {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} + {{- end }} + {{- end }} + {{- if .Values.priorityClassName }} + priorityClassName: {{ .Values.priorityClassName }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + initContainers: + {{- if .Values.atlas.initContainers }} + {{- toYaml .Values.atlas.initContainers | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }}-main + command: [ + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] + image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" + imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} + ports: + - containerPort: {{ .Values.atlas.service.targetPort }} + env: + - name: K8S_POD_IP + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_UID + valueFrom: + fieldRef: + fieldPath: metadata.uid + - name: K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: Namespace + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: HOST_IP + valueFrom: + fieldRef: + fieldPath: status.hostIP + - name: OTEL_SERVICE_NAME + value: atlas + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: http://$(HOST_IP):4317 + - name: OTEL_RESOURCE_ATTRIBUTES + value: >- + k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs + {{- toYaml .Values.atlas.env | nindent 12 }} + {{- if eq .Values.albTenant true }} + - name: ALB_ENABLED + value: "true" + {{- end }} + envFrom: + - secretRef: + name: atlas-keycloak-config + {{- if .Values.multitenant }} + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name + {{- end }} + resources: + {{- $tierType := .Values.global.Tier_Type | default "" }} + {{- if eq $tierType "Enterprise" }} + {{ toYaml .Values.atlas.resources | nindent 12 }} + {{- else if eq $tierType "Basic" }} + {{ toYaml 
.Values.atlas.resources_basic | nindent 12 }} + {{- else if eq $tierType "Standard" }} + {{ toYaml .Values.atlas.resources_standard | nindent 12 }} + {{- else }} + {{- toYaml .Values.atlas.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: atlas-config + mountPath: /opt/apache-atlas/conf/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-logback-config + mountPath: /opt/apache-atlas/conf/atlas-logback.xml + subPath: atlas-logback.xml + - name: create-atlas-keycloak-config + mountPath: /create-atlas-keycloak-config.sh + subPath: create-atlas-keycloak-config.sh + - name: atlas-logs + mountPath: /opt/apache-atlas/logs + {{- if .Values.atlas.lifecycle }} + lifecycle: + {{- toYaml .Values.atlas.lifecycle | nindent 12 }} + {{- end }} + {{- if .Values.atlas.livenessProbe }} + livenessProbe: + {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.readinessProbe }} + readinessProbe: + {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegrafd + imagePullPolicy: IfNotPresent + {{- if and .Values.multiarch.enabled .Values.multiarch.image.telegrafd }} + image: {{ .Values.multiarch.image.telegrafd }} + {{- else }} + image: ghcr.io/atlanhq/telegraf:1.24.3 + {{- end }} + volumeMounts: + - name: telegraf-conf + mountPath: /etc/telegraf/ + - name: nginx-log-volume + mountPath: /var/log/nginx + readOnly: true + ports: + - name: telegrafd + containerPort: 9273 + {{- end }} + {{- if .Values.nginx.enabled }} + - name: nginx-ratelimit + image: ghcr.io/atlanhq/nginx-vts-atlan-v2:1.27.5.1-multiarch + ports: + - containerPort: 8080 + protocol: TCP + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-log-volume + mountPath: /var/log/nginx + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} + {{- end }} + volumes: + - name: atlas-logs + emptyDir: {} + - name: atlas-config + configMap: + name: atlas-config + - name: atlas-logback-config + configMap: + name: atlas-logback-config + - name: create-atlas-keycloak-config + configMap: + name: create-atlas-keycloak-config-cm + defaultMode: 0755 + - name: atlas-init-script + configMap: + name: atlas-init-script + defaultMode: 0755 + - name: atlas-init-container-script + configMap: + name: atlas-init-container-script + defaultMode: 0755 + - name: atlas-audit-index + configMap: + name: atlas-audit-index + defaultMode: 0755 + - name: atlas-config-map-rw-vol + emptyDir: {} + {{- if .Values.nginx.enabled }} + - name: nginx-log-volume + emptyDir: {} + - name: nginx-config + configMap: + name: rate-limit-nginx-config + {{- end }} + {{- if .Values.atlas.telegraf.enabled }} + - name: telegraf-conf + configMap: + name: atlas-telegrafd + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/templates/statsd-cronjob.yaml b/helm/atlas/templates/statsd-cronjob.yaml new file mode 100644 index 00000000000..52763ea386e --- /dev/null +++ b/helm/atlas/templates/statsd-cronjob.yaml @@ -0,0 +1,63 @@ +{{- if .Values.atlas.statsdJob.enabled }} +{{- if and (.Capabilities.APIVersions.Has "batch/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +apiVersion: batch/v1 +{{- else -}} +apiVersion: batch/v1beta1 +{{- end }} +kind: CronJob +metadata: + name: atlas-statsd-job + 
namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + failedJobsHistoryLimit: 1 + successfulJobsHistoryLimit: 1 + concurrencyPolicy: Replace + schedule: "{{ .Values.atlas.statsdJob.schedule }}" + jobTemplate: + spec: + template: + spec: + {{- if or (and .Values.multiarch (eq .Values.multiarch.enabled true)) .Values.atlas.tolerations }} + tolerations: + {{- if and .Values.multiarch (eq .Values.multiarch.enabled true) }} + - key: "archtype" + operator: "Equal" + value: "arm64" + effect: "NoSchedule" + {{- end }} + {{- if .Values.atlas.tolerations }} + {{ toYaml .Values.atlas.tolerations | nindent 10 }} + {{- end }} + {{- end }} + {{- with .Values.atlas.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- if .Values.atlas.imagePullSecrets }} + imagePullSecrets: + {{- toYaml .Values.atlas.imagePullSecrets | nindent 12 }} + {{- end }} + containers: + - name: metrics-job + {{- if and .Values.multiarch.enabled .Values.multiarch.image.alpine_curl }} + image: {{ .Values.multiarch.image.alpine_curl }} + {{- else }} + image: ghcr.io/atlanhq/alpine-curl:3.14 + {{- end }} + command: + - sh + - -c + - | + echo "[+] Checking for Atlas" + until printf "." && curl -X GET "http://atlas-service-atlas.atlas.svc.cluster.local/api/atlas/admin/health"; do + sleep 2; + done; + echo 'Atlas OK ✓' + curl -X GET "http://atlas-service-atlas.atlas.svc.cluster.local/api/atlas/admin/pushMetricsToStatsd" + restartPolicy: Never +{{- end -}} diff --git a/helm/atlas/templates/telegraf-config.yaml b/helm/atlas/templates/telegraf-config.yaml new file mode 100644 index 00000000000..2be718e12d1 --- /dev/null +++ b/helm/atlas/templates/telegraf-config.yaml @@ -0,0 +1,150 @@ +{{- if .Values.atlas.telegraf.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: atlas-telegrafd + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: + telegraf.conf: |- + # Telegraf Configuration + # Global Agent Configuration + [agent] + interval = "10s" # Default data collection interval + round_interval = true + metric_batch_size = 1000 + metric_buffer_limit = 10000 + collection_jitter = "0s" + flush_interval = "10s" + flush_jitter = "0s" + precision = "" + # debug = false + # quiet = false + hostname = "" # Will be set by Telegraf automatically + omit_hostname = false + {{- if .Values.nginx.ratelimit.enabled }} + # Input plugin for basic Nginx stub_status metrics + [[inputs.nginx]] + ## An array of Nginx stub_status URI to gather stats from. + urls = ["http://127.0.0.1:8081/nginx_status"] + ## Response timeout. + # response_timeout = "5s" + # Add new input plugin for Prometheus format from Nginx VTS + [[inputs.prometheus]] + ## An array of URLs to scrape Prometheus metrics from. + urls = ["http://127.0.0.1:8082/vts_status"] # URL of the VTS status endpoint + ## Metric version for parsing. Default is 0 (auto), try 2 if issues. + # metric_version = 0 # Or 2 for OpenMetrics if VTS outputs that + ## Timeout for scraping. 
+ # response_timeout = "5s" + ## Optional: Add tags to metrics from this input + # [inputs.prometheus.tags] + # source = "nginx_vts" + {{- end }} + [global_tags] + deployment="atlas" + [[inputs.http]] + urls = ["http://127.0.0.1:21000/api/atlas/admin/metrics/prometheus"] + data_format = "prometheus" + name_override = "atlas-metastore" + [[inputs.statsd]] + service_address = ":8125" + [[inputs.jolokia2_agent]] + name_override = "atlas" + urls = ["http://127.0.0.1:7777/jolokia"] + [[inputs.jolokia2_agent.metric]] + name = "heap_memory_usage" + mbean = "java.lang:type=Memory" + paths = ["HeapMemoryUsage"] + field_prefix = "memory_" + [[inputs.jolokia2_agent.metric]] + name = "non_heap_memory_usage" + mbean = "java.lang:type=Memory" + paths = ["NonHeapMemoryUsage"] + field_prefix = "memory_" + [[inputs.jolokia2_agent.metric]] + name = "thread_count" + mbean = "java.lang:type=Threading" + paths = ["TotalStartedThreadCount,ThreadCount,DaemonThreadCount,PeakThreadCount"] + field_prefix = "thread_" + [[inputs.jolokia2_agent.metric]] + name = "class_count" + mbean = "java.lang:type=ClassLoading" + paths = ["LoadedClassCount,UnloadedClassCount,TotalLoadedClassCount"] + field_prefix = "class_" + [[inputs.jolokia2_agent.metric]] + name = "os" + mbean = "java.lang:type=OperatingSystem" + paths = ["MaxFileDescriptorCount,ProcessCpuTime,AvailableProcessors,SystemCpuLoad,TotalSwapSpaceSize,OpenFileDescriptorCount,FreePhysicalMemorySize,CommittedVirtualMemorySize,ProcessCpuLoad,FreeSwapSpaceSize,TotalPhysicalMemorySize"] + field_prefix = "os_" + [[inputs.jolokia2_agent.metric]] + name = "tenured_gen_memorypool" + mbean = "java.lang:name=Tenured Gen,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,UsageThresholdCount,UsageThreshold,PeakUsage"] + field_prefix = "memory_tenured_gen_" + [[inputs.jolokia2_agent.metric]] + name = "par_eden_space_gen_memorypool" + mbean = "java.lang:name=Par Eden Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_par_eden_" + [[inputs.jolokia2_agent.metric]] + name = "par_survivor_memorypool" + mbean = "java.lang:name=Par Survivor Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_par_survivor_" + [[inputs.jolokia2_agent.metric]] + name = "g1_survivor_memorypool" + mbean = "java.lang:name=G1 Survivor Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_g1_survivor_" + [[inputs.jolokia2_agent.metric]] + name = "g1_eden_memorypool" + mbean = "java.lang:name=G1 Eden Space,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,PeakUsage"] + field_prefix = "memory_g1_eden_" + [[inputs.jolokia2_agent.metric]] + name = "g1_oldgen_memorypool" + mbean = "java.lang:name=G1 Old Gen,type=MemoryPool" + paths = ["CollectionUsageThresholdExceeded,CollectionUsage,CollectionUsageThresholdSupported,CollectionUsageThreshold,Usage,UsageThresholdCount,UsageThreshold,PeakUsage"] + field_prefix = "memory_g1_oldgen_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_parnew" + mbean = 
"java.lang:name=ParNew,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_parnew_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_marksweep" + mbean = "java.lang:name=MarkSweepCompact,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_marksweep_" + [[inputs.jolokia2_agent.metric]] + name = "jvm_runtime" + mbean = "java.lang:type=Runtime" + paths = ["Uptime"] + field_prefix = "uptime_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_g1_young" + mbean = "java.lang:name=G1 Young Generation,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_g1_young_" + [[inputs.jolokia2_agent.metric]] + name = "garbage_collection_g1_old" + mbean = "java.lang:name=G1 Old Generation,type=GarbageCollector" + paths = ["ObjectName,CollectionCount,Valid,CollectionTime,LastGcInfo,MemoryPoolNames"] + field_prefix = "gc_g1_old_" + [[inputs.jolokia2_agent.metric]] + name = "janusgraph_metrics" + mbean = "metrics:name=org.janusgraph.*,*" + field_prefix = "$1" + paths = ["Max", "Min", "Mean", "StdDev", "50thPercentile", + "75thPercentile", "95thPercentile", "98thPercentile", + "99thPercentile", "999thPercentile", "Count", "FifteenMinuteRate", + "FiveMinuteRate", "MeanRate", "OneMinuteRate", "SnapshotSize"] + [[outputs.prometheus_client]] + ## Address to listen on. + listen = ":9273" +{{- end }} diff --git a/helm/atlas/templates/tls_secrets.yaml b/helm/atlas/templates/tls_secrets.yaml new file mode 100644 index 00000000000..b8fa240d544 --- /dev/null +++ b/helm/atlas/templates/tls_secrets.yaml @@ -0,0 +1,10 @@ +{{- if .Values.atlas.ingress.tlsSecrets }} +apiVersion: v1 +kind: Secret +metadata: + name: tls-wildcard-atlas + namespace: {{ .Values.Namespace | default "default" }} +type: Opaque +data: +{{- toYaml .Values.atlas.ingress.tlsSecrets | nindent 8 }} +{{- end }} diff --git a/helm/atlas/values.yaml b/helm/atlas/values.yaml new file mode 100644 index 00000000000..b6638c6d84a --- /dev/null +++ b/helm/atlas/values.yaml @@ -0,0 +1,502 @@ + +multiarch: + enabled: false + image: {} + +# Default values for atlas. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+global: + Tier_Type: "" + cloud: "" + tenantName: "" + svcIsolation: + enabled: false + atlasNginx: + enabled: false + +Namespace: atlas +sentry_flag: disable +albTenant: false +podDisruptionBudget: + enabled: true + minAvailable: "1" + +hpa: + name: atlas + labels: + app: atlas + cpu: + averageUtilization: 85 + memory: + averageUtilization: 85 + +atlas: + cache: + enabled: false + podAntiAffinity: true + custom_deployment: + enabled: false + instance_type: + - m6a.2xlarge + sentry: + sampleRate: 0.5 + ranger: + RANGER_PASSWORD: '{{repl ConfigOption "RangerPassword"}}' + RANGER_SERVICE_URL: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + multitenant: '' + Deployment_Type: '' + replicaCount: 2 + config: + entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" + janusgraph: + atomic_mutation: true + janusgraph_tx_buffer_size: 8192 + keycloak: + token_introspection: true + introspection_cache: false + indexsearch: + enable_api_limit: false + query_size_max_limit: 100000 + enable_async: true + request_timeout_in_secs: 60 + enable_janus_optimization: true + enable_janus_optimization_for_relationship: true + enable_janus_optimization_for_classifications: false + enable_janus_optimization_extended: true + enable_janus_optimization_for_lineage: false + jg: + super_vertex_edge_count: 100000 + super_vertex_edge_timeout: 30 + bulk: + max_entities_allowed: 10000 + enable_janus_optimization: true + lineage: + optimised_calculation: true + enable_connection_lineage: false + authorizer: + enable_delta_based_refresh: true + enable_abac: true + index: + audit_index_field_limit: 10000 + audit_index_refresh_interval: 1s + distributed_task: + enabled: false + cleanup_supported_asset_types: "Process,AirflowTask" + cleanup_supported_relationship_labels: "__Process.inputs,__Process.outputs,__AirflowTask.inputs,__AirflowTask.outputs" + types_update: + async_enable: true + thread_count: 5 + + podAnnotations: + backup.velero.io/backup-volumes-excludes: master + + image: + repository: ghcr.io/atlanhq/atlas-metastore-ATLAS_BRANCH_NAME + tag: ATLAS_LATEST_IMAGE_TAG + pullPolicy: IfNotPresent + imagePullSecrets: {} + tolerations: [] + + # Affinity rules for atlas + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - weight: 1 + preference: + matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + app: atlas + topologyKey: kubernetes.io/hostname + + # Kubernetes service for atlas + service: + portName: atlas + type: ClusterIP + path: /api/atlas/v2/ + port: 80 + targetPort: 21000 + + + # kubernetes lifecycle hooks + lifecycle: + preStop: + exec: + command: + - /bin/sh + - -c + - curl -X GET http://localhost:21000/api/atlas/admin/killtheleader + + # Kubernetes ingress for atlas + # Primary ingress. + ingress: + enabled: true + serviceName: atlas-ui-service + annotations: + kubernetes.io/ingress.class: "kong" + konghq.com/preserve-host: "true" + konghq.com/plugins: keycloak-jwt, xss + labels: {} + path: / + # pathType is only for k8s >= 1.1= + pathType: ImplementationSpecific + hosts: [] + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. 
+ extraPaths: [] + tls: {} + # Secrets for SSl + tlsSecrets: + tls.key: '' + tls.crt: '' + + # Healthcheck ingress data. + healthcheckIngress: + enabled: true + annotations: + kubernetes.io/ingress.class: "kong" + ## Path for grafana ingress + path: /api/atlas/admin/status + # pathType is only for k8s > 1.19 + pathType: Prefix + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + + # Secondary ingress which can be used to provide access on /atlas path + secondaryIngress: + enabled: true + # Used to create an Ingress record. + hosts: [] + ## Path for grafana ingress + path: /api/meta/ + # pathType is only for k8s > 1.19 + pathType: Prefix + labels: {} + ## Extra paths to prepend to every host configuration. This is useful when working with annotation based services. + extraPaths: [] + annotations: + kubernetes.io/ingress.class: "kong" + konghq.com/strip-path: "true" + konghq.com/preserve-host: "true" + konghq.com/plugins: keycloak-jwt, xss + tls: [] + # Secrets for SSl + tlsSecrets: + tls.key: '' + tls.crt: '' + + # Node selector config for atlas statefulset + nodeSelector: {} + priorityClassName: "" + # Init container for atlas. Right now all checks are combined into one init container to reduce atlas start time. + initContainers: + - name: init-container-bundle + image: ghcr.io/atlanhq/alpine-python-atlan-v2:3.9.21 + imagePullPolicy: IfNotPresent + volumeMounts: + - name: atlas-init-script + mountPath: /tmp/atlas-init.sh + subPath: atlas-init.sh + - name: atlas-config + mountPath: /tmp/configfile/atlas-application.properties + subPath: atlas-application.properties + - name: atlas-config-map-rw-vol + mountPath: /tmp/newconfigfile + - name: atlas-audit-index + mountPath: /scripts/atlas-audit.sh + subPath: atlas-audit.sh + - name: atlas-init-container-script + mountPath: /scripts/atlas-init-container.sh + subPath: atlas-init-container.sh + env: + - name: ATLAS_SERVICE_NAME + value: 'atlas' + - name: RANGER_SERVICE_URL + value: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + - name: RANGER_USERNAME + value: '' + - name: RANGER_PASSWORD + value: '' + - name: KEYCLOAK_ADDRESS + value: 'http://keycloak-http.keycloak.svc.cluster.local/auth' + command: + - /scripts/atlas-init-container.sh + + + resources: + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ limits: + cpu: 3000m + memory: 8Gi + requests: + cpu: 3000m + memory: 8Gi + + resources_basic: + limits: + memory: 4Gi + requests: + memory: 20Mi + + resources_standard: + limits: + memory: 6Gi + requests: + memory: 20Mi + + # Liveness and readiness probes for atlas + livenessProbe: + failureThreshold: 3 + httpGet: + path: /api/atlas/admin/health + port: 21000 + scheme: HTTP + initialDelaySeconds: 720 + periodSeconds: 60 + successThreshold: 1 + timeoutSeconds: 5 + readinessProbe: + httpGet: + path: /api/atlas/admin/health + port: 21000 + scheme: HTTP + failureThreshold: 3 + initialDelaySeconds: 30 + periodSeconds: 60 + successThreshold: 1 + timeoutSeconds: 5 + + env: + - name: ATLAS_SERVER_OPTS + value: '-XX:MaxRAMPercentage=80.0 -XX:InitialRAMPercentage=50.0' + - name: MAVEN_OPTS + value: '-Xmx4g -Xms4g' + - name: ATLAS_CLIENT_OPTS + value: '-Xmx1g -Xms1g' + - name: RANGER_SERVICE_URL + value: 'http://ranger-service.ranger.svc.cluster.local:80/api/policy' + - name: ATLAS_REPOSITORY_NAME + value: "atlas" + - name: ATLAS_USE_LEGACY_SEARCH + value: "false" + + + # We are using these in configmap for atlas-keycloak + secrets: + AUTH_SERVER_URL: '' + KEYCLOAK_REALM: '' + KEYCLOAK_CLIENT_ID: '' + KEYCLOAK_CLIENT_SECRET: '' + SENTRY_DSN_SECRET: '' + SENTRY_DSN_DEV: '' + SENTRY_DSN_PROD: '' + INSTANCE_NAME: '' + + # Redis config for atlas + # This is used in atlas configmap + redis: + enabled: true + host: ${USER_REDIS_HOST} + port: ${USER_REDIS_PORT} + sentinel_urls: ${USER_REDIS_SENTINEL_HOSTS} + master_name: ${USER_REDIS_MASTER_SET_NAME} + password: ${MASTER_PASSWORD} + username: ${USER_REDIS} + maxConnections: 100 + timeout: 100000 + + # Pod monitor to send metrics from telegraf to prometheus + podMonitor: + ## If true, a PodMonitor CRD is created for a prometheus operator + ## https://github.com/coreos/prometheus-operator + ## + enabled: true + namespace: monitoring + labels: + app: prometheus-operator + release: prometheus-operator + interval: 30s + scrapeTimeout: 10s + scheme: http + relabelings: [] + + # Flag to enable telegraf sidecar for metrics + telegraf: + enabled: true + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 300m + memory: 256Mi + + # Flag to enable statsD cronjob and schedule + statsdJob: + enabled: true + schedule: '*/10 * * * *' + + # Used in atlas configmaps + # can be used to setup slack notifications + notification: + slackWebhook: '' + +cassandra: + + updateStrategy: + type: RollingUpdate + + resources: + requests: + memory: 4Gi + #cpu: 1500m + limits: + memory: 5Gi + #cpu: 2000m + + # Config for cassandra + + max_heap_size: 2048M + heap_new_size: 512M + + config: + cluster_domain: cluster.local + cluster_name: cassandra + cluster_size: 3 + seed_size: 3 + start_rpc: true + ports: + cql: 9042 + + + + # Persistence changes for cassandra + persistence: + enabled: true + accessMode: ReadWriteOnce + size: 10Gi + + nodeSelector: {} + # nodegroup: atlan-atlas + + ## Affinity for pod assignment + ## Ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + affinity: + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchExpressions: + - key: "app" + operator: In + values: + - cassandra + topologyKey: "kubernetes.io/hostname" + # Cassandra exporter configuration + exporter: + enabled: true + serviceMonitor: + enabled: false + additionalLabels: + release: "prometheus-operator" + # prometheus: default + image: + repo: ghcr.io/atlanhq/cassandra_exporter + tag: 2.0.2 + 
jvmOpts: "" + resources: + limits: + #cpu: 200m + memory: 500Mi + requests: + #cpu: 100m + memory: 200Mi + podAnnotations: {} + + # Cassandra backup configuration + backup: + enabled: false + schedule: + - keyspace: atlas + cron: "0 3 * * *" + annotations: + iam.amazonaws.com/role: "" + image: + repository: ghcr.io/atlanhq/cain + tag: 0.6.0 + # Name of the secret containing the credentials of the service account used by GOOGLE_APPLICATION_CREDENTIALS, as a credentials.json file + extraArgs: + - -c + - atlas-cassandra + google: + serviceAccountSecret: + env: + - name: AWS_REGION + value: "" + resources: + requests: + memory: 1Gi + #cpu: 1 + limits: + memory: 1Gi + #cpu: 1 + destination: "" + +nginx: + enabled: true + clientMaxBodySize: "512m" # The maximum size of the request body. + clientBodyBufferSize: "512k" # The buffer size for reading the request body. In is nginx InMemory buffer size per request. Excessive request size will be written on disk defined by clientMaxBodySize. + clientBodyTimeout: "600s" # Allow clients up to 10 minutes to actively send their request body before Nginx times out the connection. + proxyReadTimeout: "10800s" # 3 hrs - Time taken to read a response from the atlas server. The workflow client will wait for 3 hrs for the response. + proxyConnectTimeout: "60s" # 1 min - Time taken to establish a connection to the atlas server. + ratelimit: + enabled: true + default_atlas_service: true + default: + zoneMemory: "20m" + rate: "500r" + rateUnit: "m" + burst: 20 + header: "$http_x_atlan_agent_id" + indexsearch: + zoneMemory: "20m" # Example: Zone memory, e.g., 10m, 20m + rate: "500r" # Example: Rate, e.g., 1000r (requests) + rateUnit: "m" # Example: Rate unit, e.g., m (minute), s (second), 1000r (requests per m minute) + burst: 100 # Example: Burst size + header: "$http_x_atlan_agent_id" # Example: Header name + bulk: + zoneMemory: "20m" + rate: "1000r" + rateUnit: "m" + burst: 100 + header: "$http_x_atlan_agent_id" + logging: + format: '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$http_x_atlan_client_origin" "$http_x_atlan_agent_id"' + vts: + zoneMemory: "32m" + +# Summary of Timeout Settings in Nginx + +# To Upstream (Atlas): +# proxy_connect_timeout: - Time taken to establish a connection to the atlas server. +# proxy_read_timeout: - Time taken to read a response from the atlas server. +# proxy_send_timeout: (defaults to proxy_read_timeout) - Time taken to send a request to the atlas server. +# From Client: +# client_body_timeout: - Time taken by the client to send the request body. +# client_header_timeout: - Time taken by the client to send the request header. +# keepalive_timeout: - This timeout applies to an idle client connection after Nginx has finished sending a response and is waiting for the next request on the same TCP connection. 
From c0ece658fd4b251709bbec79f2afa78bc6fb6297 Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 27 Oct 2025 18:47:37 +0530 Subject: [PATCH 2/7] sync with atlan preprod #1 --- .../cassandra/templates/backup/cronjob.yaml | 4 - .../cassandra/templates/reaper/cronjob.yaml | 4 - .../cassandra/templates/statefulset.yaml | 11 +- helm/atlas/charts/cassandra/values.yaml | 20 +- .../templates/es-service-isolated.yaml | 35 +++ .../templates/es-sts-isolated.yaml | 247 ++++++++++++++++++ .../elasticsearch/templates/statefulset.yaml | 7 +- helm/atlas/charts/elasticsearch/values.yaml | 192 ++++++++++++-- helm/atlas/templates/configmap.yaml | 28 +- helm/atlas/templates/deployment.yaml | 29 +- helm/atlas/templates/hpa.yaml | 2 +- helm/atlas/templates/logagent-configmap.yaml | 12 + helm/atlas/templates/nginx-cm.yaml | 2 +- helm/atlas/templates/service.yaml | 2 +- helm/atlas/templates/statefulset.yaml | 81 +++--- helm/atlas/templates/statsd-cronjob.yaml | 16 -- helm/atlas/values.yaml | 83 +++++- 17 files changed, 637 insertions(+), 138 deletions(-) create mode 100644 helm/atlas/charts/elasticsearch/templates/es-service-isolated.yaml create mode 100755 helm/atlas/charts/elasticsearch/templates/es-sts-isolated.yaml create mode 100644 helm/atlas/templates/logagent-configmap.yaml diff --git a/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml b/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml index efee5e96e55..9ba07867586 100755 --- a/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml +++ b/helm/atlas/charts/cassandra/templates/backup/cronjob.yaml @@ -27,10 +27,6 @@ spec: spec: restartPolicy: OnFailure serviceAccountName: {{ template "cassandra.serviceAccountName" $ }} - {{- with $.Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 12 }} - {{- end }} containers: - name: cassandra-backup {{- if and $.Values.multiarch.enabled $.Values.multiarch.image.cain }} diff --git a/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml b/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml index 6a93421e33a..a3010d3cf2d 100644 --- a/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml +++ b/helm/atlas/charts/cassandra/templates/reaper/cronjob.yaml @@ -21,10 +21,6 @@ spec: template: spec: restartPolicy: {{ .Values.reaper.restartPolicy }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 14 }} - {{- end }} containers: - name: reaper {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper }} diff --git a/helm/atlas/charts/cassandra/templates/statefulset.yaml b/helm/atlas/charts/cassandra/templates/statefulset.yaml index 52070022758..146b4e5ba93 100755 --- a/helm/atlas/charts/cassandra/templates/statefulset.yaml +++ b/helm/atlas/charts/cassandra/templates/statefulset.yaml @@ -77,8 +77,13 @@ spec: {{- end }} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- toYaml .Values.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} podAntiAffinity: {{- if eq .Values.antiAffinity "soft" }} @@ -188,7 +193,7 @@ spec: {{- if .Values.hostNetwork }} value: {{ required "You must fill \".Values.config.seeds\" with list of Cassandra seeds when hostNetwork is set to true" .Values.config.seeds | quote }} {{- else }} - value: "atlas-cassandra-0.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-1.atlas-cassandra.atlas.svc.cluster.local,atlas-cassandra-2.atlas-cassandra.atlas.svc.cluster.local" + value: "{{- range $i, $e := until $seed_size }}{{ template "cassandra.fullname" $global }}-{{ $i }}.{{ template "cassandra.fullname" $global }}.{{ $global.Values.Namespace }}.svc.{{ $global.Values.config.cluster_domain }}{{- if (lt ( add1 $i ) $seed_size ) }},{{- end }}{{- end }}" {{- end }} - name: MAX_HEAP_SIZE value: {{ default "8192M" .Values.max_heap_size | quote }} @@ -374,7 +379,7 @@ spec: {{- end }} {{- with .Values.nodeSelector }} nodeSelector: - {{- toYaml . | nindent 8 }} +{{ toYaml . | indent 8 }} {{- end }} {{- if or .Values.configOverrides (not .Values.persistence.enabled) }} volumes: @@ -431,4 +436,4 @@ spec: storageClassName: "{{ .Values.persistence.storageClass }}" {{- end }} {{- end }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/charts/cassandra/values.yaml b/helm/atlas/charts/cassandra/values.yaml index 029505d9efe..811cbfa9415 100755 --- a/helm/atlas/charts/cassandra/values.yaml +++ b/helm/atlas/charts/cassandra/values.yaml @@ -1,4 +1,3 @@ - multiarch: enabled: false image: {} @@ -6,8 +5,10 @@ multiarch: ## Cassandra image version ## ref: https://hub.docker.com/r/library/cassandra/ + global: Tier_Type: "" + Deployment_Type: "" image: repo: ghcr.io/atlanhq/cassandra tag: 3.11.12 @@ -212,6 +213,23 @@ affinity: operator: In values: - standard + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: diff --git a/helm/atlas/charts/elasticsearch/templates/es-service-isolated.yaml b/helm/atlas/charts/elasticsearch/templates/es-service-isolated.yaml new file mode 100644 index 00000000000..b78da3d4928 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/es-service-isolated.yaml @@ -0,0 +1,35 @@ +{{- if .Values.isolation.enabled }} +{{- range .Values.isolation.names }} +{{- $nodeName := . 
}} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + namespace: {{ $.Values.Namespace }} + labels: + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + chart: "{{ $.Chart.Name }}" + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- if eq $nodeName "master" }} + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + {{- end }} +spec: + clusterIP: None + publishNotReadyAddresses: {{ eq $nodeName "master" }} + selector: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + ports: + - name: {{ $.Values.service.httpPortName | default "http" }} + port: {{ $.Values.httpPort }} + - name: {{ $.Values.service.transportPortName | default "transport" }} + port: {{ $.Values.transportPort }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/charts/elasticsearch/templates/es-sts-isolated.yaml b/helm/atlas/charts/elasticsearch/templates/es-sts-isolated.yaml new file mode 100755 index 00000000000..5301f226697 --- /dev/null +++ b/helm/atlas/charts/elasticsearch/templates/es-sts-isolated.yaml @@ -0,0 +1,247 @@ +{{- $isAWSCloud := eq .Values.global.cloud "aws" }} +{{- if .Values.isolation.enabled }} +{{- range .Values.isolation.names }} +{{- $nodeName := . }} +--- +apiVersion: {{ template "elasticsearch.statefulset.apiVersion" $ }} +kind: StatefulSet +metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + namespace: {{ $.Values.Namespace }} + labels: + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + chart: "{{ $.Chart.Name }}" + cluster: {{ $.Values.esClusterName }} + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + serviceName: {{ $.Values.esClusterName }}-{{ $nodeName }} + selector: + matchLabels: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + replicas: {{ index $.Values.isolation.replicaCount $nodeName }} + podManagementPolicy: {{ $.Values.podManagementPolicy }} + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + cluster: {{ $.Values.esClusterName }} + annotations: + {{- range $key, $value := $.Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- if $.Values.esConfig }} + configchecksum: {{ include (print $.Template.BasePath "/configmap.yaml") $ | sha256sum | trunc 63 }} + {{- end }} + spec: + securityContext: +{{ toYaml $.Values.podSecurityContext | indent 8 }} + volumes: + - name: esconfig + configMap: + name: {{ template "elasticsearch.uname" $ }}-config + - name: elasticsearch-synonym-config + configMap: + name: elasticsearch-synonym-config + {{- if $.Values.extraVolumes }} +{{ toYaml $.Values.extraVolumes | indent 8 }} + {{- end }} + initContainers: + {{- if $.Values.sysctlInitContainer.enabled }} + - name: configure-sysctl + securityContext: + runAsUser: 0 + privileged: true + {{- if and $isAWSCloud $.Values.multiarch.enabled $.Values.multiarch.image.elasticsearch }} + image: {{ $.Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ $.Values.image }}:{{ $.Values.imageTag }}" + {{- end }} + command: ["sysctl", "-w", "vm.max_map_count={{ $.Values.sysctlVmMaxMapCount}}"] + resources: 
+{{ toYaml $.Values.initResources | indent 10 }} + {{- end }} + containers: + - name: elasticsearch + securityContext: +{{ toYaml $.Values.securityContext | indent 10 }} + {{- if and $isAWSCloud $.Values.multiarch.enabled $.Values.multiarch.image.elasticsearch }} + image: {{ $.Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ $.Values.image }}:{{ $.Values.imageTag }}" + {{- end }} + imagePullPolicy: {{ $.Values.imagePullPolicy }} + ports: + - name: http + containerPort: {{ $.Values.httpPort }} + - name: transport + containerPort: {{ $.Values.transportPort }} + resources: + {{- $tierType := $.Values.deploymentTier | default "" }} + {{- if eq $tierType "Enterprise" }} +{{ toYaml (index $.Values.isolation.resources_enterprise $nodeName) | indent 10 }} + {{- else if eq $tierType "Basic" }} +{{ toYaml (index $.Values.isolation.resources_basic $nodeName) | indent 10 }} + {{- else if eq $tierType "Standard" }} +{{ toYaml (index $.Values.isolation.resources_standard $nodeName) | indent 10 }} + {{- else }} +{{ toYaml (index $.Values.isolation.resources_enterprise $nodeName) | indent 10 }} + {{- end }} + envFrom: {{ toYaml $.Values.envFrom | nindent 10 }} + env: + - name: node.name + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: cluster.name + value: {{ $.Values.clusterName }} + {{ if $nodeName | eq "master" }} + - name: cluster.initial_master_nodes + value: {{ template "elasticsearch.endpoints" $ }} + {{ end }} + - name: discovery.seed_hosts + value: {{ template "elasticsearch.masterService" $ }}-headless + - name: node.roles + value: {{ join "," (index $.Values.isolation.roles $nodeName) | quote }} + - name: node.attr.role + value: {{ $nodeName | quote }} + - name: network.host + value: {{ $.Values.networkHost }} + - name: cluster.max_shards_per_node + value: {{ $.Values.maxShardsPerNode | quote }} + - name: ES_JAVA_OPTS + value: {{ index $.Values.isolation.esJavaOpts $nodeName }} + {{- if $.Values.extraEnvs }} +{{ toYaml $.Values.extraEnvs | indent 10 }} + {{- end }} + volumeMounts: + {{- if ne $nodeName "master" }} + - name: {{ $.Values.esClusterName }}-{{ $nodeName }} + mountPath: /usr/share/elasticsearch/data + {{- end }} + - name: elasticsearch-synonym-config + mountPath: /usr/share/elasticsearch/config/synonym.txt + subPath: synonym.txt + {{- range $path, $config := $.Values.esConfig }} + - name: esconfig + mountPath: /usr/share/elasticsearch/config/{{ $path }} + subPath: {{ $path }} + {{- end }} + {{- if $.Values.extraVolumeMounts }} +{{ toYaml $.Values.extraVolumeMounts | indent 10 }} + {{- end }} + readinessProbe: +{{ toYaml $.Values.readinessProbe | indent 10 }} + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + START_FILE=/tmp/.es_start_file + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ $.Values.protocol }}://127.0.0.1:{{ $.Values.httpPort }}${path} + } + + if [ -f "${START_FILE}" ]; then + echo 'Elasticsearch is already running, checking cluster health' + http "/_cluster/health?timeout={{ $.Values.healthCheckProbeTimeout }}" + else + echo 'Waiting for elasticsearch cluster to become ready' + if http "/_cluster/health?{{ $.Values.clusterHealthCheckParams }}" ; then + touch ${START_FILE} + exit 0 + else + echo 'Cluster is not yet ready' + exit 1 + fi + fi + {{- if $.Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml $.Values.imagePullSecrets | indent 
8 }} + {{- end }} + {{- with $.Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with $.Values.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} + {{- if or (eq $.Values.antiAffinity "hard") (eq $.Values.antiAffinity "soft") }} + affinity: + {{- $tierType := $.Values.deploymentTier | default "" }} + {{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- if eq $.Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + cluster: {{ $.Values.esClusterName }} + topologyKey: {{ $.Values.antiAffinityTopologyKey }} + {{- else if eq $.Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: {{ $.Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ $.Values.esClusterName }} + - key: role + operator: In + values: + - {{ $nodeName }} + {{- end }} + {{- end }} + nodeAffinity: + {{- if eq $.Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range $.Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml $.Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} + terminationGracePeriodSeconds: {{ $.Values.terminationGracePeriod }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + spec: +{{- $tierType := $.Values.deploymentTier | default "" }} + {{- if eq $tierType "Enterprise" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate $nodeName) | indent 6 }} + {{- else if eq $tierType "Basic" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate_basic $nodeName) | indent 6 }} + {{- else if eq $tierType "Standard" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate_standard $nodeName) | indent 6 }} + {{- else }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate $nodeName) | indent 6 }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/charts/elasticsearch/templates/statefulset.yaml b/helm/atlas/charts/elasticsearch/templates/statefulset.yaml index f02977199d3..6a1377cc27d 100755 --- a/helm/atlas/charts/elasticsearch/templates/statefulset.yaml +++ b/helm/atlas/charts/elasticsearch/templates/statefulset.yaml @@ -75,7 +75,7 @@ spec: {{- end }} {{- with .Values.nodeSelector }} nodeSelector: - {{- toYaml . | nindent 8 }} +{{ toYaml . 
| indent 8 }} {{- end }} {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity }} {{- if .Values.priorityClassName }} @@ -130,8 +130,13 @@ spec: {{- end }} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- toYaml .Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} volumes: diff --git a/helm/atlas/charts/elasticsearch/values.yaml b/helm/atlas/charts/elasticsearch/values.yaml index 6f3709b749d..1e0cace36b1 100755 --- a/helm/atlas/charts/elasticsearch/values.yaml +++ b/helm/atlas/charts/elasticsearch/values.yaml @@ -4,6 +4,7 @@ multiarch: enabled: false image: {} + clusterName: "atlas-elasticsearch" nodeGroup: "master" @@ -11,6 +12,7 @@ nodeGroup: "master" # This should be set to clusterName + "-" + nodeGroup for your master group global: Tier_Type: "" + Deployment_Type: "" masterService: "" # Elasticsearch roles that will be applied to this nodeGroup @@ -23,6 +25,7 @@ roles: replicas: 3 esMajorVersion: 7 minimumMasterNodes: 1 + # JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage # This is simpler and more reliable than manual calculation esJavaOpts: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" @@ -34,7 +37,6 @@ esConfig: http.max_content_length: 2147483647b ingest.geoip.downloader.enabled: false xpack.security.enabled: false - log4j2.properties: | -Dlog4j2.formatMsgNoLookups=true @@ -179,19 +181,24 @@ nodeAffinity: - key: cloud.google.com/gke-provisioning #GCP operator: In values: - - standard - - # requiredDuringSchedulingIgnoredDuringExecution: - # nodeSelectorTerms: - # - matchExpressions: - # - key: nodegroup - # operator: NotIn - # values: - # - atlan-spark - # - key: lifecycle - # operator: In - # values: - # - None + - standard + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard # The default is to deploy all pods serially. 
By setting this to parallel all pods are started at # the same time when bootstrapping the cluster @@ -240,11 +247,11 @@ terminationGracePeriod: 120 sysctlVmMaxMapCount: 262144 readinessProbe: - failureThreshold: 6 - initialDelaySeconds: 30 + failureThreshold: 3 + initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 3 - timeoutSeconds: 15 + timeoutSeconds: 5 custom_deployment: enabled: false @@ -252,7 +259,7 @@ custom_deployment: - m6a.2xlarge # https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params wait_for_status -clusterHealthCheckParams: "wait_for_status=yellow&timeout=5s" +clusterHealthCheckParams: "wait_for_status=yellow&timeout=1s" # Timeout for readiness probe health check when ES is already running healthCheckProbeTimeout: "1s" @@ -317,3 +324,152 @@ backup: image: ghcr.io/atlanhq/alpine-curl-atlan-v2:3.21.0 #imageTag: latest schedule: '0 3 * * *' + +esClusterName: "atlas-elasticsearch2" +deploymentTier: Enterprise +isolation: + enabled: false + names: + - "ui-search" + - "non-ui-search" + replicaCount: + ui-search: 3 + non-ui-search: 3 + + resources_enterprise: + master: + requests: + cpu: "100m" + memory: "3Gi" + limits: + cpu: "1000m" + memory: "4Gi" + ui-search: + requests: + cpu: "2" + memory: "6Gi" + limits: + cpu: "2" + memory: "7Gi" + non-ui-search: + requests: + cpu: "2" + memory: "6Gi" + limits: + cpu: "2" + memory: "7Gi" + + resources_standard: + master: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + ui-search: + requests: + cpu: "1000m" + memory: "4Gi" + limits: + cpu: "2000m" + memory: "4Gi" + non-ui-search: + requests: + cpu: "1000m" + memory: "4Gi" + limits: + cpu: "2000m" + memory: "4Gi" + + resources_basic: + master: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "1Gi" + ui-search: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + non-ui-search: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + + # JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage for isolation mode + esJavaOpts: + master: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + ui-search: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + non-ui-search: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + + roles: + master: + - "master" + ui-search: + - "data" + - "ingest" + non-ui-search: + - "data" + - "ingest" + # Enterprise tier storage + volumeClaimTemplate: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 200Gi + non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 200Gi + + # Standard tier storage + volumeClaimTemplate_standard: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 100Gi + 
non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 100Gi + + # Basic tier storage + volumeClaimTemplate_basic: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 50Gi + non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 50Gi \ No newline at end of file diff --git a/helm/atlas/templates/configmap.yaml b/helm/atlas/templates/configmap.yaml index 815408e7f55..398db75da45 100644 --- a/helm/atlas/templates/configmap.yaml +++ b/helm/atlas/templates/configmap.yaml @@ -145,9 +145,8 @@ data: atlas.graph.index.search.elasticsearch.client-only=true atlas.graph.index.search.elasticsearch.retry_on_conflict=5 atlas.graph.index.search.max-result-set-size=1000 - atlas.index.audit.elasticsearch.total_field_limit=10000 - atlas.index.audit.elasticsearch.refresh_interval: 1s - + atlas.index.audit.elasticsearch.total_field_limit={{ .Values.atlas.index.audit_index_field_limit }} + atlas.index.audit.elasticsearch.refresh_interval={{ .Values.atlas.index.audit_index_refresh_interval }} # Solr-specific configuration property # atlas.graph.index.search.max-result-set-size=150 @@ -459,7 +458,7 @@ data: atlas.metrics.uri_patterns=/api/(meta|atlas/v2)/glossary/terms/[^/]+/assignedEntities,/api/(meta|atlas/v2)/lineage/[^/]+,/api/(meta|atlas/v2)/lineage/list,/api/(meta|atlas/v2)/entity/accessors,/api/(meta|atlas/v2)/entity/auditSearch,/api/(meta|atlas/v2)/entity/bulk,/api/(meta|atlas/v2)/entity/bulk/setClassifications,/api/(meta|atlas/v2)/entity/bulk/uniqueAttribute,/api/(meta|atlas/v2)/entity/evaluator,/api/(meta|atlas/v2)/entity/guid/[^/]+,/api/(meta|atlas/v2)/entity/guid/[^/]+/businessmetadata,/api/(meta|atlas/v2)/entity/uniqueAttribute/type/[^/]+,/api/(meta|atlas/v2)/search/indexsearch,/api/(meta|atlas/v2)/entity/repairhaslineage,/api/(meta|atlas/v2)/types/typedef/name/[^/]+,/api/(meta|atlas/v2)/types/typedefs,/api/atlas/admin/metrics/prometheus,/api/atlas/admin/pushMetricsToStatsd,/api/atlas/v2/auth/download/policies/[^/]+,/api/atlas/v2/auth/download/roles/[^/]+,/api/atlas/v2/auth/download/users/[^/]+,/api/meta/entity/uniqueAttribute/type/[^/]+,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/groups,/auth/admin/realms/[^/]+/groups/[^/]+/role-mappings/realm,/auth/admin/realms/[^/]+/roles,/auth/admin/realms/[^/]+/roles-by-id/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+/composites,/auth/admin/realms/[^/]+/roles/[^/]+/groups,/auth/admin/realms/[^/]+/roles/[^/]+/users,/auth/admin/realms/[^/]+/users,/auth/admin/realms/[^/]+/users/[^/]+/groups,/auth/admin/realms/[^/]+/users/[^/]+/role-mappings/realm,/auth/realms/[^/]+/protocol/openid-connect/token,/auth/realms/[^/]+/protocol/openid-connect/token/introspect,/users/mappings,/roles/mappings,/api/(meta|atlas/v2)/business-policy/[^/]+/unlink-business-policy,/api/(meta|atlas/v2)/business-policy/link-business-policy,/api/(meta|atlas/v2)/direct/search,/api/(meta|atlas/v2)/attribute/update atlas.metrics.method_level.enable=true - 
atlas.metrics.method_patterns=policiesPrefetchFailed,processTermAssignments,elasticSearchQuery,elasticQueryTimeout,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,scrubEntityHeader,getDiffResult + atlas.metrics.method_patterns=policiesPrefetchFailed,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,processTermAssignments,elasticSearchQuery,mapVertexToAtlasEntityHeader,elasticQueryTimeout,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,isAccessAllowed,findDuplicatePendingTasksV2 {{ if .Values.atlas.janusgraph.atomic_mutation }} ### Atomic batch related configs ### atlas.graph.storage.cql.atomic-batch-mutate={{ .Values.atlas.janusgraph.atomic_mutation }} @@ -467,29 +466,30 @@ data: {{ end }} ######### Canary-Release ######### - atlas.canary.keycloak.token-introspection = {{ .Values.atlas.keycloak.token_introspection}} - atlas.keycloak.introspection.use.cache = {{ .Values.atlas.keycloak.introspection_cache }} - + atlas.canary.keycloak.token-introspection = true + atlas.keycloak.introspection.use.cache = true + + ######### Atlas Maintenance Mode ######### + atlas.maintenance.mode={{ .Values.atlas.maintenanceMode }} + ######### Atlas Inddexsearch configs ######### atlas.indexsearch.enable.api.limit={{ .Values.atlas.indexsearch.enable_api_limit }} atlas.indexsearch.query.size.max.limit={{ .Values.atlas.indexsearch.query_size_max_limit }} atlas.indexsearch.async.enable={{ .Values.atlas.indexsearch.enable_async }} atlas.indexsearch.async.search.keep.alive.time.in.seconds={{ .Values.atlas.indexsearch.request_timeout_in_secs }} atlas.indexsearch.enable.janus.optimization={{ .Values.atlas.indexsearch.enable_janus_optimization }} + atlas.indexsearch.request.isolation.enable={{ .Values.atlas.indexsearch.enable_request_isolation }} atlas.indexsearch.enable.janus.optimization.for.relationship={{ .Values.atlas.indexsearch.enable_janus_optimization_for_relationship }} atlas.indexsearch.enable.janus.optimization.extended={{ .Values.atlas.indexsearch.enable_janus_optimization_extended }} atlas.indexsearch.enable.janus.optimization.for.classifications={{ .Values.atlas.indexsearch.enable_janus_optimization_for_classifications }} - atlas.indexsearch.enable.janus.optimization.for.lineage={{ .Values.atlas.indexsearch.enable_janus_optimization_for_lineage }} - atlas.jg.super.vertex.edge.count={{ .Values.atlas.jg.super_vertex_edge_count }} - atlas.jg.super.vertex.edge.timeout={{ .Values.atlas.jg.super_vertex_edge_timeout }} + atlas.jg.super.vertex.min.edge.count={{ .Values.atlas.jg.super.vertex.min.edge.count }} ######### Atlas Bulk API configs ######### atlas.bulk.api.max.entities.allowed={{ 
.Values.atlas.bulk.max_entities_allowed }} - atlas.bulk.api.enable.janus.optimization={{ .Values.atlas.bulk.enable_janus_optimization }} - ######### Atlas Lineage configs ######### + ######### Atlas Lineage configs ######### atlas.lineage.optimised.calculation={{ .Values.atlas.lineage.optimised_calculation }} - atlas.lineage.enable.connection.lineage={{ .Values.atlas.lineage.enable_connection_lineage }} + ######### Atlas Distributed Task configs ######### atlas.distributed.task.enabled={{ .Values.atlas.distributed_task.enabled }} {{- if eq .Values.atlas.distributed_task.enabled true }} @@ -497,7 +497,7 @@ data: atlas.relationship.cleanup.supported.relationship.labels={{ .Values.atlas.distributed_task.cleanup_supported_relationship_labels }} {{- end }} - ######### Atlas Typedefs update configs ######### + ######### Atlas Typedefs update configs ######### atlas.types.update.async.enable={{ .Values.atlas.types_update.async_enable }} atlas.types.update.thread.count={{ .Values.atlas.types_update.thread_count }} diff --git a/helm/atlas/templates/deployment.yaml b/helm/atlas/templates/deployment.yaml index 4f6539c0950..96df1ead36d 100644 --- a/helm/atlas/templates/deployment.yaml +++ b/helm/atlas/templates/deployment.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: atlas + name: {{ .Values.atlas.name }} namespace: {{ .Values.Namespace }} labels: app: {{ template "name" . }} @@ -28,6 +28,10 @@ spec: annotations: {{ toYaml .Values.atlas.podAnnotations | indent 8 }} spec: + {{- with .Values.atlas.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} affinity: nodeAffinity: @@ -69,19 +73,6 @@ spec: nodeSelector: {{- toYaml . 
| nindent 8 }} {{- end }} - {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} - {{- if or .Values.atlas.tolerations $multiarchEnabled }} - tolerations: - {{- if .Values.atlas.tolerations }} - {{ toYaml .Values.atlas.tolerations | nindent 8 }} - {{- end }} - {{- if $multiarchEnabled }} - - key: "archtype" - operator: "Equal" - value: "arm64" - effect: "NoSchedule" - {{- end }} - {{- end }} initContainers: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} @@ -102,6 +93,7 @@ spec: ports: - containerPort: {{ .Values.atlas.service.targetPort }} env: + {{- toYaml .Values.atlas.env | nindent 12 }} - name: K8S_POD_IP valueFrom: fieldRef: @@ -132,8 +124,7 @@ spec: value: http://$(HOST_IP):4317 - name: OTEL_RESOURCE_ATTRIBUTES value: >- - k8s.pod.name=$(K8S_POD_NAME),k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- toYaml .Values.atlas.env | nindent 12 }} + k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs {{- if eq .Values.albTenant true }} - name: ALB_ENABLED value: "true" @@ -197,9 +188,11 @@ spec: volumeMounts: - name: telegraf-conf mountPath: /etc/telegraf/ + {{- if .Values.nginx.enabled }} - name: nginx-log-volume mountPath: /var/log/nginx readOnly: true + {{- end }} ports: - name: telegrafd containerPort: 9273 @@ -231,6 +224,8 @@ spec: volumes: - name: atlas-logs emptyDir: {} + - name: atlas-config-map-rw-vol + emptyDir: {} - name: atlas-config configMap: name: atlas-config @@ -267,4 +262,4 @@ spec: configMap: name: atlas-telegrafd {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/atlas/templates/hpa.yaml b/helm/atlas/templates/hpa.yaml index e3a420fe2d0..e7e185867e0 100644 --- a/helm/atlas/templates/hpa.yaml +++ b/helm/atlas/templates/hpa.yaml @@ -16,7 +16,7 @@ spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment - name: {{ .Values.hpa.name }} + name: {{ .Values.atlas.name }} metrics: - type: Resource resource: diff --git a/helm/atlas/templates/logagent-configmap.yaml b/helm/atlas/templates/logagent-configmap.yaml new file mode 100644 index 00000000000..f765bd64834 --- /dev/null +++ b/helm/atlas/templates/logagent-configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: filebeat-config + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +data: +{{- toYaml .Values.atlas.filebeatLogagent.data | nindent 8 }} \ No newline at end of file diff --git a/helm/atlas/templates/nginx-cm.yaml b/helm/atlas/templates/nginx-cm.yaml index 7573cf8f5a4..1735830c41f 100644 --- a/helm/atlas/templates/nginx-cm.yaml +++ b/helm/atlas/templates/nginx-cm.yaml @@ -213,7 +213,7 @@ data: } location = /api/atlas/v2/entity/bulk { - limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.bulk.burst }} nodelay; limit_req_status 429; add_header Retry-After 1 always; proxy_pass $atlas_upstream$request_uri; diff --git a/helm/atlas/templates/service.yaml b/helm/atlas/templates/service.yaml index 3ace207d0da..356a5e88157 100644 --- a/helm/atlas/templates/service.yaml +++ b/helm/atlas/templates/service.yaml @@ -11,7 +11,7 @@ metadata: {{- else if eq .Values.global.cloud "azure" }} konghq.com/path: /api/atlas/v2/ konghq.com/plugins: svc-rate-limit - {{- else }} + {{- else if eq .Values.albTenant false }} konghq.com/path: /api/atlas/v2/ konghq.com/plugins: svc-rate-limit {{- end }} diff --git a/helm/atlas/templates/statefulset.yaml b/helm/atlas/templates/statefulset.yaml index 28e2d9fb439..da278a67550 100644 --- a/helm/atlas/templates/statefulset.yaml +++ b/helm/atlas/templates/statefulset.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: - name: atlas + name: {{ .Values.atlas.name }} namespace: {{ .Values.Namespace }} labels: app: {{ template "name" . }} @@ -27,23 +27,16 @@ spec: annotations: {{ toYaml .Values.atlas.podAnnotations | indent 8 }} spec: + {{- with .Values.atlas.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} affinity: nodeAffinity: {{- if eq .Values.atlas.custom_deployment.enabled true }} requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} - - matchExpressions: - - key: purpose - operator: In - values: - - search - - key: karpenter.sh/capacity-type - operator: In - values: - - on-demand - {{- else }} - matchExpressions: - key: node.kubernetes.io/instance-type operator: In @@ -51,10 +44,14 @@ spec: {{- range .Values.atlas.custom_deployment.instance_type }} - {{ . }} {{- end }} - {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} {{- if eq .Values.atlas.podAntiAffinity true }} podAntiAffinity: @@ -64,10 +61,6 @@ spec: {{- if .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }} {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 8 }} - {{- end }} initContainers: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} @@ -75,18 +68,19 @@ spec: containers: - name: {{ .Chart.Name }}-main command: [ - "/bin/bash", - "-c", - "/create-atlas-keycloak-config.sh; - /env_change.sh; - /opt/apache-atlas/bin/atlas_start.py; - tail -F /opt/apache-atlas/logs/*.log;" - ] + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} ports: - containerPort: {{ .Values.atlas.service.targetPort }} env: + {{- toYaml .Values.atlas.env | nindent 12 }} - name: K8S_POD_IP valueFrom: fieldRef: @@ -118,21 +112,20 @@ spec: - name: OTEL_RESOURCE_ATTRIBUTES value: >- k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- toYaml .Values.atlas.env | nindent 12 }} {{- if eq .Values.albTenant true }} - name: ALB_ENABLED value: "true" {{- end }} envFrom: - - secretRef: - name: atlas-keycloak-config + - secretRef: + name: atlas-keycloak-config {{- if .Values.multitenant }} - - secretRef: - name: atlas-secret-manager - - secretRef: - name: atlas-secret-parameter-store - - secretRef: - name: instance-domain-name + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name {{- end }} resources: {{- $tierType := .Values.global.Tier_Type | default "" }} @@ -180,9 +173,11 @@ spec: volumeMounts: - name: telegraf-conf mountPath: /etc/telegraf/ + {{- if .Values.nginx.enabled }} - name: nginx-log-volume mountPath: /var/log/nginx readOnly: true + {{- end }} ports: - name: telegrafd containerPort: 9273 @@ -214,6 +209,15 @@ spec: volumes: - name: atlas-logs emptyDir: {} + - name: atlas-config-map-rw-vol + emptyDir: {} + {{- if .Values.nginx.enabled }} + - name: nginx-log-volume + emptyDir: {} + - name: nginx-config + configMap: + name: rate-limit-nginx-config + {{- end }} - name: atlas-config configMap: name: atlas-config @@ -236,18 +240,9 @@ spec: configMap: name: atlas-audit-index defaultMode: 0755 - - name: atlas-config-map-rw-vol - emptyDir: {} - {{- if .Values.nginx.enabled }} - - name: nginx-log-volume - emptyDir: {} - - name: nginx-config - configMap: - name: rate-limit-nginx-config - {{- end }} {{- if .Values.atlas.telegraf.enabled }} - name: telegraf-conf configMap: name: atlas-telegrafd {{- end }} -{{- end }} \ No newline at end of file +{{- end }} diff --git a/helm/atlas/templates/statsd-cronjob.yaml b/helm/atlas/templates/statsd-cronjob.yaml index 52763ea386e..b6863a61c99 100644 --- a/helm/atlas/templates/statsd-cronjob.yaml +++ b/helm/atlas/templates/statsd-cronjob.yaml @@ -22,22 +22,6 @@ spec: spec: template: spec: - {{- if or (and .Values.multiarch (eq .Values.multiarch.enabled true)) .Values.atlas.tolerations }} - tolerations: - {{- if and .Values.multiarch (eq .Values.multiarch.enabled true) }} - - key: "archtype" - operator: "Equal" - value: "arm64" - effect: "NoSchedule" - {{- end }} - {{- if .Values.atlas.tolerations }} - {{ toYaml .Values.atlas.tolerations | nindent 10 }} - {{- end }} - {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . 
| nindent 12 }} - {{- end }} {{- if .Values.atlas.imagePullSecrets }} imagePullSecrets: {{- toYaml .Values.atlas.imagePullSecrets | nindent 12 }} diff --git a/helm/atlas/values.yaml b/helm/atlas/values.yaml index b6638c6d84a..c6d999b9731 100644 --- a/helm/atlas/values.yaml +++ b/helm/atlas/values.yaml @@ -8,6 +8,7 @@ multiarch: # Declare variables to be passed into your templates. global: Tier_Type: "" + Deployment_Type: "" cloud: "" tenantName: "" svcIsolation: @@ -32,6 +33,8 @@ hpa: averageUtilization: 85 atlas: + name: atlas + maintenanceMode: false cache: enabled: false podAntiAffinity: true @@ -44,36 +47,33 @@ atlas: ranger: RANGER_PASSWORD: '{{repl ConfigOption "RangerPassword"}}' RANGER_SERVICE_URL: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + config: + entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" multitenant: '' Deployment_Type: '' replicaCount: 2 - config: - entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" janusgraph: atomic_mutation: true janusgraph_tx_buffer_size: 8192 - keycloak: - token_introspection: true - introspection_cache: false indexsearch: enable_api_limit: false query_size_max_limit: 100000 enable_async: true request_timeout_in_secs: 60 enable_janus_optimization: true + enable_request_isolation: false enable_janus_optimization_for_relationship: true - enable_janus_optimization_for_classifications: false enable_janus_optimization_extended: true - enable_janus_optimization_for_lineage: false jg: - super_vertex_edge_count: 100000 - super_vertex_edge_timeout: 30 + super: + vertex: + min: + edge: + count: 200 bulk: max_entities_allowed: 10000 - enable_janus_optimization: true lineage: - optimised_calculation: true - enable_connection_lineage: false + optimised_calculation: false authorizer: enable_delta_based_refresh: true enable_abac: true @@ -102,6 +102,13 @@ atlas: affinity: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND - weight: 1 preference: matchExpressions: @@ -115,7 +122,26 @@ atlas: - key: cloud.google.com/gke-provisioning #GCP operator: In values: - - standard + - standard + + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: @@ -308,6 +334,35 @@ atlas: SENTRY_DSN_PROD: '' INSTANCE_NAME: '' + # Config for logagent sidecar for atlas. + # Used in logagent configmap and atlas statefulset. 
+ filebeatLogagent: + image: + repository: ghcr.io/atlanhq/filebeat + tag: 6.8.4 + data: + filebeat.yml: |- + filebeat.inputs: + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/audit.log* + fields: + logtype: audit + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/metric.log + fields: + logtype: metric + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/atlas_perf.log + fields: + logtype: performance + output.logstash: + hosts: ['logstash-logstash:5044'] # Redis config for atlas # This is used in atlas configmap redis: @@ -466,7 +521,7 @@ nginx: proxyConnectTimeout: "60s" # 1 min - Time taken to establish a connection to the atlas server. ratelimit: enabled: true - default_atlas_service: true + default_atlas_service: false default: zoneMemory: "20m" rate: "500r" From 6d5fd3d4b28fbf4e6041275ceb0f824e9b429f40 Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 27 Oct 2025 19:26:12 +0530 Subject: [PATCH 3/7] sync with atlan preprod --- .../sample/create-storage-gce.yaml | 7 - .../templates/cassandra-online-dc-config.yaml | 8 +- .../templates/reaper/cronjob.yaml | 8 +- .../templates/statefulset.yaml | 31 +- .../charts/cassandra-online-dc/values.yaml | 27 +- .../templates/es-service-isolated.yaml | 35 ++ .../templates/es-sts-isolated.yaml | 245 ++++++++++++++ .../templates/rolebinding.yaml | 2 +- .../templates/statefulset.yaml | 7 +- .../charts/elasticsearch-read/values.yaml | 192 ++++++++++- .../atlas-audit-index-configmap.yaml | 16 +- helm/atlas-read/templates/configmap.yaml | 30 +- helm/atlas-read/templates/deployment.yaml | 28 +- helm/atlas-read/templates/hpa.yaml | 2 +- helm/atlas-read/templates/nginx-cm.yaml | 320 ++++++++++++++++++ helm/atlas-read/templates/pdb.yaml | 25 ++ helm/atlas-read/templates/podmonitor.yaml | 39 +++ .../templates/ratelimited-service.yaml | 32 ++ helm/atlas-read/templates/service.yaml | 11 +- helm/atlas-read/templates/statefulset.yaml | 107 +++--- helm/atlas-read/templates/statsd-cronjob.yaml | 6 +- .../atlas-read/templates/telegraf-config.yaml | 19 ++ helm/atlas-read/templates/tls_secrets.yaml | 12 + helm/atlas-read/values.yaml | 140 ++++++-- 24 files changed, 1184 insertions(+), 165 deletions(-) delete mode 100755 helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml create mode 100644 helm/atlas-read/charts/elasticsearch-read/templates/es-service-isolated.yaml create mode 100755 helm/atlas-read/charts/elasticsearch-read/templates/es-sts-isolated.yaml create mode 100644 helm/atlas-read/templates/nginx-cm.yaml create mode 100644 helm/atlas-read/templates/pdb.yaml create mode 100644 helm/atlas-read/templates/podmonitor.yaml create mode 100644 helm/atlas-read/templates/ratelimited-service.yaml create mode 100644 helm/atlas-read/templates/tls_secrets.yaml diff --git a/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml b/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml deleted file mode 100755 index 2467b95227e..00000000000 --- a/helm/atlas-read/charts/cassandra-online-dc/sample/create-storage-gce.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: StorageClass -apiVersion: storage.k8s.io/v1 -metadata: - name: generic -provisioner: kubernetes.io/gce-pd -parameters: - type: pd-ssd diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml index 4e7ed59b9e1..2002286ba55 100644 --- 
a/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/cassandra-online-dc-config.yaml @@ -626,7 +626,7 @@ data: # # Setting listen_address to 0.0.0.0 is always wrong. # - listen_address: 172.28.60.207 + # listen_address: # Set dynamically via POD_IP environment variable # Set listen_address OR listen_interface, not both. Interfaces must correspond # to a single address, IP aliasing is not supported. @@ -640,7 +640,7 @@ data: # Address to broadcast to other Cassandra nodes # Leaving this blank will set it to the same value as listen_address - broadcast_address: 172.28.60.207 + # broadcast_address: # Set dynamically via POD_IP environment variable # When using multiple physical network interfaces, set this # to true to listen on broadcast_address in addition to @@ -722,7 +722,7 @@ data: # be set to 0.0.0.0. If left blank, this will be set to the value of # rpc_address. If rpc_address is set to 0.0.0.0, broadcast_rpc_address must # be set. - broadcast_rpc_address: 172.28.60.207 + # broadcast_rpc_address: # Set dynamically via POD_IP environment variable # enable or disable keepalive on rpc/native connections rpc_keepalive: true @@ -1616,4 +1616,4 @@ data: JVM_OPTS="$JVM_OPTS $MX4J_ADDRESS" JVM_OPTS="$JVM_OPTS $MX4J_PORT" JVM_OPTS="$JVM_OPTS $JVM_EXTRA_OPTS" -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml index 1341fad52d1..5d8c9ccfd28 100644 --- a/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/reaper/cronjob.yaml @@ -35,10 +35,6 @@ spec: effect: "NoSchedule" {{- end }} {{- end }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 14 }} - {{- end }} containers: - name: reaper {{- if and .Values.multiarch.enabled .Values.multiarch.image.reaper }} @@ -91,5 +87,5 @@ spec: {{- toYaml .Values.reaper.sidecar.resources | nindent 20 }} imagePullSecrets: - name: {{ .Values.image.pullSecrets }} -{{- end }} -{{- end }} +{{- end}} +{{- end}} \ No newline at end of file diff --git a/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml b/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml index b3cdf36d5b8..f30216979fe 100755 --- a/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml +++ b/helm/atlas-read/charts/cassandra-online-dc/templates/statefulset.yaml @@ -66,6 +66,7 @@ spec: - {{ . 
}} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- if and .Values.multiarch (hasKey .Values.multiarch "enabled") (eq .Values.multiarch.enabled true) }} - weight: 100 @@ -77,6 +78,20 @@ spec: - arm64 {{- end }} {{- toYaml .Values.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- if and .Values.multiarch (hasKey .Values.multiarch "enabled") (eq .Values.multiarch.enabled true) }} + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + {{- end }} + {{- end }} {{- end }} podAntiAffinity: {{- if eq .Values.antiAffinity "soft" }} @@ -215,6 +230,18 @@ spec: valueFrom: fieldRef: fieldPath: status.podIP + - name: CASSANDRA_LISTEN_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: CASSANDRA_BROADCAST_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: CASSANDRA_BROADCAST_RPC_ADDRESS + valueFrom: + fieldRef: + fieldPath: status.podIP - name: LOCAL_JMX value: {{ default "no" .Values.config.local_jmx | quote }} {{- range $key, $value := .Values.env }} @@ -381,7 +408,7 @@ spec: {{- end }} {{- with .Values.nodeSelector }} nodeSelector: - {{- toYaml . | nindent 8 }} +{{ toYaml . | indent 8 }} {{- end }} {{- if or .Values.configOverrides (not .Values.persistence.enabled) }} volumes: @@ -439,4 +466,4 @@ spec: {{- end }} {{- end }} {{- end }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/charts/cassandra-online-dc/values.yaml b/helm/atlas-read/charts/cassandra-online-dc/values.yaml index c3cd93fc794..2188c875e0c 100755 --- a/helm/atlas-read/charts/cassandra-online-dc/values.yaml +++ b/helm/atlas-read/charts/cassandra-online-dc/values.yaml @@ -1,4 +1,3 @@ - multiarch: enabled: false image: {} @@ -6,8 +5,10 @@ multiarch: ## Cassandra image version ## ref: https://hub.docker.com/r/library/cassandra/ + global: Tier_Type: "" + Deployment_Type: "" image: repo: ghcr.io/atlanhq/cassandra tag: 3.11.12 @@ -198,6 +199,13 @@ custom_deployment: affinity: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND - weight: 1 preference: matchExpressions: @@ -212,6 +220,23 @@ affinity: operator: In values: - standard + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/es-service-isolated.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/es-service-isolated.yaml new file mode 100644 index 00000000000..b78da3d4928 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/es-service-isolated.yaml @@ -0,0 +1,35 @@ +{{- if .Values.isolation.enabled }} +{{- range .Values.isolation.names }} +{{- $nodeName := . 
}} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + namespace: {{ $.Values.Namespace }} + labels: + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + chart: "{{ $.Chart.Name }}" + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- if eq $nodeName "master" }} + annotations: + service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" + {{- end }} +spec: + clusterIP: None + publishNotReadyAddresses: {{ eq $nodeName "master" }} + selector: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + ports: + - name: {{ $.Values.service.httpPortName | default "http" }} + port: {{ $.Values.httpPort }} + - name: {{ $.Values.service.transportPortName | default "transport" }} + port: {{ $.Values.transportPort }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/es-sts-isolated.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/es-sts-isolated.yaml new file mode 100755 index 00000000000..51f494cc7c1 --- /dev/null +++ b/helm/atlas-read/charts/elasticsearch-read/templates/es-sts-isolated.yaml @@ -0,0 +1,245 @@ +{{- $isAWSCloud := eq .Values.global.cloud "aws" }} +{{- if .Values.isolation.enabled }} +{{- range .Values.isolation.names }} +{{- $nodeName := . }} +--- +apiVersion: {{ template "elasticsearch.statefulset.apiVersion" $ }} +kind: StatefulSet +metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + namespace: {{ $.Values.Namespace }} + labels: + heritage: {{ $.Release.Service | quote }} + release: {{ $.Release.Name | quote }} + chart: "{{ $.Chart.Name }}" + cluster: {{ $.Values.esClusterName }} + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + {{- range $key, $value := $.Values.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} +spec: + serviceName: {{ $.Values.esClusterName }}-{{ $nodeName }} + selector: + matchLabels: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + replicas: {{ index $.Values.isolation.replicaCount $nodeName }} + podManagementPolicy: {{ $.Values.podManagementPolicy }} + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + app: {{ $.Values.esClusterName }}-{{ $nodeName }} + role: {{ $nodeName }} + cluster: {{ $.Values.esClusterName }} + annotations: + {{- range $key, $value := $.Values.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- if $.Values.esConfig }} + configchecksum: {{ include (print $.Template.BasePath "/configmap.yaml") $ | sha256sum | trunc 63 }} + {{- end }} + spec: + securityContext: +{{ toYaml $.Values.podSecurityContext | indent 8 }} + volumes: + - name: esconfig + configMap: + name: {{ template "elasticsearch.uname" $ }}-config + - name: elasticsearch-synonym-config + configMap: + name: elasticsearch-synonym-config + {{- if $.Values.extraVolumes }} +{{ toYaml $.Values.extraVolumes | indent 8 }} + {{- end }} + initContainers: + {{- if $.Values.sysctlInitContainer.enabled }} + - name: configure-sysctl + securityContext: + runAsUser: 0 + privileged: true + {{- if and $isAWSCloud $.Values.multiarch.enabled $.Values.multiarch.image.elasticsearch }} + image: {{ $.Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ $.Values.image }}:{{ $.Values.imageTag }}" + {{- end }} + command: ["sysctl", "-w", "vm.max_map_count={{ 
$.Values.sysctlVmMaxMapCount}}"] + resources: +{{ toYaml $.Values.initResources | indent 10 }} + {{- end }} + containers: + - name: elasticsearch + securityContext: +{{ toYaml $.Values.securityContext | indent 10 }} + {{- if and $isAWSCloud $.Values.multiarch.enabled $.Values.multiarch.image.elasticsearch }} + image: {{ $.Values.multiarch.image.elasticsearch }} + {{- else }} + image: "{{ $.Values.image }}:{{ $.Values.imageTag }}" + {{- end }} + imagePullPolicy: {{ $.Values.imagePullPolicy }} + ports: + - name: http + containerPort: {{ $.Values.httpPort }} + - name: transport + containerPort: {{ $.Values.transportPort }} + resources: + {{- $tierType := $.Values.deploymentTier | default "" }} + {{- if eq $tierType "Enterprise" }} +{{ toYaml (index $.Values.isolation.resources_enterprise $nodeName) | indent 10 }} + {{- else if eq $tierType "Basic" }} +{{ toYaml (index $.Values.isolation.resources_basic $nodeName) | indent 10 }} + {{- else if eq $tierType "Standard" }} +{{ toYaml (index $.Values.isolation.resources_standard $nodeName) | indent 10 }} + {{- else }} +{{ toYaml (index $.Values.isolation.resources_enterprise $nodeName) | indent 10 }} + {{- end }} + envFrom: {{ toYaml $.Values.envFrom | nindent 10 }} + env: + - name: node.name + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: cluster.name + value: {{ $.Values.clusterName }} + {{ if $nodeName | eq "master" }} + - name: cluster.initial_master_nodes + value: {{ template "elasticsearch.endpoints" $ }} + {{ end }} + - name: discovery.seed_hosts + value: {{ template "elasticsearch.masterService" $ }}-headless + - name: node.roles + value: {{ join "," (index $.Values.isolation.roles $nodeName) | quote }} + - name: node.attr.role + value: {{ $nodeName | quote }} + - name: network.host + value: {{ $.Values.networkHost }} + - name: cluster.max_shards_per_node + value: {{ $.Values.maxShardsPerNode | quote }} + - name: ES_JAVA_OPTS + value: {{ index $.Values.isolation.esJavaOpts $nodeName }} + {{- if $.Values.extraEnvs }} +{{ toYaml $.Values.extraEnvs | indent 10 }} + {{- end }} + volumeMounts: + - name: {{ $.Values.esClusterName }}-{{ $nodeName }} + mountPath: /usr/share/elasticsearch/data + - name: elasticsearch-synonym-config + mountPath: /usr/share/elasticsearch/config/synonym.txt + subPath: synonym.txt + {{- range $path, $config := $.Values.esConfig }} + - name: esconfig + mountPath: /usr/share/elasticsearch/config/{{ $path }} + subPath: {{ $path }} + {{- end }} + {{- if $.Values.extraVolumeMounts }} +{{ toYaml $.Values.extraVolumeMounts | indent 10 }} + {{- end }} + readinessProbe: +{{ toYaml $.Values.readinessProbe | indent 10 }} + exec: + command: + - sh + - -c + - | + #!/usr/bin/env bash -e + START_FILE=/tmp/.es_start_file + + http () { + local path="${1}" + if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then + BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" + else + BASIC_AUTH='' + fi + curl -XGET -s -k --fail ${BASIC_AUTH} {{ $.Values.protocol }}://127.0.0.1:{{ $.Values.httpPort }}${path} + } + + if [ -f "${START_FILE}" ]; then + echo 'Elasticsearch is already running, checking cluster health' + http "/_cluster/health?timeout={{ $.Values.healthCheckProbeTimeout }}" + else + echo 'Waiting for elasticsearch cluster to become ready' + if http "/_cluster/health?{{ $.Values.clusterHealthCheckParams }}" ; then + touch ${START_FILE} + exit 0 + else + echo 'Cluster is not yet ready' + exit 1 + fi + fi + {{- if $.Values.imagePullSecrets }} + imagePullSecrets: +{{ toYaml $.Values.imagePullSecrets | indent 
8 }} + {{- end }} + {{- with $.Values.nodeSelector }} + nodeSelector: +{{ toYaml . | indent 8 }} + {{- end }} + {{- with $.Values.tolerations }} + tolerations: +{{ toYaml . | indent 8 }} + {{- end }} + {{- if or (eq $.Values.antiAffinity "hard") (eq $.Values.antiAffinity "soft") }} + affinity: + {{- $tierType := $.Values.deploymentTier | default "" }} + {{- if or (eq $tierType "Enterprise") (eq $tierType "") }} + {{- if eq $.Values.antiAffinity "hard" }} + podAntiAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + - labelSelector: + matchLabels: + cluster: {{ $.Values.esClusterName }} + topologyKey: {{ $.Values.antiAffinityTopologyKey }} + {{- else if eq $.Values.antiAffinity "soft" }} + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: {{ $.Values.antiAffinityTopologyKey }} + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ $.Values.esClusterName }} + - key: role + operator: In + values: + - {{ $nodeName }} + {{- end }} + {{- end }} + nodeAffinity: + {{- if eq $.Values.custom_deployment.enabled true }} + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node.kubernetes.io/instance-type + operator: In + values: + {{- range $.Values.custom_deployment.instance_type }} + - {{ . }} + {{- end }} + {{- else }} + preferredDuringSchedulingIgnoredDuringExecution: + {{- toYaml $.Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} + terminationGracePeriodSeconds: {{ $.Values.terminationGracePeriod }} + {{- end }} + volumeClaimTemplates: + - metadata: + name: {{ $.Values.esClusterName }}-{{ $nodeName }} + spec: +{{- $tierType := $.Values.deploymentTier | default "" }} + {{- if eq $tierType "Enterprise" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate $nodeName) | indent 6 }} + {{- else if eq $tierType "Basic" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate_basic $nodeName) | indent 6 }} + {{- else if eq $tierType "Standard" }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate_standard $nodeName) | indent 6 }} + {{- else }} +{{ toYaml (index $.Values.isolation.volumeClaimTemplate $nodeName) | indent 6 }} + {{- end }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml index 6b51efdfe05..0b90195d863 100755 --- a/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml +++ b/helm/atlas-read/charts/elasticsearch-read/templates/rolebinding.yaml @@ -18,7 +18,7 @@ subjects: {{- else }} name: {{ .Values.rbac.serviceAccountName | quote }} {{- end }} - namespace: {{ .Release.Namespace | quote }} + namespace: {{ .Values.Namespace | quote }} roleRef: kind: Role name: {{ $fullName | quote }} diff --git a/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml b/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml index 69dee92bbe9..c73edb594d3 100755 --- a/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml +++ b/helm/atlas-read/charts/elasticsearch-read/templates/statefulset.yaml @@ -76,7 +76,7 @@ spec: {{- end }} {{- with .Values.nodeSelector }} nodeSelector: - {{- toYaml . | nindent 8 }} +{{ toYaml . 
| indent 8 }} {{- end }} {{- if or (eq .Values.antiAffinity "hard") (eq .Values.antiAffinity "soft") .Values.nodeAffinity }} {{- if .Values.priorityClassName }} @@ -119,8 +119,13 @@ spec: - {{ . }} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- toYaml .Values.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} terminationGracePeriodSeconds: {{ .Values.terminationGracePeriod }} volumes: diff --git a/helm/atlas-read/charts/elasticsearch-read/values.yaml b/helm/atlas-read/charts/elasticsearch-read/values.yaml index 1b2be74d897..c0320e2047b 100755 --- a/helm/atlas-read/charts/elasticsearch-read/values.yaml +++ b/helm/atlas-read/charts/elasticsearch-read/values.yaml @@ -4,6 +4,7 @@ multiarch: enabled: false image: {} + clusterName: "atlas-elasticsearch-read" nodeGroup: "master" @@ -11,6 +12,7 @@ nodeGroup: "master" # This should be set to clusterName + "-" + nodeGroup for your master group global: Tier_Type: "" + Deployment_Type: "" masterService: "" # Elasticsearch roles that will be applied to this nodeGroup @@ -23,6 +25,7 @@ roles: replicas: 3 esMajorVersion: 7 minimumMasterNodes: 1 + # JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage # This is simpler and more reliable than manual calculation esJavaOpts: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" @@ -34,7 +37,6 @@ esConfig: http.max_content_length: 2147483647b ingest.geoip.downloader.enabled: false xpack.security.enabled: false - reindex.remote.whitelist: ["atlas-elasticsearch-master:9200","atlas-elasticsearch-master-0:9200","atlas-elasticsearch-master-1:9200","atlas-elasticsearch-master-2:9200"] log4j2.properties: | -Dlog4j2.formatMsgNoLookups=true @@ -186,19 +188,24 @@ nodeAffinity: - key: cloud.google.com/gke-provisioning #GCP operator: In values: - - standard - - # requiredDuringSchedulingIgnoredDuringExecution: - # nodeSelectorTerms: - # - matchExpressions: - # - key: nodegroup - # operator: NotIn - # values: - # - atlan-spark - # - key: lifecycle - # operator: In - # values: - # - None + - standard + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard # The default is to deploy all pods serially. 
By setting this to parallel all pods are started at # the same time when bootstrapping the cluster @@ -247,11 +254,11 @@ terminationGracePeriod: 120 sysctlVmMaxMapCount: 262144 readinessProbe: - failureThreshold: 6 - initialDelaySeconds: 30 + failureThreshold: 3 + initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 3 - timeoutSeconds: 15 + timeoutSeconds: 5 custom_deployment: enabled: false @@ -259,7 +266,7 @@ custom_deployment: - m6a.2xlarge # https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html#request-params wait_for_status -clusterHealthCheckParams: "wait_for_status=yellow&timeout=5s" +clusterHealthCheckParams: "wait_for_status=yellow&timeout=1s" # Timeout for readiness probe health check when ES is already running healthCheckProbeTimeout: "1s" @@ -324,3 +331,152 @@ backup: image: ghcr.io/atlanhq/alpine-curl-atlan-v2:3.21.0 #imageTag: latest schedule: '0 3 * * *' + +esClusterName: "atlas-elasticsearch2" +deploymentTier: Enterprise +isolation: + enabled: false + names: + - "ui-search" + - "non-ui-search" + replicaCount: + ui-search: 3 + non-ui-search: 3 + + resources_enterprise: + master: + requests: + cpu: "100m" + memory: "3Gi" + limits: + cpu: "1000m" + memory: "4Gi" + ui-search: + requests: + cpu: "2" + memory: "6Gi" + limits: + cpu: "2" + memory: "7Gi" + non-ui-search: + requests: + cpu: "2" + memory: "6Gi" + limits: + cpu: "2" + memory: "7Gi" + + resources_standard: + master: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + ui-search: + requests: + cpu: "1000m" + memory: "4Gi" + limits: + cpu: "2000m" + memory: "4Gi" + non-ui-search: + requests: + cpu: "1000m" + memory: "4Gi" + limits: + cpu: "2000m" + memory: "4Gi" + + resources_basic: + master: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "1Gi" + ui-search: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + non-ui-search: + requests: + cpu: "500m" + memory: "2Gi" + limits: + cpu: "1000m" + memory: "2Gi" + + # JVM automatically uses 50% of container memory as max heap using MaxRAMPercentage for isolation mode + esJavaOpts: + master: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + ui-search: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + non-ui-search: "-XX:MaxRAMPercentage=50.0 -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC -XX:MaxGCPauseMillis=200 -XX:G1HeapRegionSize=16m -XX:G1NewSizePercent=30 -XX:G1MaxNewSizePercent=40 -XX:+G1UseAdaptiveIHOP -XX:G1MixedGCCountTarget=8" + + roles: + master: + - "master" + ui-search: + - "data" + - "ingest" + non-ui-search: + - "data" + - "ingest" + # Enterprise tier storage + volumeClaimTemplate: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 200Gi + non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 200Gi + + # Standard tier storage + volumeClaimTemplate_standard: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 100Gi + 
non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 100Gi + + # Basic tier storage + volumeClaimTemplate_basic: + master: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 20Gi + ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 50Gi + non-ui-search: + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 50Gi \ No newline at end of file diff --git a/helm/atlas-read/templates/atlas-audit-index-configmap.yaml b/helm/atlas-read/templates/atlas-audit-index-configmap.yaml index 24a3711296f..81ef471809e 100644 --- a/helm/atlas-read/templates/atlas-audit-index-configmap.yaml +++ b/helm/atlas-read/templates/atlas-audit-index-configmap.yaml @@ -151,18 +151,4 @@ data: } } ' - - - - - - - - - - - - - - -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/configmap.yaml b/helm/atlas-read/templates/configmap.yaml index 8ad0e049a23..d147f850a26 100644 --- a/helm/atlas-read/templates/configmap.yaml +++ b/helm/atlas-read/templates/configmap.yaml @@ -155,9 +155,8 @@ data: atlas.graph.index.search.elasticsearch.client-only=true atlas.graph.index.search.elasticsearch.retry_on_conflict=5 atlas.graph.index.search.max-result-set-size=1000 - atlas.index.audit.elasticsearch.total_field_limit=10000 - atlas.index.audit.elasticsearch.refresh_interval: 1s - + atlas.index.audit.elasticsearch.total_field_limit={{ .Values.atlas.index.audit_index_field_limit }} + atlas.index.audit.elasticsearch.refresh_interval={{ .Values.atlas.index.audit_index_refresh_interval }} # Solr-specific configuration property # atlas.graph.index.search.max-result-set-size=150 @@ -435,6 +434,7 @@ data: atlas.graph.metrics.enabled = true atlas.graph.metrics.jmx.enabled = true atlas.statsd.enable = true + ########## Atlas deferred-actions (background tasks) ############# @@ -468,7 +468,7 @@ data: 
atlas.metrics.uri_patterns=/api/(meta|atlas/v2)/glossary/terms/[^/]+/assignedEntities,/api/(meta|atlas/v2)/lineage/[^/]+,/api/(meta|atlas/v2)/lineage/list,/api/(meta|atlas/v2)/entity/accessors,/api/(meta|atlas/v2)/entity/auditSearch,/api/(meta|atlas/v2)/entity/bulk,/api/(meta|atlas/v2)/entity/bulk/setClassifications,/api/(meta|atlas/v2)/entity/bulk/uniqueAttribute,/api/(meta|atlas/v2)/entity/evaluator,/api/(meta|atlas/v2)/entity/guid/[^/]+,/api/(meta|atlas/v2)/entity/guid/[^/]+/businessmetadata,/api/(meta|atlas/v2)/entity/uniqueAttribute/type/[^/]+,/api/(meta|atlas/v2)/search/indexsearch,/api/(meta|atlas/v2)/entity/repairhaslineage,/api/(meta|atlas/v2)/types/typedef/name/[^/]+,/api/(meta|atlas/v2)/types/typedefs,/api/atlas/admin/metrics/prometheus,/api/atlas/admin/pushMetricsToStatsd,/api/atlas/v2/auth/download/policies/[^/]+,/api/atlas/v2/auth/download/roles/[^/]+,/api/atlas/v2/auth/download/users/[^/]+,/api/meta/entity/uniqueAttribute/type/[^/]+,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/admin-events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/events,/auth/admin/realms/[^/]+/groups,/auth/admin/realms/[^/]+/groups/[^/]+/role-mappings/realm,/auth/admin/realms/[^/]+/roles,/auth/admin/realms/[^/]+/roles-by-id/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+,/auth/admin/realms/[^/]+/roles/[^/]+/composites,/auth/admin/realms/[^/]+/roles/[^/]+/groups,/auth/admin/realms/[^/]+/roles/[^/]+/users,/auth/admin/realms/[^/]+/users,/auth/admin/realms/[^/]+/users/[^/]+/groups,/auth/admin/realms/[^/]+/users/[^/]+/role-mappings/realm,/auth/realms/[^/]+/protocol/openid-connect/token,/auth/realms/[^/]+/protocol/openid-connect/token/introspect,/users/mappings,/roles/mappings,/api/(meta|atlas/v2)/business-policy/[^/]+/unlink-business-policy,/api/(meta|atlas/v2)/business-policy/link-business-policy,/api/(meta|atlas/v2)/direct/search,/api/(meta|atlas/v2)/attribute/update atlas.metrics.method_level.enable=true - atlas.metrics.method_patterns=policiesPrefetchFailed,processTermAssignments,elasticSearchQuery,elasticQueryTimeout,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,scrubEntityHeader,getDiffResult + atlas.metrics.method_patterns=policiesPrefetchFailed,mapVertexToAtlasEntityHeaderWithoutPrefetch,mapVertexToAtlasEntityHeaderWithPrefetch,processTermAssignments,elasticSearchQuery,mapVertexToAtlasEntityHeader,elasticQueryTimeout,getAllClassifications,scrubSearchResults,getAdjacentEdgesByLabel,preCreateOrUpdate,createOrUpdate,mapAttributes,graphCommit,getAtlasLineageInfo,getLineageInfoOnDemand,getLineageListInfoOnDemand,repairHasLineageForAssetGetById,repairHasLineageForAssetGetRelations,repairHasLineageForRequiredAsset,repairHasLineage,getRelationshipEdge,hasEdges,getEdgeBetweenVertices,removeHasLineageOnDelete,resetHasLineageOnInputOutputDelete,updateAssetHasLineageStatus,isAccessAllowed {{ if .Values.atlas.janusgraph.atomic_mutation }} ### Atomic batch related configs ### atlas.graph.storage.cql.atomic-batch-mutate={{ .Values.atlas.janusgraph.atomic_mutation }} @@ -476,29 +476,30 @@ data: {{ end }} 
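Because the audit-index limits, the statsd flag, and the metrics URI/method patterns above are now driven from values, a local render is the quickest way to confirm they resolve the way this hunk intends. A minimal sketch, assuming the chart is rendered from the repo root with its default values:

```bash
# Fetch the chart's local file:// dependencies, then render only the
# configmap and spot-check the templated properties.
helm dependency update helm/atlas-read
helm template atlas-read helm/atlas-read \
  -s templates/configmap.yaml \
  | grep -E 'audit.elasticsearch|method_patterns|atomic-batch-mutate|statsd'
```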
######### Canary-Release ######### - atlas.canary.keycloak.token-introspection = {{ .Values.atlas.keycloak.token_introspection}} - atlas.keycloak.introspection.use.cache = {{ .Values.atlas.keycloak.introspection_cache }} - + atlas.canary.keycloak.token-introspection = true + atlas.keycloak.introspection.use.cache = true + + ######### Atlas Maintenance Mode ######### + atlas.maintenance.mode={{ .Values.atlas.maintenanceMode }} + ######### Atlas Inddexsearch configs ######### atlas.indexsearch.enable.api.limit={{ .Values.atlas.indexsearch.enable_api_limit }} atlas.indexsearch.query.size.max.limit={{ .Values.atlas.indexsearch.query_size_max_limit }} atlas.indexsearch.async.enable={{ .Values.atlas.indexsearch.enable_async }} atlas.indexsearch.async.search.keep.alive.time.in.seconds={{ .Values.atlas.indexsearch.request_timeout_in_secs }} atlas.indexsearch.enable.janus.optimization={{ .Values.atlas.indexsearch.enable_janus_optimization }} + atlas.indexsearch.request.isolation.enable={{ .Values.atlas.indexsearch.enable_request_isolation }} atlas.indexsearch.enable.janus.optimization.for.relationship={{ .Values.atlas.indexsearch.enable_janus_optimization_for_relationship }} atlas.indexsearch.enable.janus.optimization.extended={{ .Values.atlas.indexsearch.enable_janus_optimization_extended }} atlas.indexsearch.enable.janus.optimization.for.classifications={{ .Values.atlas.indexsearch.enable_janus_optimization_for_classifications }} - atlas.indexsearch.enable.janus.optimization.for.lineage={{ .Values.atlas.indexsearch.enable_janus_optimization_for_lineage }} - atlas.jg.super.vertex.edge.count={{ .Values.atlas.jg.super_vertex_edge_count }} - atlas.jg.super.vertex.edge.timeout={{ .Values.atlas.jg.super_vertex_edge_timeout }} + atlas.jg.super.vertex.min.edge.count={{ .Values.atlas.jg.super.vertex.min.edge.count }} ######### Atlas Bulk API configs ######### atlas.bulk.api.max.entities.allowed={{ .Values.atlas.bulk.max_entities_allowed }} - atlas.bulk.api.enable.janus.optimization={{ .Values.atlas.bulk.enable_janus_optimization }} - ######### Atlas Lineage configs ######### + ######### Atlas Lineage configs ######### atlas.lineage.optimised.calculation={{ .Values.atlas.lineage.optimised_calculation }} - atlas.lineage.enable.connection.lineage={{ .Values.atlas.lineage.enable_connection_lineage }} + ######### Atlas Distributed Task configs ######### atlas.distributed.task.enabled={{ .Values.atlas.distributed_task.enabled }} {{- if eq .Values.atlas.distributed_task.enabled true }} @@ -506,9 +507,8 @@ data: atlas.relationship.cleanup.supported.relationship.labels={{ .Values.atlas.distributed_task.cleanup_supported_relationship_labels }} {{- end }} - ######### Atlas Typedefs update configs ######### + ######### Atlas Typedefs update configs ######### atlas.types.update.async.enable={{ .Values.atlas.types_update.async_enable }} atlas.types.update.thread.count={{ .Values.atlas.types_update.thread_count }} atlas.typedef.lock.name={{ .Values.atlas.types_update.lock_name }} {{- end }} - diff --git a/helm/atlas-read/templates/deployment.yaml b/helm/atlas-read/templates/deployment.yaml index 1f71ec45528..5504e8503ec 100644 --- a/helm/atlas-read/templates/deployment.yaml +++ b/helm/atlas-read/templates/deployment.yaml @@ -1,9 +1,8 @@ {{- if and (.Values.deploy) (.Values.deploy.enabled) }} -{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} apiVersion: apps/v1 kind: Deployment metadata: - name: atlas-read + name: {{ .Values.atlas.name }} 
namespace: {{ .Values.Namespace }} labels: app: {{ template "name" . }} @@ -29,6 +28,10 @@ spec: annotations: {{ toYaml .Values.atlas.podAnnotations | indent 8 }} spec: + {{- with .Values.atlas.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} affinity: nodeAffinity: @@ -55,8 +58,13 @@ spec: {{- end }} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} {{- if eq .Values.atlas.podAntiAffinity true }} podAntiAffinity: @@ -70,19 +78,6 @@ spec: nodeSelector: {{- toYaml . | nindent 8 }} {{- end }} - {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} - {{- if or .Values.atlas.tolerations $multiarchEnabled }} - tolerations: - {{- if .Values.atlas.tolerations }} - {{ toYaml .Values.atlas.tolerations | nindent 8 }} - {{- end }} - {{- if $multiarchEnabled }} - - key: "archtype" - operator: "Equal" - value: "arm64" - effect: "NoSchedule" - {{- end }} - {{- end }} initContainers: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} @@ -103,6 +98,7 @@ spec: ports: - containerPort: {{ .Values.atlas.service.targetPort }} env: + {{- toYaml .Values.atlas.env | nindent 12 }} - name: K8S_POD_IP valueFrom: fieldRef: @@ -134,7 +130,6 @@ spec: - name: OTEL_RESOURCE_ATTRIBUTES value: >- k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- toYaml .Values.atlas.env | nindent 12 }} {{- if eq .Values.albTenant true }} - name: ALB_ENABLED value: "true" @@ -239,4 +234,3 @@ spec: name: atlas-read-telegrafd {{- end }} {{- end }} -{{- end }} diff --git a/helm/atlas-read/templates/hpa.yaml b/helm/atlas-read/templates/hpa.yaml index a5abf61df3d..63f2ebe374a 100644 --- a/helm/atlas-read/templates/hpa.yaml +++ b/helm/atlas-read/templates/hpa.yaml @@ -16,7 +16,7 @@ spec: scaleTargetRef: apiVersion: apps/v1 kind: Deployment - name: {{ .Values.hpa.name }} + name: {{ .Values.atlas.name }} metrics: - type: Resource resource: diff --git a/helm/atlas-read/templates/nginx-cm.yaml b/helm/atlas-read/templates/nginx-cm.yaml new file mode 100644 index 00000000000..3799e2b070a --- /dev/null +++ b/helm/atlas-read/templates/nginx-cm.yaml @@ -0,0 +1,320 @@ +{{- if and .Values.nginx.enabled (or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled)) }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: rate-limit-nginx-config-read + namespace: {{ .Values.Namespace }} +data: + nginx.conf: |- + # Load the dynamic VTS module (must be at the top level) + load_module /etc/nginx/modules/ngx_http_vhost_traffic_status_module.so; + + user nginx; + worker_processes auto; + error_log /dev/stderr warn; + pid /var/run/nginx.pid; + + events { + worker_connections 1024; + } + + http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Set a higher limit for the client request body size to 
prevent 413 errors + client_max_body_size {{ .Values.nginx.clientMaxBodySize | default "512m" }}; + client_body_buffer_size {{ .Values.nginx.clientBodyBufferSize | default "512k" }}; + + # Increased timeout for client sending request body to 10 minutes + client_body_timeout {{ .Values.nginx.clientBodyTimeout | default "600s" }}; + + # Map for the general API rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $effective_agent_id_key { + default {{ .Values.nginx.default.header }}; # Default to rate limiting by agent_id + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit) + } + + # Map for the /api/meta/entity/bulk path-based rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $bulk_path_key { + default {{ .Values.nginx.bulk.header }}; # Default to rate limiting by agent_id + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit) + } + + # Map for the /api/meta/search/indexsearch path-based rate limiting key & product_webapp bypass + map $http_x_atlan_client_origin $indexsearch_path_key { + default {{ .Values.nginx.indexsearch.header }}; # Default to rate limiting by agent_id for these paths + "product_webapp" ""; # For "product_webapp", use an empty key (bypasses limit for this path) + } + + log_format ratelimit_json escape=json '{' + '"time_local": "$time_local", ' + '"remote_addr": "$remote_addr", ' + '"remote_user": "$remote_user", ' + '"request_method": "$request_method", ' + '"request_uri": "$request_uri", ' + '"request_protocol": "$server_protocol", ' + '"request_length": "$request_length", ' + '"status": "$status", ' + '"body_bytes_sent": "$body_bytes_sent", ' + '"bytes_sent": "$bytes_sent", ' + '"http_referer": "$http_referer", ' + '"http_user_agent": "$http_user_agent", ' + '"http_x_forwarded_for": "$http_x_forwarded_for", ' + '"http_x_atlan_client_origin": "$http_x_atlan_client_origin", ' + '"http_x_atlan_agent": "$http_x_atlan_agent", ' + '"http_x_atlan_agent_id": "$http_x_atlan_agent_id", ' + '"http_x_atlan_package_name": "$http_x_atlan_package_name", ' + '"http_x_atlan_agent_workflow_id": "$http_x_atlan_agent_workflow_id", ' + '"http_x_atlan_via_ui": "$http_x_atlan_via_ui", ' + '"http_x_atlan_request_id": "$http_x_atlan_request_id", ' + '"http_x_atlan_google_sheets_id": "$http_x_atlan_google_sheets_id", ' + '"http_x_atlan_microsoft_excel_id": "$http_x_atlan_microsoft_excel_id", ' + '"http_x_atlan_task_guid": "$http_x_atlan_task_guid", ' + '"http_x_atlan_route": "$http_x_atlan_route", ' + '"effective_agent_id_key_used": "$effective_agent_id_key", ' + '"bulk_path_key_used": "$bulk_path_key", ' + '"indexsearch_path_key_used": "$indexsearch_path_key", ' + '"limit_req_status": "$limit_req_status", ' + '"request_time": "$request_time", ' + '"upstream_addr": "$upstream_addr", ' + '"upstream_status": "$upstream_status", ' + '"upstream_response_time": "$upstream_response_time", ' + '"upstream_connect_time": "$upstream_connect_time", ' + '"upstream_header_time": "$upstream_header_time"' + '}'; + + access_log /dev/stdout ratelimit_json; # Logging to container standard output + + # Define rate limit zones + limit_req_zone $effective_agent_id_key zone=agent_id_zone:{{ .Values.nginx.default.zoneMemory | default "10m" }} rate={{ .Values.nginx.default.rate | default "500r" }}/{{ .Values.nginx.default.rateUnit | default "m" }}; + limit_req_zone $indexsearch_path_key zone=indexsearch_limit_zone:{{ .Values.nginx.indexsearch.zoneMemory | default "10m" }} rate={{ .Values.nginx.indexsearch.rate | 
default "500r" }}/{{ .Values.nginx.indexsearch.rateUnit | default "m" }}; + limit_req_zone $bulk_path_key zone=entity_bulk_limit_zone:{{ .Values.nginx.bulk.zoneMemory | default "10m" }} rate={{ .Values.nginx.bulk.rate | default "200r" }}/{{ .Values.nginx.bulk.rateUnit | default "m" }}; + + # Map for VTS filtering by URL group (path-based categorization) + map $request_uri $url_group { + "~*^/api/atlas/v2/search/indexsearch" "api_atlas_v2_indexsearch"; + "~*^/api/meta/search/indexsearch" "api_meta_indexsearch"; + "~*^/api/atlas/v2/entity/bulk" "api_atlas_v2_entitybulk"; + "~*^/api/meta/entity/bulk" "api_meta_entitybulk"; + "~*^/api/meta/entity/auditSearch" "api_meta_entity_auditsearch"; + "~*^/api/atlas/v2/entity/auditSearch" "api_atlas_v2_entity_auditsearch"; + "~*^/api/meta/types/typedefs" "api_meta_types_typedefs"; + "~*^/api/atlas/v2/types/typedefs" "api_atlas_v2_types_typedefs"; + "~*^/api/meta/lineage/list" "api_meta_lineage_list"; + "~*^/api/atlas/v2/lineage/list" "api_atlas_v2_lineage_list"; + "~*^/api/atlas" "api_atlas_other"; + "~*^/api/meta" "api_meta_other"; + default "other_paths"; + } + + # VTS Module Configuration + vhost_traffic_status_zone shared:vhost_traffic_status:32m; + # Filter by exact status code and then by URL group + vhost_traffic_status_filter_by_set_key $status $url_group; + + # Server for Nginx stub_status (internal) + server { + listen 127.0.0.1:8081; + server_name localhost; + location /nginx_status { + stub_status; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + } + + # Server for Nginx VTS (internal) + server { + listen 127.0.0.1:8082; + server_name localhost; + location /vts_status { + vhost_traffic_status_display; + vhost_traffic_status_display_format prometheus; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + location /vts_status_html { + vhost_traffic_status_display; + vhost_traffic_status_display_format html; + access_log off; + vhost_traffic_status_bypass_stats on; + allow 127.0.0.1; + deny all; + } + } + + # Main application server + server { + listen 8080; + set $atlas_upstream http://127.0.0.1:21000; + + # Common proxy headers to be included in relevant locations + # Standard headers + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + + # Custom Atlan headers + proxy_set_header x-atlan-agent $http_x_atlan_agent; + proxy_set_header x-atlan-agent-id $http_x_atlan_agent_id; + proxy_set_header x-atlan-package-name $http_x_atlan_package_name; + proxy_set_header x-atlan-agent-workflow-id $http_x_atlan_agent_workflow_id; + proxy_set_header x-atlan-via-ui $http_x_atlan_via_ui; + proxy_set_header x-atlan-request-id $http_x_atlan_request_id; + proxy_set_header x-atlan-google-sheets-id $http_x_atlan_google_sheets_id; + proxy_set_header x-atlan-microsoft-excel-id $http_x_atlan_microsoft_excel_id; + proxy_set_header x-atlan-task-guid $http_x_atlan_task_guid; + proxy_set_header x-atlan-client-origin $http_x_atlan_client_origin; + proxy_set_header x-atlan-route $http_x_atlan_route; + + # Most specific locations first + location = /api/meta/search/indexsearch { + limit_req zone=indexsearch_limit_zone burst={{ .Values.nginx.indexsearch.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | 
default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; # This is an outgoing header to client + } + + location = /api/atlas/v2/search/indexsearch { # Exact match for this specific path + limit_req zone=indexsearch_limit_zone burst={{ .Values.nginx.indexsearch.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; # If needed + } + + location = /api/meta/entity/bulk { # Exact match for this specific path + limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.bulk.burst }} nodelay; # Adjust burst as needed + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/entity/bulk { + limit_req zone=entity_bulk_limit_zone burst={{ .Values.nginx.bulk.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # New locations for additional metrics - applying general API rate limit + location = /api/meta/entity/auditSearch { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/entity/auditSearch { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/meta/types/typedefs { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + 
proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/types/typedefs { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/meta/lineage/list { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location = /api/atlas/v2/lineage/list { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream$request_uri; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # General API prefixes (will catch remaining /api/meta/* and /api/atlas/*) + location /api/meta/ { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; # General API limit + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + location /api/atlas/ { + limit_req zone=agent_id_zone burst={{ .Values.nginx.default.burst }} nodelay; # General API limit + limit_req_status 429; + add_header Retry-After 1 always; + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + add_header X-Debug-Agent-Id $http_x_atlan_agent_id; + } + + # Default location for all other requests (UI, etc.) 
+ location / { + proxy_pass $atlas_upstream; + proxy_connect_timeout {{ .Values.nginx.proxyConnectTimeout | default "60s" }}; + proxy_read_timeout {{ .Values.nginx.proxyReadTimeout | default "7200s" }}; + proxy_next_upstream error timeout invalid_header http_500 http_502 http_503 http_504; + } + } + } +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/pdb.yaml b/helm/atlas-read/templates/pdb.yaml new file mode 100644 index 00000000000..b2716f55696 --- /dev/null +++ b/helm/atlas-read/templates/pdb.yaml @@ -0,0 +1,25 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.podDisruptionBudget.enabled }} +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: atlas-read-pdb + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + {{- if .Values.podDisruptionBudget.minAvailable }} + minAvailable: {{ .Values.podDisruptionBudget.minAvailable }} + {{- end }} + {{- if .Values.podDisruptionBudget.maxUnavailable }} + maxUnavailable: {{ .Values.podDisruptionBudget.maxUnavailable }} + {{- end }} + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} +{{- end }} +{{- end }} diff --git a/helm/atlas-read/templates/podmonitor.yaml b/helm/atlas-read/templates/podmonitor.yaml new file mode 100644 index 00000000000..1f78f073fd4 --- /dev/null +++ b/helm/atlas-read/templates/podmonitor.yaml @@ -0,0 +1,39 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.podMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: atlas-read-podmonitor + {{- if .Values.atlas.podMonitor.namespace }} + namespace: {{ .Values.atlas.podMonitor.namespace }} + {{- end }} + labels: + app: {{ template "name" . }} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- toYaml .Values.atlas.podMonitor.labels | nindent 4 }} +spec: + podMetricsEndpoints: + - interval: {{ .Values.atlas.podMonitor.interval }} + {{- if .Values.atlas.podMonitor.scrapeTimeout }} + scrapeTimeout: {{ .Values.atlas.podMonitor.scrapeTimeout }} + {{- end }} + honorLabels: true + port: telegrafd + path: /metrics + scheme: {{ .Values.atlas.podMonitor.scheme }} + {{- if .Values.atlas.podMonitor.relabelings }} + relabelings: + {{- toYaml .Values.atlas.podMonitor.relabelings | nindent 4 }} + {{- end }} + jobLabel: "atlas-podMonitor-job" + selector: + matchLabels: + app: {{ template "name" . }} + release: {{ .Release.Name }} + namespaceSelector: + matchNames: + - atlas +{{- end }} +{{- end }} diff --git a/helm/atlas-read/templates/ratelimited-service.yaml b/helm/atlas-read/templates/ratelimited-service.yaml new file mode 100644 index 00000000000..772a80be767 --- /dev/null +++ b/helm/atlas-read/templates/ratelimited-service.yaml @@ -0,0 +1,32 @@ +{{- if and .Values.nginx.enabled (or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled)) }} +apiVersion: v1 +kind: Service +metadata: + name: atlas-read-ratelimited + namespace: {{ .Values.Namespace }} + labels: + app: {{ template "name" . 
}} + chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} +spec: + type: ClusterIP + ports: + {{- if and .Values.nginx.enabled .Values.nginx.ratelimit.enabled }} + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + {{- else }} + - port: {{ .Values.atlas.service.port }} + targetPort: {{ .Values.atlas.service.targetPort }} + protocol: TCP + name: {{ .Values.atlas.service.portName }} + {{- end }} + selector: + app: {{ template "name" . }} + release: {{ .Release.Name }} + internalTrafficPolicy: Cluster + ipFamilyPolicy: SingleStack + sessionAffinity: None +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/service.yaml b/helm/atlas-read/templates/service.yaml index 94a94ca187c..2721b839096 100644 --- a/helm/atlas-read/templates/service.yaml +++ b/helm/atlas-read/templates/service.yaml @@ -11,7 +11,7 @@ metadata: {{- else if eq .Values.global.cloud "azure" }} konghq.com/path: /api/atlas/v2/ konghq.com/plugins: svc-rate-limit - {{- else }} + {{- else if eq .Values.albTenant false }} konghq.com/path: /api/atlas/v2/ konghq.com/plugins: svc-rate-limit {{- end }} @@ -31,11 +31,18 @@ spec: type: ClusterIP {{- end }} ports: + {{- if and .Values.nginx.enabled .Values.nginx.ratelimit.default_atlas_service }} + - port: 80 + targetPort: 8080 + protocol: TCP + name: http + {{- else }} - port: {{ .Values.atlas.service.port }} targetPort: {{ .Values.atlas.service.targetPort }} protocol: TCP name: {{ .Values.atlas.service.portName }} + {{- end }} selector: app: {{ template "name" . }} release: {{ .Release.Name }} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/statefulset.yaml b/helm/atlas-read/templates/statefulset.yaml index 2efdd9c7c09..57e8fb3ce89 100644 --- a/helm/atlas-read/templates/statefulset.yaml +++ b/helm/atlas-read/templates/statefulset.yaml @@ -2,7 +2,7 @@ apiVersion: apps/v1 kind: StatefulSet metadata: - name: atlas-read + name: {{ .Values.atlas.name }} namespace: {{ .Values.Namespace }} labels: app: {{ template "name" . }} @@ -10,7 +10,7 @@ metadata: release: {{ .Release.Name }} heritage: {{ .Release.Service }} annotations: - configmap.reloader.stakater.com/reload: "atlas-read-config,atlas-read-logback-config,atlas-read-audit-index,atlas-read-keycloak-config,atlas-read-init-script,atlas-read-init-container-script" + configmap.reloader.stakater.com/reload: "atlas-read-config,atlas-read-logback-config,atlas-read-audit-index,atlas-read-keycloak-config,atlas-read-init-script,atlas-read-init-container-script,rate-limit-nginx-config-read" secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-read-keycloak-config" argocd.argoproj.io/sync-wave: "1" spec: @@ -28,6 +28,10 @@ spec: annotations: {{ toYaml .Values.atlas.podAnnotations | indent 8 }} spec: + {{- with .Values.atlas.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} affinity: nodeAffinity: @@ -54,17 +58,13 @@ spec: {{- end }} {{- end }} {{- else }} + {{- if eq .Values.global.Deployment_Type "Development" }} preferredDuringSchedulingIgnoredDuringExecution: - {{- if and .Values.multiarch (hasKey .Values.multiarch "enabled") (eq .Values.multiarch.enabled true) }} - - weight: 100 - preference: - matchExpressions: - - key: kubernetes.io/arch - operator: In - values: - - arm64 - {{- end }} {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} + {{- else }} + requiredDuringSchedulingIgnoredDuringExecution: + {{- toYaml .Values.atlas.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} + {{- end }} {{- end }} {{- if eq .Values.atlas.podAntiAffinity true }} podAntiAffinity: @@ -74,23 +74,6 @@ spec: {{- if .Values.priorityClassName }} priorityClassName: {{ .Values.priorityClassName }} {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- $multiarchEnabled := and .Values.multiarch (eq .Values.multiarch.enabled true) }} - {{- if or .Values.atlas.tolerations $multiarchEnabled }} - tolerations: - {{- if .Values.atlas.tolerations }} - {{ toYaml .Values.atlas.tolerations | nindent 8 }} - {{- end }} - {{- if $multiarchEnabled }} - - key: "archtype" - operator: "Equal" - value: "arm64" - effect: "NoSchedule" - {{- end }} - {{- end }} initContainers: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} @@ -98,18 +81,19 @@ spec: containers: - name: {{ .Chart.Name }}-main command: [ - "/bin/bash", - "-c", - "/create-atlas-keycloak-config.sh; - /env_change.sh; - /opt/apache-atlas/bin/atlas_start.py; - tail -F /opt/apache-atlas/logs/*.log;" - ] + "/bin/bash", + "-c", + "/create-atlas-keycloak-config.sh; + /env_change.sh; + /opt/apache-atlas/bin/atlas_start.py; + tail -F /opt/apache-atlas/logs/*.log;" + ] image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} ports: - containerPort: {{ .Values.atlas.service.targetPort }} env: + {{- toYaml .Values.atlas.env | nindent 12 }} - name: K8S_POD_IP valueFrom: fieldRef: @@ -141,21 +125,20 @@ spec: - name: OTEL_RESOURCE_ATTRIBUTES value: >- k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- toYaml .Values.atlas.env | nindent 12 }} {{- if eq .Values.albTenant true }} - name: ALB_ENABLED value: "true" {{- end }} envFrom: - - secretRef: - name: atlas-read-keycloak-config + - secretRef: + name: atlas-read-keycloak-config {{- if .Values.multitenant }} - - secretRef: - name: atlas-secret-manager - - secretRef: - name: atlas-secret-parameter-store - - secretRef: - name: instance-domain-name + - secretRef: + name: atlas-secret-manager + - secretRef: + name: atlas-secret-parameter-store + - secretRef: + name: instance-domain-name {{- end }} resources: {{- $tierType := .Values.global.Tier_Type | default "" }} @@ -203,10 +186,35 @@ spec: volumeMounts: - name: telegraf-conf mountPath: /etc/telegraf/ + {{- if .Values.nginx.enabled }} + - name: nginx-log-volume + mountPath: /var/log/nginx + readOnly: true + {{- end }} ports: - name: telegrafd containerPort: 9273 
{{- end }} + {{- if .Values.nginx.enabled }} + - name: nginx-ratelimit + image: ghcr.io/atlanhq/nginx-vts-atlan-v2:1.27.5.1-multiarch + ports: + - containerPort: 8080 + protocol: TCP + resources: + limits: + cpu: 200m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-log-volume + mountPath: /var/log/nginx + {{- end }} {{- if .Values.atlas.imagePullSecrets }} imagePullSecrets: {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} @@ -214,6 +222,8 @@ spec: volumes: - name: atlas-logs emptyDir: {} + - name: atlas-config-map-rw-vol + emptyDir: {} - name: atlas-read-config configMap: name: atlas-read-config @@ -236,11 +246,16 @@ spec: configMap: name: atlas-read-audit-index defaultMode: 0755 - - name: atlas-config-map-rw-vol - emptyDir: {} {{- if .Values.atlas.telegraf.enabled }} - name: telegraf-conf configMap: name: atlas-read-telegrafd {{- end }} + {{- if .Values.nginx.enabled }} + - name: nginx-log-volume + emptyDir: {} + - name: nginx-config + configMap: + name: rate-limit-nginx-config-read + {{- end }} {{- end }} diff --git a/helm/atlas-read/templates/statsd-cronjob.yaml b/helm/atlas-read/templates/statsd-cronjob.yaml index 5660a22ec49..1148af53da7 100644 --- a/helm/atlas-read/templates/statsd-cronjob.yaml +++ b/helm/atlas-read/templates/statsd-cronjob.yaml @@ -35,10 +35,6 @@ spec: {{ toYaml .Values.atlas.tolerations | nindent 10 }} {{- end }} {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 12 }} - {{- end }} {{- if .Values.atlas.imagePullSecrets }} imagePullSecrets: {{- toYaml .Values.atlas.imagePullSecrets | nindent 12 }} @@ -62,4 +58,4 @@ spec: curl -X GET "http://atlas-read-service-atlas.atlas.svc.cluster.local/api/atlas/admin/pushMetricsToStatsd" restartPolicy: Never {{- end -}} -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/templates/telegraf-config.yaml b/helm/atlas-read/templates/telegraf-config.yaml index 328c2587b73..0d32b635d43 100644 --- a/helm/atlas-read/templates/telegraf-config.yaml +++ b/helm/atlas-read/templates/telegraf-config.yaml @@ -27,6 +27,25 @@ data: # quiet = false hostname = "" # Will be set by Telegraf automatically omit_hostname = false + {{- if and .Values.nginx.enabled .Values.nginx.ratelimit.enabled }} + # Input plugin for basic Nginx stub_status metrics + [[inputs.nginx]] + ## An array of Nginx stub_status URI to gather stats from. + urls = ["http://127.0.0.1:8081/nginx_status"] + ## Response timeout. + # response_timeout = "5s" + # Add new input plugin for Prometheus format from Nginx VTS + [[inputs.prometheus]] + ## An array of URLs to scrape Prometheus metrics from. + urls = ["http://127.0.0.1:8082/vts_status"] # URL of the VTS status endpoint + ## Metric version for parsing. Default is 0 (auto), try 2 if issues. + # metric_version = 0 # Or 2 for OpenMetrics if VTS outputs that + ## Timeout for scraping. 
+ # response_timeout = "5s" + ## Optional: Add tags to metrics from this input + # [inputs.prometheus.tags] + # source = "nginx_vts" + {{- end }} [global_tags] deployment="atlas" [[inputs.http]] diff --git a/helm/atlas-read/templates/tls_secrets.yaml b/helm/atlas-read/templates/tls_secrets.yaml new file mode 100644 index 00000000000..bd25ed63bc5 --- /dev/null +++ b/helm/atlas-read/templates/tls_secrets.yaml @@ -0,0 +1,12 @@ +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} +{{- if .Values.atlas.ingress.tlsSecrets }} +apiVersion: v1 +kind: Secret +metadata: + name: tls-wildcard-atlas-read + namespace: {{ .Values.Namespace | default "default" }} +type: Opaque +data: +{{- toYaml .Values.atlas.ingress.tlsSecrets | nindent 8 }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas-read/values.yaml b/helm/atlas-read/values.yaml index bf2984701e0..4992bf53bfe 100644 --- a/helm/atlas-read/values.yaml +++ b/helm/atlas-read/values.yaml @@ -8,6 +8,7 @@ multiarch: # Declare variables to be passed into your templates. global: Tier_Type: "" + Deployment_Type: "" cloud: "" tenantName: "" svcIsolation: @@ -35,6 +36,8 @@ podDisruptionBudget: enabled: true minAvailable: "1" atlas: + name: atlas-read + maintenanceMode: false cache: enabled: false podAntiAffinity: true @@ -47,39 +50,36 @@ atlas: ranger: RANGER_PASSWORD: '{{repl ConfigOption "RangerPassword"}}' RANGER_SERVICE_URL: "http://ranger-service.ranger.svc.cluster.local:80/api/policy" + config: + entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" multitenant: '' Deployment_Type: '' replicaCount: 2 - config: - entities_allowed_large_attributes: "rawQueryText,variablesSchemaBase64,visualBuilderSchemaBase64,dataContractSpec,dataContractJson" janusgraph: atomic_mutation: true janusgraph_tx_buffer_size: 8192 - keycloak: - token_introspection: true - introspection_cache: false indexsearch: enable_api_limit: false query_size_max_limit: 100000 enable_async: true request_timeout_in_secs: 60 enable_janus_optimization: true + enable_request_isolation: false enable_janus_optimization_for_relationship: true - enable_janus_optimization_for_classifications: false enable_janus_optimization_extended: true - enable_janus_optimization_for_lineage: false jg: - super_vertex_edge_count: 100000 - super_vertex_edge_timeout: 30 + super: + vertex: + min: + edge: + count: 200 bulk: max_entities_allowed: 10000 - enable_janus_optimization: true lineage: - optimised_calculation: true - enable_connection_lineage: false + optimised_calculation: false authorizer: enable_delta_based_refresh: true - enable_abac: true + enable_abac: false index: audit_index_field_limit: 10000 audit_index_refresh_interval: 1s @@ -88,7 +88,7 @@ atlas: cleanup_supported_asset_types: "Process,AirflowTask" cleanup_supported_relationship_labels: "__Process.inputs,__Process.outputs,__AirflowTask.inputs,__AirflowTask.outputs" types_update: - async_enable: true + async_enable: false thread_count: 5 lock_name: "atlas-read:type-def:lock" @@ -106,6 +106,13 @@ atlas: affinity: nodeAffinity: preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND - weight: 1 preference: matchExpressions: @@ -119,7 +126,26 @@ atlas: - key: cloud.google.com/gke-provisioning #GCP operator: In values: - - standard + - standard + + 
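With both a preferred and a required on-demand affinity block defined in values, which one lands in the rendered manifests is decided by global.Deployment_Type in the deployment and statefulset templates. A quick check, assuming the chart renders with its defaults from the repo root:

```bash
# Confirm that Development keeps the preferred (soft) affinity while other
# deployment types switch to the required (hard) on-demand affinity.
# (Run 'helm dependency update helm/atlas-read' once beforehand.)
for dt in Development Production; do
  echo "== global.Deployment_Type=$dt =="
  helm template atlas-read helm/atlas-read \
    --set global.Deployment_Type="$dt" \
    -s templates/statefulset.yaml \
    | grep -E '(preferred|required)DuringSchedulingIgnoredDuringExecution'
done
```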
requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: eks.amazonaws.com/capacityType #AWS + operator: In + values: + - ON_DEMAND + - matchExpressions: + - key: lifecycle #Azure + operator: In + values: + - ondemand + - matchExpressions: + - key: cloud.google.com/gke-provisioning #GCP + operator: In + values: + - standard + podAntiAffinity: requiredDuringSchedulingIgnoredDuringExecution: - labelSelector: @@ -146,9 +172,9 @@ atlas: - curl -X GET http://localhost:21000/api/atlas/admin/killtheleader # Kubernetes ingress for atlas - # Primary ingress. all traffic is switched to secondary ingress. + # Primary ingress. ingress: - enabled: false + enabled: true serviceName: atlas-read-ui-service annotations: kubernetes.io/ingress.class: "kong" @@ -312,6 +338,35 @@ atlas: SENTRY_DSN_PROD: '' INSTANCE_NAME: '' + # Config for logagent sidecar for atlas. + # Used in logagent configmap and atlas statefulset. + filebeatLogagent: + image: + repository: ghcr.io/atlanhq/filebeat + tag: 6.8.4 + data: + filebeat.yml: |- + filebeat.inputs: + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/audit.log* + fields: + logtype: audit + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/metric.log + fields: + logtype: metric + - type: log + enabled: true + paths: + - /opt/apache-atlas/logs/atlas_perf.log + fields: + logtype: performance + output.logstash: + hosts: ['logstash-logstash:5044'] # Redis config for atlas # This is used in atlas configmap redis: @@ -343,13 +398,6 @@ atlas: # Flag to enable telegraf sidecar for metrics telegraf: enabled: true - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 300m - memory: 256Mi # Flag to enable statsD cronjob and schedule statsdJob: @@ -460,3 +508,47 @@ cassandra: memory: 1Gi #cpu: 1 destination: "" + +nginx: + enabled: true + clientMaxBodySize: "512m" # The maximum size of the request body. + clientBodyBufferSize: "512k" # The buffer size for reading the request body. In is nginx InMemory buffer size per request. Excessive request size will be written on disk defined by clientMaxBodySize. + clientBodyTimeout: "600s" # Allow clients up to 10 minutes to actively send their request body before Nginx times out the connection. + proxyReadTimeout: "10800s" # 3 hrs - Time taken to read a response from the atlas server. The workflow client will wait for 3 hrs for the response. + proxyConnectTimeout: "60s" # 1 min - Time taken to establish a connection to the atlas server. + ratelimit: + enabled: true + default_atlas_service: false + default: + zoneMemory: "20m" + rate: "500r" + rateUnit: "m" + burst: 20 + header: "$http_x_atlan_agent_id" + indexsearch: + zoneMemory: "20m" # Example: Zone memory, e.g., 10m, 20m + rate: "500r" # Example: Rate, e.g., 1000r (requests) + rateUnit: "m" # Example: Rate unit, e.g., m (minute), s (second), 1000r (requests per m minute) + burst: 100 # Example: Burst size + header: "$http_x_atlan_agent_id" # Example: Header name + bulk: + zoneMemory: "20m" + rate: "1000r" + rateUnit: "m" + burst: 100 + header: "$http_x_atlan_agent_id" + logging: + format: '$remote_addr - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$http_x_forwarded_for" "$http_x_atlan_client_origin" "$http_x_atlan_agent_id"' + vts: + zoneMemory: "32m" + +# Summary of Timeout Settings in Nginx + +# To Upstream (Atlas): +# proxy_connect_timeout: - Time taken to establish a connection to the atlas server. 
+# proxy_read_timeout: - Time taken to read a response from the atlas server. +# proxy_send_timeout: (defaults to proxy_read_timeout) - Time taken to send a request to the atlas server. +# From Client: +# client_body_timeout: - Time taken by the client to send the request body. +# client_header_timeout: - Time taken by the client to send the request header. +# keepalive_timeout: - This timeout applies to an idle client connection after Nginx has finished sending a response and is waiting for the next request on the same TCP connection. From 7d411009c5e12e864aeedc032efcca3f141db4ad Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 3 Nov 2025 10:55:49 +0530 Subject: [PATCH 4/7] make atlas dependencies oci --- .github/workflows/maven.yml | 167 +++++++++++++----- helm/atlas-read/Chart.yaml | 8 +- .../charts/cassandra-online-dc/values.yaml | 4 + .../elasticsearch-exporter-read/values.yaml | 4 + .../charts/elasticsearch-read/values.yaml | 4 + helm/atlas-read/values.yaml | 7 + helm/atlas/Chart.yaml | 6 + .../templates/poddisruptionbudget.yaml | 8 +- .../templates/synonym-configmap.yaml | 2 +- helm/atlas/values.yaml | 7 + 10 files changed, 170 insertions(+), 47 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index 9fddbd99731..c3958e172cc 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -38,7 +38,34 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - chart: ['atlas', 'atlas-read'] + include: + # Application charts + - chart: atlas + path: helm/atlas + requires_app_version: true + - chart: atlas-read + path: helm/atlas-read + requires_app_version: true + # Atlas infrastructure charts + - chart: cassandra + path: helm/atlas/charts/cassandra + requires_app_version: false + - chart: elasticsearch + path: helm/atlas/charts/elasticsearch + requires_app_version: false + - chart: logstash + path: helm/atlas/charts/logstash + requires_app_version: false + # Atlas-Read infrastructure charts + - chart: cassandra-online-dc + path: helm/atlas-read/charts/cassandra-online-dc + requires_app_version: false + - chart: elasticsearch-read + path: helm/atlas-read/charts/elasticsearch-read + requires_app_version: false + - chart: elasticsearch-exporter-read + path: helm/atlas-read/charts/elasticsearch-exporter-read + requires_app_version: false steps: - name: Checkout code @@ -50,8 +77,9 @@ jobs: version: '3.12.0' - name: Update helm dependencies + if: matrix.chart == 'atlas' || matrix.chart == 'atlas-read' run: | - cd helm/${{ matrix.chart }} + cd ${{ matrix.path }} helm dependency update echo "Chart dependencies:" @@ -59,21 +87,26 @@ jobs: - name: Lint helm chart run: | - helm lint helm/${{ matrix.chart }}/ + helm lint ${{ matrix.path }}/ echo "✅ ${{ matrix.chart }} chart lint passed!" - name: Validate Chart.yaml run: | # Check for required fields - if ! grep -q "^version:" helm/${{ matrix.chart }}/Chart.yaml; then + if ! grep -q "^version:" ${{ matrix.path }}/Chart.yaml; then echo "❌ Error: version field missing in Chart.yaml" exit 1 fi - if ! grep -q "^appVersion:" helm/${{ matrix.chart }}/Chart.yaml; then - echo "❌ Error: appVersion field missing in Chart.yaml" - exit 1 + + # appVersion is only required for application charts (atlas, atlas-read) + if [[ "${{ matrix.requires_app_version }}" == "true" ]]; then + if ! grep -q "^appVersion:" ${{ matrix.path }}/Chart.yaml; then + echo "❌ Error: appVersion field missing in Chart.yaml for application chart" + exit 1 + fi fi - echo "✅ Chart.yaml validation passed!" 
+ + echo "✅ Chart.yaml validation passed for ${{ matrix.chart }}!" build: needs: helm-lint @@ -240,14 +273,15 @@ jobs: if: steps.integration_tests.outcome == 'failure' run: exit 1 + - name: Clean up after integration tests + if: always() run: | - echo "==========================================" - echo "CLEANING UP AFTER INTEGRATION TESTS" - echo "==========================================" - - # Remove test containers and images - echo "Removing test containers and images..." + echo "Cleaning up test environment..." + docker ps -a -q | xargs -r docker rm -f || true + docker system prune -af || true + echo "" + echo "Removing other test containers and images..." docker system prune -af --volumes || true # Clean Maven artifacts to free up space @@ -337,6 +371,15 @@ jobs: name: Multi-Cloud Smoke Test needs: build runs-on: ubuntu-latest + # Only run smoke tests on protected branches (beta, staging, master) + # Feature branches skip smoke tests but can still publish OCI charts for manual testing + if: github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master' + + # Concurrency control: Only one smoke test at a time across all branches + # Shared vClusters cannot handle concurrent deployments + concurrency: + group: smoke-test-shared-vclusters + cancel-in-progress: false # Don't cancel running tests, queue instead env: VCLUSTER_AWS_NAME: ${{ vars.VCLUSTER_AWS_NAME }} @@ -474,11 +517,41 @@ jobs: path: smoke-test-logs/ helm-publish: - needs: smoke-test # Only publish if smoke tests pass in all clouds + needs: [build, smoke-test] runs-on: ubuntu-latest + # Run if smoke-test passed (protected branches) OR was skipped (feature branches) + if: always() && needs.build.result == 'success' && (needs.smoke-test.result == 'success' || needs.smoke-test.result == 'skipped') strategy: matrix: - chart: ['atlas', 'atlas-read'] + include: + # Application charts + - chart: atlas + path: helm/atlas + base_version: "1.0.0" + - chart: atlas-read + path: helm/atlas-read + base_version: "1.0.0" + # Atlas infrastructure charts + - chart: cassandra + path: helm/atlas/charts/cassandra + base_version: "0.14.4" + - chart: elasticsearch + path: helm/atlas/charts/elasticsearch + base_version: "7.6.1" + - chart: logstash + path: helm/atlas/charts/logstash + base_version: "9.1.2" + # Atlas-Read infrastructure charts + - chart: cassandra-online-dc + path: helm/atlas-read/charts/cassandra-online-dc + base_version: "0.14.4" + - chart: elasticsearch-read + path: helm/atlas-read/charts/elasticsearch-read + base_version: "7.6.1" + - chart: elasticsearch-exporter-read + path: helm/atlas-read/charts/elasticsearch-exporter-read + base_version: "3.3.0" + max-parallel: 1 # Publish sequentially to avoid race conditions steps: - name: Checkout code @@ -497,12 +570,12 @@ jobs: - name: Generate chart version id: version run: | - # Semantic version: 1.0.0-branch.commitid + # Semantic version: {base_version}-{branch}.{commit} # Replace underscores with hyphens for semver compliance BRANCH_NAME_NORMALIZED=$(echo "${{ steps.branch.outputs.name }}" | tr '_' '-') - CHART_VERSION="1.0.0-${BRANCH_NAME_NORMALIZED}.${{ steps.commit.outputs.id }}" + CHART_VERSION="${{ matrix.base_version }}-${BRANCH_NAME_NORMALIZED}.${{ steps.commit.outputs.id }}" echo "chart=${CHART_VERSION}" >> $GITHUB_OUTPUT - echo "Generated chart version: ${CHART_VERSION}" + echo "Generated chart version for ${{ matrix.chart }}: ${CHART_VERSION}" - name: Install Helm uses: azure/setup-helm@v3 @@ -511,24 +584,31 @@ jobs: - name: Update Chart.yaml with 
version run: | - sed -i "s/^version: .*/version: ${{ steps.version.outputs.chart }}/" helm/${{ matrix.chart }}/Chart.yaml - sed -i "s/^appVersion: .*/appVersion: \"${{ steps.commit.outputs.id }}\"/" helm/${{ matrix.chart }}/Chart.yaml + sed -i "s/^version: .*/version: ${{ steps.version.outputs.chart }}/" ${{ matrix.path }}/Chart.yaml + + # Only update appVersion for application charts (atlas, atlas-read) + if [[ "${{ matrix.chart }}" == "atlas" ]] || [[ "${{ matrix.chart }}" == "atlas-read" ]]; then + sed -i "s/^appVersion: .*/appVersion: \"${{ steps.commit.outputs.id }}\"/" ${{ matrix.path }}/Chart.yaml + echo "Updated appVersion for ${{ matrix.chart }}" + fi echo "Updated ${{ matrix.chart }}/Chart.yaml:" - cat helm/${{ matrix.chart }}/Chart.yaml | grep -E "^(version|appVersion):" + cat ${{ matrix.path }}/Chart.yaml | head -10 - - name: Update values.yaml with image tags + - name: Update values.yaml with image tags (application charts only) + if: matrix.chart == 'atlas' || matrix.chart == 'atlas-read' run: | - # Replace placeholders with actual values - sed -i "s/ATLAS_LATEST_IMAGE_TAG/${{ steps.commit.outputs.id }}/g" helm/${{ matrix.chart }}/values.yaml - sed -i "s/ATLAS_BRANCH_NAME/${{ steps.branch.outputs.name }}/g" helm/${{ matrix.chart }}/values.yaml + # Replace placeholders with actual values (only for atlas/atlas-read) + sed -i "s/ATLAS_LATEST_IMAGE_TAG/${{ steps.commit.outputs.id }}/g" ${{ matrix.path }}/values.yaml + sed -i "s/ATLAS_BRANCH_NAME/${{ steps.branch.outputs.name }}/g" ${{ matrix.path }}/values.yaml echo "Image configuration in ${{ matrix.chart }}/values.yaml:" - grep -A 3 "image:" helm/${{ matrix.chart }}/values.yaml | head -10 + grep -A 3 "image:" ${{ matrix.path }}/values.yaml | head -10 - name: Update helm dependencies + if: matrix.chart == 'atlas' || matrix.chart == 'atlas-read' run: | - cd helm/${{ matrix.chart }} + cd ${{ matrix.path }} helm dependency update echo "Chart dependencies:" @@ -537,10 +617,10 @@ jobs: - name: Package helm chart run: | mkdir -p helm-packages - helm package helm/${{ matrix.chart }}/ --destination ./helm-packages/ + helm package ${{ matrix.path }}/ --destination ./helm-packages/ - echo "Packaged charts:" - ls -lh helm-packages/ + echo "Packaged chart: ${{ matrix.chart }}" + ls -lh helm-packages/${{ matrix.chart }}-* - name: Login to GitHub Container Registry uses: docker/login-action@v2 @@ -571,13 +651,9 @@ jobs: **Chart**: `${{ matrix.chart }}` **Chart Version**: `${{ steps.version.outputs.chart }}` - **App Version**: `${{ steps.commit.outputs.id }}` - **Branch**: `${{ steps.branch.outputs.name }}` - - ### 🐳 Docker Image - ``` - ghcr.io/atlanhq/atlas-metastore-${{ steps.branch.outputs.name }}:${{ steps.commit.outputs.id }} - ``` + **Base Version**: `${{ matrix.base_version }}` + **Branch**: `${{ steps.branch.outputs.name }}` + **Commit**: `${{ steps.commit.outputs.id }}` ### 📥 Installation @@ -587,12 +663,21 @@ jobs: --version ${{ steps.version.outputs.chart }} ``` - **Via Downloaded Chart:** + **With custom values:** ```bash - helm install ${{ matrix.chart }} ./${{ matrix.chart }}-${{ steps.version.outputs.chart }}.tgz + helm install ${{ matrix.chart }} oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }} \ + --version ${{ steps.version.outputs.chart }} \ + -f custom-values.yaml ``` + + ### 🔗 Links + + - **Registry**: `oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }}` + - **Repository**: [atlanhq/atlas-metastore](https://github.com/atlanhq/atlas-metastore) + - **Path**: `${{ matrix.path }}` artifacts: "./helm-packages/${{ 
matrix.chart }}-*.tgz" - token: ${{ secrets.GITHUB_TOKEN }} + token: ${{ secrets.ORG_PAT_GITHUB }} + allowUpdates: true makeLatest: false - name: Chart publish summary diff --git a/helm/atlas-read/Chart.yaml b/helm/atlas-read/Chart.yaml index 82124dece3e..3db42ff234a 100644 --- a/helm/atlas-read/Chart.yaml +++ b/helm/atlas-read/Chart.yaml @@ -17,12 +17,18 @@ sources: - https://github.com/atlanhq/atlas-metastore home: https://github.com/atlanhq/atlas-metastore dependencies: + # Infrastructure charts - disabled by default (published separately as OCI) + # Charts exist in charts/ subdirectory for individual OCI publishing + # Consumed as peers in parent atlan chart, not as subcharts here - name: cassandra-online-dc repository: file://./charts/cassandra-online-dc - version: 0.x.x + version: 0.x.x + condition: cassandra-online-dc.enabled # Disabled in values.yaml - name: elasticsearch-read repository: file://./charts/elasticsearch-read version: 7.x.x + condition: elasticsearch-read.enabled # Disabled in values.yaml - name: elasticsearch-exporter-read repository: file://./charts/elasticsearch-exporter-read version: 3.3.0 + condition: elasticsearch-exporter-read.enabled # Disabled in values.yaml \ No newline at end of file diff --git a/helm/atlas-read/charts/cassandra-online-dc/values.yaml b/helm/atlas-read/charts/cassandra-online-dc/values.yaml index 2188c875e0c..b9087ef8aed 100755 --- a/helm/atlas-read/charts/cassandra-online-dc/values.yaml +++ b/helm/atlas-read/charts/cassandra-online-dc/values.yaml @@ -9,6 +9,10 @@ multiarch: global: Tier_Type: "" Deployment_Type: "" + svcIsolation: + enabled: false + globalSvcIsolation: + enabled: false image: repo: ghcr.io/atlanhq/cassandra tag: 3.11.12 diff --git a/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml b/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml index cecfb9ad3b2..75e8d20a321 100644 --- a/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml +++ b/helm/atlas-read/charts/elasticsearch-exporter-read/values.yaml @@ -7,6 +7,10 @@ multiarch: #replicaCount: 1 global: Tier_Type: "" + esIsolation: + enabled: false + globalSvcIsolation: + enabled: false ## restart policy for all containers ## Namespace: monitoring diff --git a/helm/atlas-read/charts/elasticsearch-read/values.yaml b/helm/atlas-read/charts/elasticsearch-read/values.yaml index c0320e2047b..daf34f8f0ae 100755 --- a/helm/atlas-read/charts/elasticsearch-read/values.yaml +++ b/helm/atlas-read/charts/elasticsearch-read/values.yaml @@ -13,6 +13,10 @@ nodeGroup: "master" global: Tier_Type: "" Deployment_Type: "" + esIsolation: + enabled: false + globalSvcIsolation: + enabled: false masterService: "" # Elasticsearch roles that will be applied to this nodeGroup diff --git a/helm/atlas-read/values.yaml b/helm/atlas-read/values.yaml index 4992bf53bfe..04dbf4d316d 100644 --- a/helm/atlas-read/values.yaml +++ b/helm/atlas-read/values.yaml @@ -1,3 +1,10 @@ +# Infrastructure charts disabled - published separately as OCI artifacts +cassandra-online-dc: + enabled: false +elasticsearch-read: + enabled: false +elasticsearch-exporter-read: + enabled: false multiarch: enabled: false diff --git a/helm/atlas/Chart.yaml b/helm/atlas/Chart.yaml index d9776a78d57..180b8bf9638 100644 --- a/helm/atlas/Chart.yaml +++ b/helm/atlas/Chart.yaml @@ -17,12 +17,18 @@ sources: - https://github.com/atlanhq/atlas-metastore home: https://github.com/atlanhq/atlas-metastore dependencies: + # Infrastructure charts - disabled by default (published separately as OCI) + # Charts exist in 
charts/ subdirectory for individual OCI publishing + # Consumed as peers in parent atlan chart, not as subcharts here - name: cassandra repository: file://./charts/cassandra version: 0.x.x + condition: cassandra.enabled # Disabled in values.yaml - name: elasticsearch repository: file://./charts/elasticsearch version: 7.x.x + condition: elasticsearch.enabled # Disabled in values.yaml - name: logstash repository: file://./charts/logstash version: 9.x.x + condition: logstash.enabled # Disabled in values.yaml \ No newline at end of file diff --git a/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml b/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml index a273a2fd667..6fe6acb3e80 100755 --- a/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml +++ b/helm/atlas/charts/elasticsearch/templates/poddisruptionbudget.yaml @@ -1,8 +1,8 @@ ---- {{- if .Values.minAvailable }} -{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) -}} +--- +{{- if and (.Capabilities.APIVersions.Has "policy/v1") (semverCompare ">= 1.19-0" .Capabilities.KubeVersion.Version) }} apiVersion: policy/v1 -{{- else -}} +{{- else }} apiVersion: policy/v1beta1 {{- end }} kind: PodDisruptionBudget @@ -14,4 +14,4 @@ spec: selector: matchLabels: app: "{{ template "elasticsearch.uname" . }}" -{{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml b/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml index 26a184d6c0f..e32b04d4aef 100644 --- a/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml +++ b/helm/atlas/charts/elasticsearch/templates/synonym-configmap.yaml @@ -4,7 +4,7 @@ metadata: name: elasticsearch-synonym-config namespace: {{ .Values.Namespace }} labels: - app: {{ template "name" . }} + app: {{ template "elasticsearch.name" . 
}} chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} release: {{ .Release.Name }} heritage: {{ .Release.Service }} diff --git a/helm/atlas/values.yaml b/helm/atlas/values.yaml index c6d999b9731..fddc62b2dea 100644 --- a/helm/atlas/values.yaml +++ b/helm/atlas/values.yaml @@ -1,3 +1,10 @@ +# Infrastructure charts disabled - published separately as OCI artifacts +cassandra: + enabled: false +elasticsearch: + enabled: false +logstash: + enabled: false multiarch: enabled: false From ee8aebce695fa3dac02b33cb1a2066ee5aaa0148 Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 3 Nov 2025 14:49:10 +0530 Subject: [PATCH 5/7] optimize maven wf --- .github/workflows/maven.yml | 177 +++++++++++++++++++++++++++++++++--- 1 file changed, 166 insertions(+), 11 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index c3958e172cc..ebb2da4cddc 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -34,6 +34,66 @@ on: - mlh-1240-improve-cm-refresh-master jobs: + # Detect what changed to optimize workflow execution + # This dramatically speeds up helm-only changes by skipping the 20+ minute build job + changes: + runs-on: ubuntu-latest + outputs: + code: ${{ steps.filter.outputs.code }} + helm: ${{ steps.filter.outputs.helm }} + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 # Fetch all history for accurate comparisons + + - uses: dorny/paths-filter@v2 + id: filter + with: + # Compare against the previous commit on this branch (not master) + # This way we only detect changes in the current push + base: ${{ github.event.before }} + filters: | + code: + - '**/*.java' + - '**/*.xml' + - '**/*.properties' + - 'webapp/**' + - 'repository/**' + - 'intg/**' + - 'graphdb/**' + - 'pom.xml' + - '**/pom.xml' + helm: + - 'helm/**' + + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + # WORKFLOW EXECUTION PATHS: + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + # + # 📦 HELM-ONLY CHANGES (exclusively helm/** files): + # changes ✅ → helm-lint ✅ → build ⏭️ → smoke-test ⏭️ → helm-publish ✅ + # ⏱️ Time: ~5 minutes (80% faster!) + # 📊 Publishing: All 8 charts + # • Infrastructure charts (6): As-is + # • Application charts (2): With latest existing image tag from GHCR + # + # 💻 CODE/WORKFLOW CHANGES (*.java, maven.yml, Dockerfile, etc.): + # changes ✅ → helm-lint ✅ → build ✅ → smoke-test ✅/⏭️ → helm-publish ✅ + # ⏱️ Time: ~25 minutes (full pipeline) + # 📊 Publishing: All 8 charts + # • Infrastructure charts (6): As-is + # • Application charts (2): With NEW image tag from build + # + # 🛡️ QUALITY GATES: + # • helm-lint must pass (syntax validation) + # • build must pass (for code changes) + # • smoke-test must pass (on protected branches) + # • atlas/atlas-read always reference valid images + # + # Logic: Skip build ONLY if changes are exclusively in helm/** + # + # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + helm-lint: runs-on: ubuntu-latest strategy: @@ -109,7 +169,10 @@ jobs: echo "✅ Chart.yaml validation passed for ${{ matrix.chart }}!" 
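
For quick local verification of the path-filter rules above, the same helm-only vs. non-helm decision can be approximated with plain git. This is a minimal sketch, assuming the push range corresponds to `github.event.before` and the current SHA; it is not part of the workflow itself, just a way to sanity-check which jobs a given push would trigger.

```bash
#!/usr/bin/env bash
# Local approximation of the `changes` job: list the files changed in the
# pushed range and decide whether anything outside helm/ changed (non_helm),
# mirroring the dorny/paths-filter rules '**' + '!helm/**' vs 'helm/**'.
set -euo pipefail

BASE="${1:?usage: detect-changes.sh <base-sha> <head-sha>}"
HEAD="${2:?usage: detect-changes.sh <base-sha> <head-sha>}"

changed_files=$(git diff --name-only "${BASE}" "${HEAD}")

helm=false
non_helm=false
while IFS= read -r f; do
  [ -z "$f" ] && continue
  case "$f" in
    helm/*) helm=true ;;      # chart-only change -> helm-publish path
    *)      non_helm=true ;;  # anything else -> full build pipeline
  esac
done <<< "${changed_files}"

echo "helm=${helm}"
echo "non_helm=${non_helm}"   # build job runs only when this is true
```
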
build: - needs: helm-lint + needs: [helm-lint, changes] + # Skip build ONLY if changes are exclusively in helm/** + # Run build for: code changes, workflow changes, Dockerfile, scripts, or anything else + if: needs.changes.outputs.helm != 'true' || needs.changes.outputs.code == 'true' runs-on: ubuntu-latest steps: @@ -369,11 +432,12 @@ jobs: smoke-test: name: Multi-Cloud Smoke Test - needs: build + needs: [build, changes] runs-on: ubuntu-latest - # Only run smoke tests on protected branches (beta, staging, master) - # Feature branches skip smoke tests but can still publish OCI charts for manual testing - if: github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master' + # Run smoke tests on protected branches if build ran (i.e., not helm-only changes) + if: | + needs.build.result == 'success' && + (github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master') # Concurrency control: Only one smoke test at a time across all branches # Shared vClusters cannot handle concurrent deployments @@ -517,10 +581,18 @@ jobs: path: smoke-test-logs/ helm-publish: - needs: [build, smoke-test] + needs: [helm-lint, build, smoke-test, changes] runs-on: ubuntu-latest - # Run if smoke-test passed (protected branches) OR was skipped (feature branches) - if: always() && needs.build.result == 'success' && (needs.smoke-test.result == 'success' || needs.smoke-test.result == 'skipped') + # Run helm-publish if: + # 1. Helm-lint passed (quality gate) + # 2. Build succeeded AND smoke-test passed/skipped: publish (code/workflow changes) + # 3. Build skipped AND helm changed: publish (helm-only changes) + if: | + always() && + needs.helm-lint.result == 'success' && ( + (needs.build.result == 'success' && (needs.smoke-test.result == 'success' || needs.smoke-test.result == 'skipped')) || + (needs.build.result == 'skipped' && needs.changes.outputs.helm == 'true') + ) strategy: matrix: include: @@ -554,20 +626,84 @@ jobs: max-parallel: 1 # Publish sequentially to avoid race conditions steps: + - name: Determine publish strategy + id: can_publish + run: | + # All charts can be published + # Application charts (atlas, atlas-read) have different strategies: + # 1. Build succeeded: Use new image tag (current commit) + # 2. 
Build skipped (helm-only): Use latest existing image tag from GHCR + echo "should_publish=true" >> $GITHUB_OUTPUT + + if [[ "${{ matrix.chart }}" == "atlas" ]] || [[ "${{ matrix.chart }}" == "atlas-read" ]]; then + if [[ "${{ needs.build.result }}" == "success" ]]; then + echo "use_new_image=true" >> $GITHUB_OUTPUT + echo "✅ ${{ matrix.chart }}: Will use new image from build" + else + echo "use_new_image=false" >> $GITHUB_OUTPUT + echo "📦 ${{ matrix.chart }}: Helm-only change, will use latest existing image" + fi + fi + - name: Checkout code + if: steps.can_publish.outputs.should_publish == 'true' uses: actions/checkout@v3 - name: Get branch name + if: steps.can_publish.outputs.should_publish == 'true' id: branch run: | echo "name=${GITHUB_REF#refs/heads/}" >> $GITHUB_OUTPUT - name: Get commit ID + if: steps.can_publish.outputs.should_publish == 'true' id: commit run: | echo "id=$(echo ${GITHUB_SHA} | cut -c1-7)abcd" >> $GITHUB_OUTPUT + - name: Get latest image tag (for helm-only changes) + if: steps.can_publish.outputs.should_publish == 'true' && steps.can_publish.outputs.use_new_image == 'false' + id: latest_image + env: + GITHUB_TOKEN: ${{ secrets.ORG_PAT_GITHUB }} + run: | + # Fetch the latest existing image tag from GHCR (excluding arm64/amd64 variants) + IMAGE_NAME="atlas-metastore-${{ steps.branch.outputs.name }}" + + echo "🔍 Fetching latest image tag for ${IMAGE_NAME}..." + + # Fetch all tags and filter + all_tags=() + link="n=200" + + while [[ $link != "" ]]; do + response=$(curl -s -X GET "https://ghcr.io/v2/atlanhq/${IMAGE_NAME}/tags/list?${link}" \ + -H "Authorization: Bearer $(echo ${GITHUB_TOKEN} | base64)") + + # Filter out arm64/amd64 variants and "latest" tag + tags=$(echo "$response" | jq -r '.tags[]' 2>/dev/null | grep -vE '(-arm64|-amd64|^latest)$' || true) + all_tags+=($tags) + + # Check for next page + link=$(curl -s -I -X GET "https://ghcr.io/v2/atlanhq/${IMAGE_NAME}/tags/list?${link}" \ + -H "Authorization: Bearer $(echo ${GITHUB_TOKEN} | base64)" | \ + grep -i link | cut -f 2 -d '<' | cut -f 1 -d '>' | cut -f 2 -d '?' 
|| true) + sleep 1 + done + + # Get the last valid tag (most recent) + if [[ ${#all_tags[@]} -eq 0 ]]; then + echo "❌ No valid tags found for ${IMAGE_NAME}" + echo "💡 This might be the first commit on this branch" + exit 1 + else + LATEST_TAG="${all_tags[-1]}" + echo "📌 Latest image tag: ${LATEST_TAG}" + echo "tag=${LATEST_TAG}" >> $GITHUB_OUTPUT + fi + - name: Generate chart version + if: steps.can_publish.outputs.should_publish == 'true' id: version run: | # Semantic version: {base_version}-{branch}.{commit} @@ -578,11 +714,13 @@ jobs: echo "Generated chart version for ${{ matrix.chart }}: ${CHART_VERSION}" - name: Install Helm + if: steps.can_publish.outputs.should_publish == 'true' uses: azure/setup-helm@v3 with: version: '3.12.0' - name: Update Chart.yaml with version + if: steps.can_publish.outputs.should_publish == 'true' run: | sed -i "s/^version: .*/version: ${{ steps.version.outputs.chart }}/" ${{ matrix.path }}/Chart.yaml @@ -596,17 +734,29 @@ jobs: cat ${{ matrix.path }}/Chart.yaml | head -10 - name: Update values.yaml with image tags (application charts only) - if: matrix.chart == 'atlas' || matrix.chart == 'atlas-read' + if: steps.can_publish.outputs.should_publish == 'true' && (matrix.chart == 'atlas' || matrix.chart == 'atlas-read') run: | # Replace placeholders with actual values (only for atlas/atlas-read) - sed -i "s/ATLAS_LATEST_IMAGE_TAG/${{ steps.commit.outputs.id }}/g" ${{ matrix.path }}/values.yaml + # Strategy depends on whether build ran: + # - Build succeeded: Use new image tag (current commit) + # - Build skipped (helm-only): Use latest existing image tag + + if [[ "${{ steps.can_publish.outputs.use_new_image }}" == "true" ]]; then + IMAGE_TAG="${{ steps.commit.outputs.id }}" + echo "✅ Using NEW image tag from build: ${IMAGE_TAG}" + else + IMAGE_TAG="${{ steps.latest_image.outputs.tag }}" + echo "📦 Using LATEST existing image tag: ${IMAGE_TAG}" + fi + + sed -i "s/ATLAS_LATEST_IMAGE_TAG/${IMAGE_TAG}/g" ${{ matrix.path }}/values.yaml sed -i "s/ATLAS_BRANCH_NAME/${{ steps.branch.outputs.name }}/g" ${{ matrix.path }}/values.yaml echo "Image configuration in ${{ matrix.chart }}/values.yaml:" grep -A 3 "image:" ${{ matrix.path }}/values.yaml | head -10 - name: Update helm dependencies - if: matrix.chart == 'atlas' || matrix.chart == 'atlas-read' + if: steps.can_publish.outputs.should_publish == 'true' && (matrix.chart == 'atlas' || matrix.chart == 'atlas-read') run: | cd ${{ matrix.path }} helm dependency update @@ -615,6 +765,7 @@ jobs: ls -la charts/ - name: Package helm chart + if: steps.can_publish.outputs.should_publish == 'true' run: | mkdir -p helm-packages helm package ${{ matrix.path }}/ --destination ./helm-packages/ @@ -623,6 +774,7 @@ jobs: ls -lh helm-packages/${{ matrix.chart }}-* - name: Login to GitHub Container Registry + if: steps.can_publish.outputs.should_publish == 'true' uses: docker/login-action@v2 with: registry: ghcr.io @@ -630,6 +782,7 @@ jobs: password: ${{ secrets.ORG_PAT_GITHUB }} - name: Push chart to GHCR (OCI Registry) + if: steps.can_publish.outputs.should_publish == 'true' run: | CHART_FILE=$(ls helm-packages/${{ matrix.chart }}-*.tgz) echo "Pushing chart: ${CHART_FILE}" @@ -642,6 +795,7 @@ jobs: echo "🏷️ Registry: oci://ghcr.io/atlanhq/helm-charts/${{ matrix.chart }}" - name: Create GitHub Release + if: steps.can_publish.outputs.should_publish == 'true' uses: ncipollo/release-action@v1 with: tag: helm-${{ matrix.chart }}-v${{ steps.version.outputs.chart }} @@ -681,6 +835,7 @@ jobs: makeLatest: false - name: Chart publish summary + 
if: steps.can_publish.outputs.should_publish == 'true' run: | echo "## 🎉 Helm Chart Published Successfully!" >> $GITHUB_STEP_SUMMARY echo "" >> $GITHUB_STEP_SUMMARY From e7337e658420008faa4670f09106cb46e08069dc Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 10 Nov 2025 09:57:47 +0530 Subject: [PATCH 6/7] optimize maven build --- .github/workflows/maven.yml | 148 ++++++++++++++++++------------ scripts/multi-cloud-smoke-test.sh | 15 ++- 2 files changed, 95 insertions(+), 68 deletions(-) diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml index ebb2da4cddc..a52862d2644 100644 --- a/.github/workflows/maven.yml +++ b/.github/workflows/maven.yml @@ -39,13 +39,13 @@ jobs: changes: runs-on: ubuntu-latest outputs: - code: ${{ steps.filter.outputs.code }} + non_helm: ${{ steps.filter.outputs.non_helm }} helm: ${{ steps.filter.outputs.helm }} steps: - uses: actions/checkout@v3 with: fetch-depth: 0 # Fetch all history for accurate comparisons - + - uses: dorny/paths-filter@v2 id: filter with: @@ -53,46 +53,11 @@ jobs: # This way we only detect changes in the current push base: ${{ github.event.before }} filters: | - code: - - '**/*.java' - - '**/*.xml' - - '**/*.properties' - - 'webapp/**' - - 'repository/**' - - 'intg/**' - - 'graphdb/**' - - 'pom.xml' - - '**/pom.xml' helm: - 'helm/**' - - # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - # WORKFLOW EXECUTION PATHS: - # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - # - # 📦 HELM-ONLY CHANGES (exclusively helm/** files): - # changes ✅ → helm-lint ✅ → build ⏭️ → smoke-test ⏭️ → helm-publish ✅ - # ⏱️ Time: ~5 minutes (80% faster!) - # 📊 Publishing: All 8 charts - # • Infrastructure charts (6): As-is - # • Application charts (2): With latest existing image tag from GHCR - # - # 💻 CODE/WORKFLOW CHANGES (*.java, maven.yml, Dockerfile, etc.): - # changes ✅ → helm-lint ✅ → build ✅ → smoke-test ✅/⏭️ → helm-publish ✅ - # ⏱️ Time: ~25 minutes (full pipeline) - # 📊 Publishing: All 8 charts - # • Infrastructure charts (6): As-is - # • Application charts (2): With NEW image tag from build - # - # 🛡️ QUALITY GATES: - # • helm-lint must pass (syntax validation) - # • build must pass (for code changes) - # • smoke-test must pass (on protected branches) - # • atlas/atlas-read always reference valid images - # - # Logic: Skip build ONLY if changes are exclusively in helm/** - # - # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + non_helm: + - '**' + - '!helm/**' helm-lint: runs-on: ubuntu-latest @@ -171,8 +136,8 @@ jobs: build: needs: [helm-lint, changes] # Skip build ONLY if changes are exclusively in helm/** - # Run build for: code changes, workflow changes, Dockerfile, scripts, or anything else - if: needs.changes.outputs.helm != 'true' || needs.changes.outputs.code == 'true' + # Run build for: any changes outside helm/ (future-proof) + if: needs.changes.outputs.non_helm == 'true' runs-on: ubuntu-latest steps: @@ -201,10 +166,20 @@ jobs: - name: Cache Maven packages uses: actions/cache@v3 + id: maven-cache with: - path: ~/.m2 - key: ${{ runner.os }}-m2-${{ hashFiles('**/build.sh') }} - restore-keys: ${{ runner.os }}-m2 + path: ~/.m2/repository + key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }} + restore-keys: | + ${{ runner.os }}-maven- + + - name: Check Maven cache status + run: | + if [ "${{ steps.maven-cache.outputs.cache-hit }}" == "true" ]; then + echo "✅ Maven cache hit - dependencies will be reused" + else + echo "ℹ️ Maven cache miss - 
will download dependencies" + fi - name: Get branch name run: | @@ -228,6 +203,7 @@ jobs: - name: Check disk space before tests id: check_disk + if: github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master' run: | echo "==========================================" echo "DISK SPACE CHECK" @@ -247,7 +223,7 @@ jobs: fi - name: Free up disk space for tests - if: steps.check_disk.outputs.disk_usage > 70 + if: steps.check_disk.outputs.disk_usage > 70 && (github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master') run: | echo "==========================================" echo "CLEANING UP DISK SPACE" @@ -283,6 +259,7 @@ jobs: df -h / | grep -E '^/dev/' || df -h / | tail -1 - name: Verify sufficient disk space + if: github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master' run: | echo "==========================================" echo "VERIFYING DISK SPACE" @@ -306,6 +283,7 @@ jobs: - name: Run Integration Tests id: integration_tests + if: github.ref_name == 'beta' || github.ref_name == 'staging' || github.ref_name == 'master' continue-on-error: true env: # Configure Testcontainers for GitHub Actions @@ -314,31 +292,22 @@ jobs: DOCKER_HOST: unix:///var/run/docker.sock run: | echo "Running integration tests..." - chmod +x ./run-integration-tests.sh && ./run-integration-tests.sh - + chmod +x ./run-integration-tests.sh && ./run-integration-tests.sh --skip-build + - name: Upload container logs as artifact - if: always() # Upload logs even if tests pass (for debugging) + if: always() && steps.integration_tests.outcome != 'skipped' uses: actions/upload-artifact@v4 with: name: container-logs-${{ github.run_id }} path: target/test-logs/ retention-days: 5 - - - name: Setup tmate session on test failure - if: steps.integration_tests.outcome == 'failure' - uses: mxschmitt/action-tmate@v3 - timeout-minutes: 30 - with: - detached: true - limit-access-to-actor: false - + - name: Fail the workflow if tests failed if: steps.integration_tests.outcome == 'failure' run: exit 1 - - name: Clean up after integration tests - if: always() + if: always() && steps.integration_tests.outcome != 'skipped' run: | echo "Cleaning up test environment..." docker ps -a -q | xargs -r docker rm -f || true @@ -355,6 +324,7 @@ jobs: echo "Cleaning test artifacts..." rm -rf webapp/target/surefire-reports/ || true rm -rf test-debug-logs/ || true + rm -rf target/test-logs/ || true # Clean temp files echo "Cleaning temp files..." 
@@ -579,6 +549,64 @@ jobs: with: name: smoke-test-logs-${{ github.run_id }} path: smoke-test-logs/ + + - name: Smoke test summary + if: always() + run: | + echo "## Multi-Cloud Smoke Tests" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Image**: \`${{ env.TEST_IMAGE }}\`" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + # Check results for each cloud + AWS_STATUS="❓ Unknown" + AZURE_STATUS="❓ Unknown" + GCP_STATUS="❓ Unknown" + + if [ -f "smoke-test-logs/AWS.log" ]; then + if grep -q "SMOKE TEST PASSED" smoke-test-logs/AWS.log; then + AWS_STATUS="✅ Passed" + else + AWS_STATUS="❌ Failed" + fi + fi + + if [ -f "smoke-test-logs/Azure.log" ]; then + if grep -q "SMOKE TEST PASSED" smoke-test-logs/Azure.log; then + AZURE_STATUS="✅ Passed" + else + AZURE_STATUS="❌ Failed" + fi + fi + + if [ -f "smoke-test-logs/GCP.log" ]; then + if grep -q "SMOKE TEST PASSED" smoke-test-logs/GCP.log; then + GCP_STATUS="✅ Passed" + else + GCP_STATUS="❌ Failed" + fi + fi + + echo "| Cloud | vCluster | Status |" >> $GITHUB_STEP_SUMMARY + echo "|-------|----------|--------|" >> $GITHUB_STEP_SUMMARY + echo "| AWS | \`${{ env.VCLUSTER_AWS_NAME }}\` | $AWS_STATUS |" >> $GITHUB_STEP_SUMMARY + echo "| Azure | \`${{ env.VCLUSTER_AZURE_NAME }}\` | $AZURE_STATUS |" >> $GITHUB_STEP_SUMMARY + echo "| GCP | \`${{ env.VCLUSTER_GCP_NAME }}\` | $GCP_STATUS |" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + + # Overall status + if [[ "$AWS_STATUS" == *"Passed"* ]] && [[ "$AZURE_STATUS" == *"Passed"* ]] && [[ "$GCP_STATUS" == *"Passed"* ]]; then + echo "**Overall**: ✅ All smoke tests passed" >> $GITHUB_STEP_SUMMARY + else + echo "**Overall**: ⚠️ Some smoke tests failed" >> $GITHUB_STEP_SUMMARY + fi + + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Tests Run**:" >> $GITHUB_STEP_SUMMARY + echo "- Deployment patch and rollout" >> $GITHUB_STEP_SUMMARY + echo "- Atlas API health check (\`/api/atlas/admin/status\`)" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "📊 [View detailed logs in artifacts](#)" >> $GITHUB_STEP_SUMMARY helm-publish: needs: [helm-lint, build, smoke-test, changes] diff --git a/scripts/multi-cloud-smoke-test.sh b/scripts/multi-cloud-smoke-test.sh index 613ecc71a54..70dd7b6b474 100755 --- a/scripts/multi-cloud-smoke-test.sh +++ b/scripts/multi-cloud-smoke-test.sh @@ -3,7 +3,7 @@ ############################################################################## # Multi-Cloud Smoke Test Script # -# Tests Atlas deployment across multiple cloud environments in parallel +# Tests Atlas statefulset across multiple cloud environments in parallel # # Usage: # ./multi-cloud-smoke-test.sh @@ -64,20 +64,20 @@ test_cloud() { exit 1 fi - # Patch deployment - echo "[${CLOUD}] Patching Atlas deployment..." - if ! KUBECONFIG=$KUBECONFIG_FILE kubectl set image deployment/atlas \ + # Patch statefulset + echo "[${CLOUD}] Patching Atlas statefulset..." + if ! KUBECONFIG=$KUBECONFIG_FILE kubectl set image statefulset/atlas \ atlas-main=$TEST_IMAGE \ -n atlas; then - echo "[${CLOUD}] ❌ ERROR: Failed to patch deployment" + echo "[${CLOUD}] ❌ ERROR: Failed to patch statefulset" exit 1 fi - echo "[${CLOUD}] ✓ Deployment patched" + echo "[${CLOUD}] ✓ StatefulSet patched" echo "" # Wait for rollout echo "[${CLOUD}] Waiting for rollout (10 min timeout)..." 
- if KUBECONFIG=$KUBECONFIG_FILE kubectl rollout status deployment/atlas -n atlas --timeout=10m; then + if KUBECONFIG=$KUBECONFIG_FILE kubectl rollout status statefulset/atlas -n atlas --timeout=10m; then echo "[${CLOUD}] ✓ Rollout completed successfully" else echo "[${CLOUD}] ❌ ERROR: Rollout failed or timed out" @@ -315,4 +315,3 @@ if [ $FAILED -eq 1 ]; then fi echo -e "${GREEN}✅ All smoke tests passed!${NC}" - From 643c4c12d811c1a1483f678a9f196c33c7f7a19e Mon Sep 17 00:00:00 2001 From: Krishnanunni M Date: Mon, 10 Nov 2025 10:19:44 +0530 Subject: [PATCH 7/7] sync with atlan --- helm/atlas-read/templates/configmap.yaml | 2 - helm/atlas-read/templates/deployment.yaml | 236 ------------------ helm/atlas-read/templates/statefulset.yaml | 3 +- helm/atlas-read/values.yaml | 2 +- helm/atlas/charts/logstash/values.yaml | 149 +++++++++++- helm/atlas/templates/configmap.yaml | 2 - helm/atlas/templates/deployment.yaml | 265 --------------------- helm/atlas/templates/statefulset.yaml | 5 +- helm/atlas/values.yaml | 2 +- 9 files changed, 144 insertions(+), 522 deletions(-) delete mode 100644 helm/atlas-read/templates/deployment.yaml delete mode 100644 helm/atlas/templates/deployment.yaml diff --git a/helm/atlas-read/templates/configmap.yaml b/helm/atlas-read/templates/configmap.yaml index d147f850a26..8dfbb025ccb 100644 --- a/helm/atlas-read/templates/configmap.yaml +++ b/helm/atlas-read/templates/configmap.yaml @@ -277,11 +277,9 @@ data: atlas.server.type.cache-refresher=http://cinv.atlas.svc.cluster.local:5000/cinv atlas.server.type.cache-refresher-health=http://cinv.atlas.svc.cluster.local:5000/health #### Enabled the configs below as per need if HA is enabled ##### - {{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} atlas.server.ids=id1,id2 atlas.server.address.id1=atlas-read-0.atlas-read-service-atlas.atlas.svc.cluster.local:21000 atlas.server.address.id2=atlas-read-1.atlas-read-service-atlas.atlas.svc.cluster.local:21000 - {{- end }} atlas.server.ha.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 atlas.server.ha.zookeeper.retry.sleeptime.ms=10000 atlas.server.ha.zookeeper.num.retries=18 diff --git a/helm/atlas-read/templates/deployment.yaml b/helm/atlas-read/templates/deployment.yaml deleted file mode 100644 index 5504e8503ec..00000000000 --- a/helm/atlas-read/templates/deployment.yaml +++ /dev/null @@ -1,236 +0,0 @@ -{{- if and (.Values.deploy) (.Values.deploy.enabled) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.atlas.name }} - namespace: {{ .Values.Namespace }} - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} - annotations: - configmap.reloader.stakater.com/reload: "atlas-read-config,atlas-read-logback-config,atlas-read-audit-index,atlas-read-keycloak-config,atlas-read-init-script,atlas-read-init-container-script" - secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-read-keycloak-config" -spec: - selector: - matchLabels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - {{- if not (and (.Values.hpa) (.Values.hpa.enabled)) }} - replicas: {{ .Values.atlas.replicaCount }} - {{- end }} - template: - metadata: - labels: - app: {{ template "name" . 
}} - release: {{ .Release.Name }} - annotations: -{{ toYaml .Values.atlas.podAnnotations | indent 8 }} - spec: - {{- with .Values.atlas.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} - affinity: - nodeAffinity: - {{- if eq .Values.atlas.custom_deployment.enabled true }} - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} - - matchExpressions: - - key: purpose - operator: In - values: - - search - - key: karpenter.sh/capacity-type - operator: In - values: - - on-demand - {{- else }} - - matchExpressions: - - key: node.kubernetes.io/instance-type - operator: In - values: - {{- range .Values.atlas.custom_deployment.instance_type }} - - {{ . }} - {{- end }} - {{- end }} - {{- else }} - {{- if eq .Values.global.Deployment_Type "Development" }} - preferredDuringSchedulingIgnoredDuringExecution: - {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} - {{- else }} - requiredDuringSchedulingIgnoredDuringExecution: - {{- toYaml .Values.atlas.affinity.nodeAffinity.requiredDuringSchedulingIgnoredDuringExecution | nindent 12 }} - {{- end }} - {{- end }} - {{- if eq .Values.atlas.podAntiAffinity true }} - podAntiAffinity: - {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} - {{- end }} - {{- end }} - {{- if .Values.priorityClassName }} - priorityClassName: {{ .Values.priorityClassName }} - {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - initContainers: - {{- if .Values.atlas.initContainers }} - {{- toYaml .Values.atlas.initContainers | nindent 8 }} - {{- end }} - serviceAccountName: cinv-sa - containers: - - name: {{ .Chart.Name }}-main - command: [ - "/bin/bash", - "-c", - "/create-atlas-keycloak-config.sh; - /env_change.sh; - /opt/apache-atlas/bin/atlas_start.py; - tail -F /opt/apache-atlas/logs/*.log;" - ] - image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" - imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} - ports: - - containerPort: {{ .Values.atlas.service.targetPort }} - env: - {{- toYaml .Values.atlas.env | nindent 12 }} - - name: K8S_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: K8S_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_UID - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: Namespace - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: HOST_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: OTEL_SERVICE_NAME - value: atlas - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://$(HOST_IP):4317 - - name: OTEL_RESOURCE_ATTRIBUTES - value: >- - k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- if eq .Values.albTenant true }} - - name: ALB_ENABLED - value: "true" - {{- end }} - envFrom: - - secretRef: - name: atlas-read-keycloak-config - {{- if .Values.multitenant }} - - secretRef: - name: atlas-secret-manager - - secretRef: - name: atlas-secret-parameter-store - - secretRef: - name: instance-domain-name - {{- end }} - resources: - {{- $tierType 
:= .Values.global.Tier_Type | default "" }} - {{- if eq $tierType "Enterprise" }} - {{ toYaml .Values.atlas.resources | nindent 12 }} - {{- else if eq $tierType "Basic" }} - {{ toYaml .Values.atlas.resources_basic | nindent 12 }} - {{- else if eq $tierType "Standard" }} - {{ toYaml .Values.atlas.resources_standard | nindent 12 }} - {{- else }} - {{- toYaml .Values.atlas.resources | nindent 12 }} - {{- end }} - volumeMounts: - - name: atlas-read-config - mountPath: /opt/apache-atlas/conf/atlas-application.properties - subPath: atlas-application.properties - - name: atlas-read-logback-config - mountPath: /opt/apache-atlas/conf/atlas-logback.xml - subPath: atlas-logback.xml - - name: create-atlas-keycloak-config - mountPath: /create-atlas-keycloak-config.sh - subPath: create-atlas-keycloak-config.sh - - name: atlas-logs - mountPath: /opt/apache-atlas/logs - {{- if .Values.atlas.lifecycle }} - lifecycle: - {{- toYaml .Values.atlas.lifecycle | nindent 12 }} - {{- end }} - {{- if .Values.atlas.livenessProbe }} - livenessProbe: - {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.atlas.readinessProbe }} - readinessProbe: - {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} - {{- end }} - {{- if .Values.atlas.telegraf.enabled }} - - name: telegrafd - imagePullPolicy: IfNotPresent - {{- if and .Values.multiarch.enabled .Values.multiarch.image.telegrafd }} - image: {{ .Values.multiarch.image.telegrafd }} - {{- else }} - image: ghcr.io/atlanhq/telegraf:1.24.3 - {{- end }} - resources: - {{- toYaml .Values.atlas.telegraf.resources | nindent 12 }} - volumeMounts: - - name: telegraf-conf - mountPath: /etc/telegraf/ - ports: - - name: telegrafd - containerPort: 9273 - {{- end }} - {{- if .Values.atlas.imagePullSecrets }} - imagePullSecrets: - {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} - {{- end }} - volumes: - - name: atlas-logs - emptyDir: {} - - name: atlas-read-config - configMap: - name: atlas-read-config - - name: atlas-read-logback-config - configMap: - name: atlas-read-logback-config - - name: create-atlas-keycloak-config - configMap: - name: create-atlas-read-keycloak-config-cm - defaultMode: 0755 - - name: atlas-read-init-script - configMap: - name: atlas-read-init-script - defaultMode: 0755 - - name: atlas-read-init-container-script - configMap: - name: atlas-read-init-container-script - defaultMode: 0755 - - name: atlas-read-audit-index - configMap: - name: atlas-read-audit-index - defaultMode: 0755 - - name: atlas-config-map-rw-vol - emptyDir: {} - {{- if .Values.atlas.telegraf.enabled }} - - name: telegraf-conf - configMap: - name: atlas-read-telegrafd - {{- end }} -{{- end }} diff --git a/helm/atlas-read/templates/statefulset.yaml b/helm/atlas-read/templates/statefulset.yaml index 57e8fb3ce89..1a45b233117 100644 --- a/helm/atlas-read/templates/statefulset.yaml +++ b/helm/atlas-read/templates/statefulset.yaml @@ -1,4 +1,4 @@ -{{- if and (not (and (.Values.deploy) (.Values.deploy.enabled))) (or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled)) }} +{{- if or .Values.global.svcIsolation.enabled (or .Values.global.esIsolation.enabled .Values.global.globalSvcIsolation.enabled) }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -78,6 +78,7 @@ spec: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} {{- end }} + serviceAccountName: cinv-sa containers: - name: {{ .Chart.Name }}-main command: [ diff --git a/helm/atlas-read/values.yaml 
b/helm/atlas-read/values.yaml index 04dbf4d316d..1a408f627b9 100644 --- a/helm/atlas-read/values.yaml +++ b/helm/atlas-read/values.yaml @@ -315,7 +315,7 @@ atlas: scheme: HTTP failureThreshold: 3 initialDelaySeconds: 30 - periodSeconds: 60 + periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 diff --git a/helm/atlas/charts/logstash/values.yaml b/helm/atlas/charts/logstash/values.yaml index 7cd345caa67..33fb852347b 100644 --- a/helm/atlas/charts/logstash/values.yaml +++ b/helm/atlas/charts/logstash/values.yaml @@ -99,8 +99,15 @@ logstashPipeline: } } + # Debug: Log business attribute operations + if [operationType] == "BUSINESS_ATTRIBUTE_UPDATE" { + mutate { + add_field => { "[@metadata][debug_ba_operation_detected]" => "true" } + } + } + # Only process relevant operations - if [operationType] not in ["ENTITY_CREATE", "ENTITY_UPDATE", "ENTITY_DELETE", "CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + if [operationType] not in ["ENTITY_CREATE", "ENTITY_UPDATE", "ENTITY_DELETE", "CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE", "BUSINESS_ATTRIBUTE_UPDATE"] { drop { } } @@ -116,8 +123,8 @@ logstashPipeline: drop { } } - # Transform Atlas entity to ES document - if [operationType] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + # Transform Atlas entity to ES document (skip for classification and business attribute operations) + if [operationType] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE", "BUSINESS_ATTRIBUTE_UPDATE"] { ruby { code => " require 'json' @@ -149,6 +156,14 @@ logstashPipeline: end end + # Skip UPDATE if no attributes to prevent empty document overwrites + if operation_type == 'ENTITY_UPDATE' + if attrs.nil? || attrs.empty? + event.cancel + return + end + end + # Build document based on operation type if operation_type == 'ENTITY_UPDATE' # For UPDATE: Build minimal document with only changed fields + essential metadata @@ -200,9 +215,9 @@ logstashPipeline: if value.is_a?(Array) && !value.empty? first_item = value[0] if first_item.is_a?(Hash) && - first_item.key?('guid') && - first_item.key?('typeName') && - first_item.key?('uniqueAttributes') + first_item.key?('guid') && + first_item.key?('typeName') && + first_item.key?('uniqueAttributes') return true end end @@ -427,7 +442,100 @@ logstashPipeline: } } - # ES Lookup Fallback for operations missing docId (UPDATE, DELETE, CLASSIFICATION_ADD, CLASSIFICATION_DELETE, CLASSIFICATION_UPDATE) + # Business Attribute Update transformation + if [operationType] == "BUSINESS_ATTRIBUTE_UPDATE" { + ruby { + code => " + require 'json' + require 'time' + + entity = event.get('entity') + mutated_details = event.get('mutatedDetails') + operation_type = event.get('operationType') + + # Skip if no entity + if entity.nil? + event.cancel + return + end + + # Skip if no mutatedDetails for business attribute operations + if mutated_details.nil? || mutated_details.empty? + event.cancel + return + end + + # Ensure mutatedDetails is a hash + if !mutated_details.is_a?(Hash) + event.cancel + return + end + + # Build base document with essential metadata + document = { + '__modificationTimestamp' => (entity['updateTime'] || (Time.now.to_f * 1000).to_i), + '__modifiedBy' => (entity['updatedBy'] || 'atlas-kafka-sync') + } + + # Flatten business attributes: iterate through each BA and its properties + # mutatedDetails structure: { ba_id: { property_id: value } } + mutated_details.each do |ba_id, properties| + next if properties.nil? 
|| !properties.is_a?(Hash) + + # Iterate through properties and only add non-null values + properties.each do |property_id, property_value| + # Only include property if value is not null + unless property_value.nil? + document[property_id] = property_value + end + end + end + + # Set ES document ID from docId or guid + es_doc_id = entity['docId'] || entity['guid'] + event.set('[@metadata][es_doc_id]', es_doc_id) + event.set('[@metadata][guid]', entity['guid']) + event.set('[@metadata][operation_type]', operation_type) + event.set('[@metadata][needs_guid_lookup]', es_doc_id == entity['guid']) + + # Debug metadata + event.set('[@metadata][debug_ba_count]', mutated_details.keys.length) + non_null_properties = 0 + mutated_details.each do |ba_id, properties| + next unless properties.is_a?(Hash) + non_null_properties += properties.values.count { |v| !v.nil? } + end + event.set('[@metadata][debug_non_null_properties]', non_null_properties) + + # Add all fields to event + document.each do |k, v| + event.set(k, v) + end + + # Clean up original fields + event.remove('entity') + event.remove('mutatedDetails') + event.remove('operationType') + event.remove('message') + event.remove('internalAttributes') + event.remove('msgCreatedBy') + event.remove('msgSourceIP') + event.remove('msgCreationTime') + event.remove('msgSplitIdx') + event.remove('msgSplitCount') + event.remove('spooled') + event.remove('eventTime') + event.remove('version') + event.remove('source') + event.remove('msgCompressionKind') + event.remove('@timestamp') + event.remove('@version') + event.remove('event') + " + } + } + + # ES Lookup Fallback for operations missing docId (UPDATE, DELETE, CLASSIFICATION_ADD, CLASSIFICATION_DELETE, CLASSIFICATION_UPDATE, BUSINESS_ATTRIBUTE_UPDATE) if [@metadata][needs_guid_lookup] { http { url => "http://atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200/janusgraph_vertex_index/_search" @@ -467,8 +575,8 @@ logstashPipeline: } } - # Remove empty/null fields to match Atlas behavior (skip for classification operations) - if [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + # Remove empty/null fields to match Atlas behavior (skip for classification and business attribute operations) + if [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE", "BUSINESS_ATTRIBUTE_UPDATE"] { ruby { code => " # Remove fields with empty/null values (operation-aware) @@ -521,6 +629,8 @@ logstashPipeline: } } else if [@metadata][operation_type] in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { mutate { add_field => { "[@metadata][es_action]" => "update" } } + } else if [@metadata][operation_type] == "BUSINESS_ATTRIBUTE_UPDATE" { + mutate { add_field => { "[@metadata][es_action]" => "update" } } } # Removed internal metrics - using Prometheus exporter instead @@ -554,7 +664,7 @@ logstashPipeline: } # update path - partial document updates - if [@metadata][es_action] == "update" and [@metadata][is_soft_delete] != "true" and [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE"] { + if [@metadata][es_action] == "update" and [@metadata][is_soft_delete] != "true" and [@metadata][operation_type] not in ["CLASSIFICATION_ADD", "CLASSIFICATION_DELETE", "CLASSIFICATION_UPDATE", "BUSINESS_ATTRIBUTE_UPDATE"] { elasticsearch { hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] index => "janusgraph_vertex_index" @@ 
-585,6 +695,22 @@ logstashPipeline: } } + # business attribute operations - partial document updates + if [@metadata][es_action] == "update" and [@metadata][operation_type] == "BUSINESS_ATTRIBUTE_UPDATE" { + elasticsearch { + hosts => ["atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"] + index => "janusgraph_vertex_index" + action => "update" + document_id => "%{[@metadata][es_doc_id]}" + doc_as_upsert => false + timeout => 90 + retry_max_interval => 60 + retry_initial_interval => 1 + ilm_enabled => false + manage_template => false + } + } + # soft-delete via update + inline painless script if [@metadata][es_action] == "update" and [@metadata][is_soft_delete] == "true" { elasticsearch { @@ -722,6 +848,7 @@ podAnnotations: # Pipeline status annotations atlan.com/pipeline: "kafka-to-elasticsearch" atlan.com/component: "logstash" + configmap.reloader.stakater.com/reload: "atlas-logstash-config,atlas-logstash-pipeline,atlas-logstash-metricbeat-config" # additionals labels labels: @@ -1012,4 +1139,4 @@ ingress: tls: [] # - secretName: logstash-example-tls # hosts: - # - logstash-example.local + # - logstash-example.local \ No newline at end of file diff --git a/helm/atlas/templates/configmap.yaml b/helm/atlas/templates/configmap.yaml index 398db75da45..365bb22a188 100644 --- a/helm/atlas/templates/configmap.yaml +++ b/helm/atlas/templates/configmap.yaml @@ -267,11 +267,9 @@ data: atlas.server.type.cache-refresher=http://cinv.atlas.svc.cluster.local:5000/cinv atlas.server.type.cache-refresher-health=http://cinv.atlas.svc.cluster.local:5000/health #### Enabled the configs below as per need if HA is enabled ##### - {{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} atlas.server.ids=id1,id2 atlas.server.address.id1=atlas-0.atlas-service-atlas.atlas.svc.cluster.local:21000 atlas.server.address.id2=atlas-1.atlas-service-atlas.atlas.svc.cluster.local:21000 - {{- end }} atlas.server.ha.zookeeper.connect=zookeeper-0.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-1.zookeeper-headless.atlas.svc.cluster.local:2181,zookeeper-2.zookeeper-headless.atlas.svc.cluster.local:2181 atlas.server.ha.zookeeper.retry.sleeptime.ms=10000 atlas.server.ha.zookeeper.num.retries=18 diff --git a/helm/atlas/templates/deployment.yaml b/helm/atlas/templates/deployment.yaml deleted file mode 100644 index 96df1ead36d..00000000000 --- a/helm/atlas/templates/deployment.yaml +++ /dev/null @@ -1,265 +0,0 @@ -{{- if and (.Values.deploy) (.Values.deploy.enabled) }} -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Values.atlas.name }} - namespace: {{ .Values.Namespace }} - labels: - app: {{ template "name" . }} - chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} - release: {{ .Release.Name }} - heritage: {{ .Release.Service }} - annotations: - configmap.reloader.stakater.com/reload: "atlas-config,atlas-logback-config,atlas-audit-index,atlas-keycloak-config,atlas-init-script,atlas-init-container-script,rate-limit-nginx-config" - secret.reloader.stakater.com/reload: "atlas-secret-manager,atlas-init-secret,atlas-keycloak-config" -spec: - selector: - matchLabels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - {{- if not (and (.Values.hpa) (.Values.hpa.enabled)) }} - replicas: {{ .Values.atlas.replicaCount }} - {{- end }} - template: - metadata: - labels: - app: {{ template "name" . }} - release: {{ .Release.Name }} - annotations: -{{ toYaml .Values.atlas.podAnnotations | indent 8 }} - spec: - {{- with .Values.atlas.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - {{- if and .Values.atlas.affinity (ne .Values.global.Tier_Type "Basic") (ne .Values.global.Tier_Type "Standard") }} - affinity: - nodeAffinity: - {{- if eq .Values.atlas.custom_deployment.enabled true }} - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - {{- if (default false .Values.atlas.custom_deployment.karpenter_enabled) }} - - matchExpressions: - - key: purpose - operator: In - values: - - search - - key: karpenter.sh/capacity-type - operator: In - values: - - on-demand - {{- else }} - - matchExpressions: - - key: node.kubernetes.io/instance-type - operator: In - values: - {{- range .Values.atlas.custom_deployment.instance_type }} - - {{ . }} - {{- end }} - {{- end }} - {{- else }} - preferredDuringSchedulingIgnoredDuringExecution: - {{- toYaml .Values.atlas.affinity.nodeAffinity.preferredDuringSchedulingIgnoredDuringExecution | nindent 10 }} - {{- end }} - {{- if eq .Values.atlas.podAntiAffinity true }} - podAntiAffinity: - {{- toYaml .Values.atlas.affinity.podAntiAffinity | nindent 10 }} - {{- end }} - {{- end }} - {{- if .Values.priorityClassName }} - priorityClassName: {{ .Values.priorityClassName }} - {{- end }} - {{- with .Values.atlas.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - initContainers: - {{- if .Values.atlas.initContainers }} - {{- toYaml .Values.atlas.initContainers | nindent 8 }} - {{- end }} - serviceAccountName: cinv-sa - containers: - - name: {{ .Chart.Name }}-main - command: [ - "/bin/bash", - "-c", - "/create-atlas-keycloak-config.sh; - /env_change.sh; - /opt/apache-atlas/bin/atlas_start.py; - tail -F /opt/apache-atlas/logs/*.log;" - ] - image: "{{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }}" - imagePullPolicy: {{ .Values.atlas.image.pullPolicy }} - ports: - - containerPort: {{ .Values.atlas.service.targetPort }} - env: - {{- toYaml .Values.atlas.env | nindent 12 }} - - name: K8S_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP - - name: K8S_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: POD_UID - valueFrom: - fieldRef: - fieldPath: metadata.uid - - name: K8S_NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: Namespace - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: HOST_IP - valueFrom: - fieldRef: - fieldPath: status.hostIP - - name: OTEL_SERVICE_NAME - value: atlas - - name: OTEL_EXPORTER_OTLP_ENDPOINT - value: http://$(HOST_IP):4317 - - name: OTEL_RESOURCE_ATTRIBUTES - value: >- - k8s.pod.name=$(K8S_POD_NAME),k8s.container.image={{ .Values.atlas.image.repository }}:{{ .Values.atlas.image.tag }},k8s.pod.uid=$(POD_UID),k8s.node.name=$(K8S_NODE_NAME),k8s.namespace.name=$(Namespace),k8s.log.type=service-logs - {{- if eq .Values.albTenant true }} - - name: ALB_ENABLED - value: "true" - {{- end }} - envFrom: - - secretRef: - name: atlas-keycloak-config - {{- if .Values.multitenant }} - - secretRef: - name: atlas-secret-manager - - secretRef: - name: atlas-secret-parameter-store - - secretRef: - name: instance-domain-name - {{- end }} - resources: - {{- $tierType := .Values.global.Tier_Type | default "" }} - {{- if eq $tierType "Enterprise" }} - {{ toYaml .Values.atlas.resources | nindent 12 }} - {{- else if eq $tierType "Basic" }} - {{ toYaml .Values.atlas.resources_basic | nindent 12 }} - {{- else if eq $tierType "Standard" }} - {{ toYaml .Values.atlas.resources_standard | nindent 12 }} - {{- else }} - {{- toYaml .Values.atlas.resources | nindent 12 }} - {{- end }} - volumeMounts: - - 
name: atlas-config - mountPath: /opt/apache-atlas/conf/atlas-application.properties - subPath: atlas-application.properties - - name: atlas-logback-config - mountPath: /opt/apache-atlas/conf/atlas-logback.xml - subPath: atlas-logback.xml - - name: create-atlas-keycloak-config - mountPath: /create-atlas-keycloak-config.sh - subPath: create-atlas-keycloak-config.sh - - name: atlas-logs - mountPath: /opt/apache-atlas/logs - {{- if .Values.atlas.lifecycle }} - lifecycle: - {{- toYaml .Values.atlas.lifecycle | nindent 12 }} - {{- end }} - {{- if .Values.atlas.livenessProbe }} - livenessProbe: - {{- toYaml .Values.atlas.livenessProbe | nindent 12 }} - {{- end }} - {{- if .Values.atlas.readinessProbe }} - readinessProbe: - {{- toYaml .Values.atlas.readinessProbe | nindent 12 }} - {{- end }} - {{- if .Values.atlas.telegraf.enabled }} - - name: telegrafd - imagePullPolicy: IfNotPresent - {{- if and .Values.multiarch.enabled .Values.multiarch.image.telegrafd }} - image: {{ .Values.multiarch.image.telegrafd }} - {{- else }} - image: ghcr.io/atlanhq/telegraf:1.24.3 - {{- end }} - resources: - {{- toYaml .Values.atlas.telegraf.resources | nindent 12 }} - volumeMounts: - - name: telegraf-conf - mountPath: /etc/telegraf/ - {{- if .Values.nginx.enabled }} - - name: nginx-log-volume - mountPath: /var/log/nginx - readOnly: true - {{- end }} - ports: - - name: telegrafd - containerPort: 9273 - {{- end }} - {{- if .Values.nginx.enabled }} - - name: nginx-ratelimit - image: ghcr.io/atlanhq/nginx-vts-atlan-v2:1.27.5.1-multiarch - ports: - - containerPort: 8080 - protocol: TCP - resources: - limits: - cpu: 200m - memory: 256Mi - requests: - cpu: 100m - memory: 128Mi - volumeMounts: - - name: nginx-config - mountPath: /etc/nginx/nginx.conf - subPath: nginx.conf - - name: nginx-log-volume - mountPath: /var/log/nginx - {{- end }} - {{- if .Values.atlas.imagePullSecrets }} - imagePullSecrets: - {{- toYaml .Values.atlas.imagePullSecrets | nindent 8 }} - {{- end }} - volumes: - - name: atlas-logs - emptyDir: {} - - name: atlas-config-map-rw-vol - emptyDir: {} - - name: atlas-config - configMap: - name: atlas-config - - name: atlas-logback-config - configMap: - name: atlas-logback-config - - name: create-atlas-keycloak-config - configMap: - name: create-atlas-keycloak-config-cm - defaultMode: 0755 - - name: atlas-init-script - configMap: - name: atlas-init-script - defaultMode: 0755 - - name: atlas-init-container-script - configMap: - name: atlas-init-container-script - defaultMode: 0755 - - name: atlas-audit-index - configMap: - name: atlas-audit-index - defaultMode: 0755 - - name: atlas-config-map-rw-vol - emptyDir: {} - {{- if .Values.nginx.enabled }} - - name: nginx-log-volume - emptyDir: {} - - name: nginx-config - configMap: - name: rate-limit-nginx-config - {{- end }} - {{- if .Values.atlas.telegraf.enabled }} - - name: telegraf-conf - configMap: - name: atlas-telegrafd - {{- end }} -{{- end }} diff --git a/helm/atlas/templates/statefulset.yaml b/helm/atlas/templates/statefulset.yaml index da278a67550..77b0f4a6687 100644 --- a/helm/atlas/templates/statefulset.yaml +++ b/helm/atlas/templates/statefulset.yaml @@ -1,4 +1,3 @@ -{{- if not (and (.Values.deploy) (.Values.deploy.enabled)) }} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -65,6 +64,7 @@ spec: {{- if .Values.atlas.initContainers }} {{- toYaml .Values.atlas.initContainers | nindent 8 }} {{- end }} + serviceAccountName: cinv-sa containers: - name: {{ .Chart.Name }}-main command: [ @@ -244,5 +244,4 @@ spec: - name: telegraf-conf configMap: name: 
atlas-telegrafd - {{- end }} -{{- end }} + {{- end }} \ No newline at end of file diff --git a/helm/atlas/values.yaml b/helm/atlas/values.yaml index fddc62b2dea..58005741fa5 100644 --- a/helm/atlas/values.yaml +++ b/helm/atlas/values.yaml @@ -311,7 +311,7 @@ atlas: scheme: HTTP failureThreshold: 3 initialDelaySeconds: 30 - periodSeconds: 60 + periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5
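
To see what the new BUSINESS_ATTRIBUTE_UPDATE branch actually writes, here is a rough curl sketch of the flattened partial update it produces: `mutatedDetails { ba_id: { property_id: value } }` collapses into top-level `property_id` fields plus the modification metadata, sent as a partial update keyed by the entity's docId (or guid after the ES lookup fallback). The host and index names match the pipeline config in this patch; the docId, property key, and values below are hypothetical.

```bash
#!/usr/bin/env bash
# Hand-rolled equivalent of the Logstash BUSINESS_ATTRIBUTE_UPDATE output:
# a partial document update against janusgraph_vertex_index with
# doc_as_upsert disabled, carrying only non-null business-attribute values.
set -euo pipefail

ES="http://atlas-elasticsearch-read-master.atlas.svc.cluster.local:9200"
INDEX="janusgraph_vertex_index"
DOC_ID="example-doc-id"   # hypothetical; the pipeline uses entity.docId or guid

curl -sS -X POST "${ES}/${INDEX}/_update/${DOC_ID}" \
  -H 'Content-Type: application/json' \
  -d '{
        "doc": {
          "examplePropertyId": "new value",
          "__modificationTimestamp": 1731220000000,
          "__modifiedBy": "atlas-kafka-sync"
        },
        "doc_as_upsert": false
      }'
```
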