diff --git a/Dockerfile b/Dockerfile
index ee1af9fb..d019dec9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -56,5 +56,9 @@ RUN mkdir -p /app/src/lib/cli && ln -sf /app/src/esm/cli/alto.js /app/src/lib/cl
 # Always use JSON logs in production
 ENV ALTO_JSON=true
 
+# Add labels for log collection
+LABEL logging="alloy"
+LABEL environment="production"
+
 # start app
 ENTRYPOINT ["pnpm", "start"]
diff --git a/docker-compose.observability.yml b/docker-compose.observability.yml
new file mode 100644
index 00000000..7d8a9834
--- /dev/null
+++ b/docker-compose.observability.yml
@@ -0,0 +1,103 @@
+version: "3.8"
+
+services:
+  # Prometheus - Metrics collection and storage
+  prometheus:
+    image: prom/prometheus:v2.48.1
+    container_name: alto-prometheus
+    command:
+      - "--config.file=/etc/prometheus/prometheus.yml"
+      - "--storage.tsdb.path=/prometheus"
+      - "--web.console.libraries=/usr/share/prometheus/console_libraries"
+      - "--web.console.templates=/usr/share/prometheus/consoles"
+    ports:
+      - "9090:9090"
+    volumes:
+      - ./observability/prometheus.yml:/etc/prometheus/prometheus.yml:ro
+      - prometheus-data:/prometheus
+    networks:
+      - alto-observability
+    restart: unless-stopped
+
+  # Grafana Tempo - Distributed tracing backend
+  tempo:
+    image: grafana/tempo:2.3.1
+    container_name: alto-tempo
+    command: ["-config.file=/etc/tempo.yml"]
+    ports:
+      - "3200:3200" # Tempo query frontend
+      - "4318:4318" # OTLP HTTP receiver
+      - "4317:4317" # OTLP gRPC receiver
+    volumes:
+      - ./observability/tempo.yml:/etc/tempo.yml:ro
+      - tempo-data:/var/tempo
+    networks:
+      - alto-observability
+    restart: unless-stopped
+
+  # Grafana Loki - Log aggregation
+  loki:
+    image: grafana/loki:2.9.3
+    container_name: alto-loki
+    command: ["-config.file=/etc/loki/local-config.yaml"]
+    ports:
+      - "3100:3100"
+    volumes:
+      - ./observability/loki.yml:/etc/loki/local-config.yaml:ro
+      - loki-data:/loki
+    networks:
+      - alto-observability
+    restart: unless-stopped
+
+  # Grafana Alloy - Log collector
+  alloy:
+    image: grafana/alloy:v1.0.0
+    container_name: alto-alloy
+    command: run --server.http.listen-addr=0.0.0.0:12345 --storage.path=/var/lib/alloy/data /etc/alloy/config.alloy
+    environment:
+      - ALTO_CLUSTER_NAME=${ALTO_CLUSTER_NAME:-alto-bundler}
+    ports:
+      - "12345:12345"
+    volumes:
+      - ./observability/alloy.alloy:/etc/alloy/config.alloy:ro
+      - /var/run/docker.sock:/var/run/docker.sock:ro
+      - alloy-data:/var/lib/alloy
+    networks:
+      - alto-observability
+    depends_on:
+      - loki
+    restart: unless-stopped
+
+  # Grafana - Unified observability dashboard
+  grafana:
+    image: grafana/grafana:10.2.3
+    container_name: alto-grafana
+    ports:
+      - "3003:3000"
+    environment:
+      # Default credentials: admin/admin (change in production)
+      - GF_SECURITY_ADMIN_PASSWORD=admin
+    volumes:
+      - ./observability/grafana-provisioning:/etc/grafana/provisioning:ro
+      - grafana-data:/var/lib/grafana
+    networks:
+      - alto-observability
+    depends_on:
+      - prometheus
+      - tempo
+      - loki
+    restart: unless-stopped
+
+networks:
+  alto-observability:
+    # Pin the name so the bundler stack in docker-compose.yml joins this exact
+    # network instead of a project-prefixed one.
+    name: alto-observability
+    driver: bridge
+
+volumes:
+  prometheus-data:
+  tempo-data:
+  loki-data:
+  alloy-data:
+  grafana-data:
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..b05dc40c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,28 @@
+version: "3.8"
+
+services:
+  # Alto ERC-4337 Bundler
+  ultra-relay-provider:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: ultra-relay-provider
+    labels:
+      logging: "alloy"
+    networks:
+      - alto-observability
+    environment:
+      # Override with your configuration
+      - ALTO_RPC_URL=${ALTO_RPC_URL}
+      - ALTO_PORT=3000
+      - ALTO_ENABLE_TELEMETRY=true
+      # instrumentation.ts hands this value to the exporter's `url` as-is, so it
+      # must carry the /v1/traces path (as the localhost default does).
+      - ALTO_OTLP_ENDPOINT=http://tempo:4318/v1/traces
+    ports:
+      - "3000:3000"
+    restart: unless-stopped
+
+networks:
+  alto-observability:
+    name: alto-observability
diff --git a/observability/alloy.alloy b/observability/alloy.alloy
new file mode 100644
index 00000000..c1ac6bb4
--- /dev/null
+++ b/observability/alloy.alloy
@@ -0,0 +1,67 @@
+// Grafana Alloy configuration for Docker log collection
+
+// Discover containers with the logging="alloy" label
+discovery.docker "containers" {
+  host = "unix:///var/run/docker.sock"
+
+  filter {
+    name = "label"
+    values = ["logging=alloy"]
+  }
+}
+
+// Relabel rules handed to loki.source.docker via relabel_rules, so they run
+// while the __meta_docker_* discovery labels are still attached to each
+// target. NOTE(review): a downstream loki.relabel stage should not see those
+// labels, since "__"-prefixed labels are dropped before entries are forwarded.
+discovery.relabel "docker_labels" {
+  targets = []
+
+  rule {
+    source_labels = ["__meta_docker_container_name"]
+    target_label = "container"
+    regex = "/(.*)"
+    replacement = "$1"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_label_environment"]
+    target_label = "environment"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_compose_service"]
+    target_label = "service"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_label_com_docker_compose_project"]
+    target_label = "project"
+  }
+
+  rule {
+    source_labels = ["__meta_docker_container_id"]
+    target_label = "container_id"
+    regex = "(.{12}).*"
+    replacement = "$1"
+  }
+}
+
+// Collect Docker logs
+loki.source.docker "docker_logs" {
+  host = "unix:///var/run/docker.sock"
+  targets = discovery.docker.containers.targets
+  relabel_rules = discovery.relabel.docker_labels.rules
+  forward_to = [loki.write.loki.receiver]
+}
+
+loki.write "loki" {
+  endpoint {
+    url = "http://loki:3100/loki/api/v1/push"
+  }
+
+  external_labels = {
+    cluster = env("ALTO_CLUSTER_NAME"),
+    source = "alloy",
+  }
+}
diff --git
a/observability/grafana-provisioning/datasources/datasources.yml b/observability/grafana-provisioning/datasources/datasources.yml
new file mode 100644
index 00000000..cbb517f8
--- /dev/null
+++ b/observability/grafana-provisioning/datasources/datasources.yml
@@ -0,0 +1,49 @@
+apiVersion: 1
+
+datasources:
+  # Prometheus datasource for metrics
+  - name: Prometheus
+    type: prometheus
+    uid: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
+    jsonData:
+      timeInterval: "15s"
+
+  # Tempo datasource for distributed tracing
+  - name: Tempo
+    type: tempo
+    uid: tempo
+    access: proxy
+    url: http://tempo:3200
+    editable: false
+    jsonData:
+      tracesToLogsV2:
+        datasourceUid: "loki"
+        spanStartTimeShift: "-1m"
+        spanEndTimeShift: "1m"
+        filterByTraceID: true
+        filterBySpanID: false
+        tags: ["trace_id"]
+      serviceMap:
+        datasourceUid: "prometheus"
+      nodeGraph:
+        enabled: true
+
+  # Loki datasource for logs
+  - name: Loki
+    type: loki
+    uid: loki
+    access: proxy
+    url: http://loki:3100
+    editable: false
+    jsonData:
+      derivedFields:
+        # Alto logs are JSON (ALTO_JSON=true), so match "trace_id":"<id>" as
+        # well as the logfmt form trace_id=<id>.
+        - datasourceUid: "tempo"
+          matcherRegex: '(?:trace_id=|"trace_id":\s*")(\w+)'
+          name: "TraceID"
+          url: "$${__value.raw}"
diff --git a/observability/loki.yml b/observability/loki.yml
new file mode 100644
index 00000000..0708e75d
--- /dev/null
+++ b/observability/loki.yml
@@ -0,0 +1,39 @@
+auth_enabled: false
+
+server:
+  http_listen_port: 3100
+
+common:
+  path_prefix: /loki
+  storage:
+    filesystem:
+      chunks_directory: /loki/chunks
+      rules_directory: /loki/rules
+  replication_factor: 1
+  ring:
+    kvstore:
+      store: inmemory
+
+schema_config:
+  configs:
+    - from: 2020-10-24
+      store: boltdb-shipper
+      object_store: filesystem
+      schema: v11
+      index:
+        prefix: index_
+        period: 24h
+
+limits_config:
+  enforce_metric_name: false
+  reject_old_samples: true
+  reject_old_samples_max_age: 168h
+  max_cache_freshness_per_query: 10m
+  split_queries_by_interval: 15m
+
+query_range:
+  align_queries_with_step: true
+  cache_results: true
+
+ruler:
+  alertmanager_url: http://localhost:9093
diff --git a/observability/prometheus.yml b/observability/prometheus.yml
new file mode 100644
index 00000000..904a0a23
--- /dev/null
+++ b/observability/prometheus.yml
@@ -0,0 +1,18 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+  external_labels:
+    cluster: "alto-bundler"
+    environment: "local"
+
+scrape_configs:
+  # Alto Bundler metrics
+  # Using service name (ultra-relay-provider) for cross-platform Docker compatibility
+  # This works on Linux, macOS, and Windows when bundler runs in the same Docker network
+  - job_name: "alto-bundler"
+    static_configs:
+      - targets: ["ultra-relay-provider:3000"]
+        labels:
+          service: "alto-bundler"
+    metrics_path: "/metrics"
+    scrape_interval: 10s
diff --git a/observability/tempo.yml b/observability/tempo.yml
new file mode 100644
index 00000000..7885cea0
--- /dev/null
+++ b/observability/tempo.yml
@@ -0,0 +1,34 @@
+server:
+  http_listen_port: 3200
+
+distributor:
+  receivers:
+    otlp:
+      protocols:
+        http:
+          endpoint: 0.0.0.0:4318
+        grpc:
+          endpoint: 0.0.0.0:4317
+
+ingester:
+  trace_idle_period: 10s
+  max_block_bytes: 1_000_000
+  max_block_duration: 5m
+
+compactor:
+  compaction:
+    compaction_window: 1h
+    max_block_bytes: 100_000_000
+    block_retention: 1h
+    compacted_block_retention: 10m
+
+storage:
+  trace:
+    backend: local
+    local:
+      path: /var/tempo/traces
+    wal:
+      path: /var/tempo/wal
+    pool:
+      max_workers: 100
+      queue_depth: 10000
diff --git a/package.json b/package.json
index 356245a0..3491b34e 100644
--- a/package.json
+++ b/package.json
@@ -22,7 +22,7 @@
         "build:contracts:EPSimulations07": "forge build --quiet --root contracts --evm-version paris --out ../src/contracts/ src/v07/EntryPointSimulations.sol",
         "build:contracts:EPSimulations08": "forge build --quiet --root contracts --evm-version cancun --out ../src/contracts/ src/v08/EntryPointSimulations.sol",
         "build:contracts": "pnpm run build:contracts:PimlicoSimulations && pnpm run build:contracts:EPFilterOpsOverride06
&& pnpm run build:contracts:EPFilterOpsOverride07 && pnpm run build:contracts:EPFilterOpsOverride08 && pnpm run build:contracts:EPGasEstimationOverride06 && pnpm run build:contracts:EPSimulations07 && pnpm run build:contracts:EPSimulations08",
-        "start": "node src/esm/cli/alto.js run",
+        "start": "node --import ./src/esm/cli/instrumentation.js src/esm/cli/alto.js run",
         "dev": "nodemon --ext ts,js,json --watch src --exec DOTENV_CONFIG_PATH=$(pwd)/.env tsx --tsconfig src/tsconfig.json src/cli/alto.ts run",
         "test": "pnpm --filter e2e run test",
         "test:ci": "pnpm --filter e2e run test:ci",
diff --git a/src/cli/config/bundler.ts b/src/cli/config/bundler.ts
index a0023eec..fef1950f 100644
--- a/src/cli/config/bundler.ts
+++ b/src/cli/config/bundler.ts
@@ -340,9 +340,6 @@ export type IGasEstimationArgsInput = z.input
 export type IMempoolArgs = z.infer
 export type IMempoolArgsInput = z.input
 
-export type IOptions = z.infer
-export type IOptionsInput = z.input
-
 export type IRedisArgs = z.infer
 export type IRedisArgsInput = z.input
 
@@ -358,3 +355,6 @@ export const optionArgsSchema = z.object({
     ...mempoolArgsSchema.shape,
     ...redisArgsSchema.shape
 })
+
+export type IOptions = z.infer
+export type IOptionsInput = z.input
diff --git a/src/cli/instrumentation.ts b/src/cli/instrumentation.ts
index a9025ca9..dcf1336b 100644
--- a/src/cli/instrumentation.ts
+++ b/src/cli/instrumentation.ts
@@ -51,23 +51,30 @@ class CustomSampler implements Sampler {
     }
 }
 
-const sdk = new NodeSDK({
-    traceExporter: new OTLPTraceExporter(),
-    instrumentations: [
-        new HttpInstrumentation({
-            requireParentforOutgoingSpans: true
-        }),
-        new UndiciInstrumentation({
-            requireParentforSpans: true
+if (process.env.ALTO_ENABLE_TELEMETRY === "true") {
+    const otlpEndpoint =
+        process.env.ALTO_OTLP_ENDPOINT || "http://localhost:4318/v1/traces"
+
+    const sdk = new NodeSDK({
+        traceExporter: new OTLPTraceExporter({
+            url: otlpEndpoint
         }),
-        new FastifyInstrumentation(),
-        new PinoInstrumentation(),
-        new ViemInstrumentation({
-            captureOperationResult: true
-        })
-    ],
-    sampler: new ParentBasedSampler({ root: new CustomSampler() })
-})
+        instrumentations: [
+            new HttpInstrumentation({
+                requireParentforOutgoingSpans: true
+            }),
+            new UndiciInstrumentation({
+                requireParentforSpans: true
+            }),
+            new FastifyInstrumentation(),
+            new PinoInstrumentation(),
+            new ViemInstrumentation({
+                captureOperationResult: true
+            })
+        ],
+        sampler: new ParentBasedSampler({ root: new CustomSampler() })
+    })
 
-sdk.start()
-await waitForAllMessagesAcknowledged()
+    sdk.start()
+    await waitForAllMessagesAcknowledged()
+}
diff --git a/src/executor/bundleManager.ts b/src/executor/bundleManager.ts
index cd35bf7f..7525c504 100644
--- a/src/executor/bundleManager.ts
+++ b/src/executor/bundleManager.ts
@@ -7,7 +7,12 @@ import type {
 } from "@alto/mempool"
 import type { ReceiptCache } from "@alto/receiptCache"
 import { createReceiptCache } from "@alto/receiptCache"
-import type { HexData32, SubmittedBundleInfo, UserOpInfo } from "@alto/types"
+import type {
+    HexData32,
+    RejectedUserOp,
+    SubmittedBundleInfo,
+    UserOpInfo
+} from "@alto/types"
 import type { UserOperationReceipt } from "@alto/types"
 import type { Logger, Metrics } from "@alto/utils"
 import { parseUserOpReceipt } from "@alto/utils"
@@ -194,7 +199,7 @@ export class BundleManager {
 
         // Fire and forget
         // Check if any rejected userOps were frontruns, if not mark as reverted onchain.
-        rejectedUserOps.map(async (userOpInfo) => {
+        rejectedUserOps.map(async (userOpInfo: RejectedUserOp) => {
             const status = await this.getUserOpStatus({
                 userOpInfo,
                 entryPoint: submittedBundle.bundle.entryPoint,
diff --git a/src/rpc/methods/eth_sendUserOperation.ts b/src/rpc/methods/eth_sendUserOperation.ts
index 9d8bea63..920e5eb8 100644
--- a/src/rpc/methods/eth_sendUserOperation.ts
+++ b/src/rpc/methods/eth_sendUserOperation.ts
@@ -222,7 +222,6 @@ export const ethSendUserOperationHandler = createMethodHandler({
     method: "eth_sendUserOperation",
     schema: sendUserOperationSchema,
     handler: async ({ rpcHandler, params, apiVersion }) => {
-        console.log("=== eth_sendUserOperation called ===")
         const [userOp, entryPoint] = params
 
         let status: "added" | "queued" | "rejected" = "rejected"
diff --git a/src/rpc/validation/UnsafeValidator.ts b/src/rpc/validation/UnsafeValidator.ts
index c9e6e6b3..20387d1f 100644
--- a/src/rpc/validation/UnsafeValidator.ts
+++ b/src/rpc/validation/UnsafeValidator.ts
@@ -617,7 +617,6 @@ export class UnsafeValidator implements InterfaceValidator {
 
             return validationResult
         } catch (e) {
-            // console.log(e)
             this.metrics.userOperationsValidationFailure.inc()
             throw e
         }
diff --git a/src/rpc/validation/tracer.ts b/src/rpc/validation/tracer.ts
index 0703d95f..b282ddb5 100644
--- a/src/rpc/validation/tracer.ts
+++ b/src/rpc/validation/tracer.ts
@@ -38,20 +38,10 @@ export async function debug_traceCall(
         })
         .catch((e: unknown) => {
             if (e instanceof Error) {
-                // console.log("ex=", e.message)
-                // console.log(
-                //     "tracer=",
-                //     traceOptions.tracer
-                //         ?.toString()
-                //         .split("\n")
-                //         .map((line, index) => `${index + 1}: ${line}`)
-                //         .join("\n")
-                // )
                 throw e
             }
             sentry.captureException(e)
         })
-    // console.log("ret=", ret)
 
     return ret
 }
diff --git a/src/utils/toViemStateOverrides.ts b/src/utils/toViemStateOverrides.ts
index 29f930fd..8d344e3f 100644
--- a/src/utils/toViemStateOverrides.ts
+++ b/src/utils/toViemStateOverrides.ts
@@ -54,7 +54,7 @@ export
function toViemStateOverrides(
 
             result.push(entry)
         } catch (_e) {
-            console.warn(`Invalid address in state override: ${address}`)
+            // Invalid address in state override - silently skip
         }
     }
 
diff --git a/test-observability.sh b/test-observability.sh
new file mode 100755
index 00000000..aa1a9c80
--- /dev/null
+++ b/test-observability.sh
@@ -0,0 +1,350 @@
+#!/bin/bash
+#
+# Smoke test for the local observability stack: builds Alto, starts the
+# docker-compose.observability.yml services, runs Alto (against Anvil when it
+# is installed) and checks that metrics, logs and traces can be queried.
+set -e
+
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE} Alto Observability Stack - Automated Validation${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
+
+FAILED=0
+
+# Cleanup function
+cleanup() {
+    echo -e "\n${YELLOW}🧹 Cleaning up...${NC}"
+    docker-compose -f docker-compose.observability.yml down -v > /dev/null 2>&1 || true
+    if [ -n "$ALTO_PID" ]; then
+        kill "$ALTO_PID" > /dev/null 2>&1 || true
+    fi
+    if [ -n "$ANVIL_PID" ]; then
+        kill "$ANVIL_PID" > /dev/null 2>&1 || true
+    fi
+}
+
+trap cleanup EXIT
+
+# Check if jq is available
+if ! command -v jq &> /dev/null; then
+    echo -e "${RED}✗${NC} jq is required but not installed"
+    echo -e "${YELLOW} Install jq:${NC}"
+    echo -e "${YELLOW} - macOS: brew install jq${NC}"
+    echo -e "${YELLOW} - Ubuntu/Debian: apt-get install jq${NC}"
+    echo -e "${YELLOW} - Fedora: dnf install jq${NC}"
+    echo -e "${YELLOW} - Or visit: https://jqlang.github.io/jq/download/${NC}"
+    exit 1
+fi
+
+# Check if Anvil is available
+if ! command -v anvil &> /dev/null; then
+    echo -e "${YELLOW}⚠${NC} Anvil not found. Install Foundry: https://book.getfoundry.sh/getting-started/installation"
+    echo -e "${YELLOW} Falling back to localhost:8545 (may not work)${NC}\n"
+    USE_ANVIL=false
+else
+    USE_ANVIL=true
+fi
+
+# Check if RPC URL is set
+if [ -z "$ALTO_RPC_URL" ]; then
+    echo -e "${YELLOW}⚠${NC} ALTO_RPC_URL not set, using default localhost"
+    ALTO_RPC_URL="http://localhost:8545"
+fi
+
+# Generate test private keys if not set
+if [ -z "$ALTO_EXECUTOR_PRIVATE_KEYS" ]; then
+    echo -e "${YELLOW}⚠${NC} ALTO_EXECUTOR_PRIVATE_KEYS not set, using test key"
+    ALTO_EXECUTOR_PRIVATE_KEYS="0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80"
+fi
+
+if [ -z "$ALTO_UTILITY_PRIVATE_KEY" ]; then
+    echo -e "${YELLOW}⚠${NC} ALTO_UTILITY_PRIVATE_KEY not set, using test key"
+    # Use second Anvil test key
+    ALTO_UTILITY_PRIVATE_KEY="0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d"
+fi
+
+# Set default entrypoint if not set
+if [ -z "$ALTO_ENTRYPOINTS" ]; then
+    echo -e "${YELLOW}⚠${NC} ALTO_ENTRYPOINTS not set, using v0.7 default"
+    ALTO_ENTRYPOINTS="0x0000000071727De22E5E9d8BAf0edAc6f37da032"
+fi
+
+# Step 1: Build Alto
+echo -e "${BLUE}[1/10]${NC} Building Alto..."
+if pnpm build > /tmp/alto-build.log 2>&1; then
+    echo -e "${GREEN}✓${NC} Build successful"
+else
+    echo -e "${RED}✗${NC} Build failed"
+    cat /tmp/alto-build.log
+    exit 1
+fi
+
+# Step 2: Start Observability Stack
+echo -e "\n${BLUE}[2/10]${NC} Starting observability stack..."
+docker-compose -f docker-compose.observability.yml up -d
+
+# Step 3: Wait for services to be healthy
+echo -e "\n${BLUE}[3/10]${NC} Waiting for services to be ready..."
+
+# Poll URL $2 until curl succeeds, up to 30 attempts 2s apart. Prints the
+# outcome for service $1; on timeout bumps FAILED and returns 1.
+wait_for_service() {
+    local name=$1
+    local url=$2
+    local max_attempts=30
+    local attempt=1
+
+    while [ $attempt -le $max_attempts ]; do
+        if curl -sf "$url" > /dev/null 2>&1; then
+            echo -e "${GREEN}✓${NC} $name ready"
+            return 0
+        fi
+        sleep 2
+        attempt=$((attempt + 1))
+    done
+
+    echo -e "${RED}✗${NC} $name failed to start"
+    FAILED=$((FAILED + 1))
+    return 1
+}
+
+# `|| true` keeps `set -e` from aborting on the first unhealthy service, so the
+# failure is tallied in FAILED and shown in the final report instead.
+wait_for_service "Prometheus" "http://localhost:9090/-/healthy" || true
+wait_for_service "Tempo" "http://localhost:3200/ready" || true
+wait_for_service "Loki" "http://localhost:3100/ready" || true
+wait_for_service "Alloy" "http://localhost:12345/-/ready" || true
+wait_for_service "Grafana" "http://localhost:3003/api/health" || true
+
+# Step 4: Start Anvil (local Ethereum node)
+echo -e "\n${BLUE}[4/10]${NC} Starting local Ethereum node..."
+
+if [ "$USE_ANVIL" = true ]; then
+    anvil --host 0.0.0.0 --port 8545 > /tmp/anvil.log 2>&1 &
+    ANVIL_PID=$!
+
+    # Wait for Anvil to be ready
+    sleep 3
+
+    if kill -0 "$ANVIL_PID" 2>/dev/null; then
+        echo -e "${GREEN}✓${NC} Anvil started (PID: $ANVIL_PID)"
+        ALTO_RPC_URL="http://localhost:8545"
+    else
+        echo -e "${RED}✗${NC} Anvil failed to start"
+        cat /tmp/anvil.log
+        ANVIL_PID=""
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} Skipping (Anvil not installed)"
+fi
+
+# Step 5: Start Alto with telemetry
+echo -e "\n${BLUE}[5/10]${NC} Starting Alto with telemetry enabled..."
+
+ALTO_ENABLE_TELEMETRY=true \
+ALTO_OTLP_ENDPOINT=http://localhost:4318/v1/traces \
+ALTO_RPC_URL="$ALTO_RPC_URL" \
+ALTO_ENTRYPOINTS="$ALTO_ENTRYPOINTS" \
+ALTO_EXECUTOR_PRIVATE_KEYS="$ALTO_EXECUTOR_PRIVATE_KEYS" \
+ALTO_UTILITY_PRIVATE_KEY="$ALTO_UTILITY_PRIVATE_KEY" \
+ALTO_PORT=3000 \
+ALTO_JSON=true \
+ALTO_SAFE_MODE=false \
+pnpm start > /tmp/alto.log 2>&1 &
+
+ALTO_PID=$!
+
+# Wait for Alto to start
+sleep 5
+
+if kill -0 "$ALTO_PID" 2>/dev/null; then
+    echo -e "${GREEN}✓${NC} Alto started (PID: $ALTO_PID)"
+
+    # Wait for Alto to be responsive
+    if wait_for_service "Alto" "http://localhost:3000/metrics"; then
+        ALTO_RUNNING=true
+    else
+        echo -e "${YELLOW}⚠${NC} Alto not responding (likely RPC connection issue)"
+        echo -e "${YELLOW} Check /tmp/alto.log for details${NC}"
+        ALTO_RUNNING=false
+        kill "$ALTO_PID" 2>/dev/null || true
+        ALTO_PID=""
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} Alto failed to start (likely RPC connection issue)"
+    echo -e "${YELLOW} Continuing with observability stack validation...${NC}"
+    ALTO_RUNNING=false
+    ALTO_PID=""
+fi
+
+# Step 6: Generate test traffic
+echo -e "\n${BLUE}[6/10]${NC} Generating test traffic..."
+
+if [ "$ALTO_RUNNING" = true ]; then
+    for i in {1..5}; do
+        curl -sf -X POST http://localhost:3000 \
+            -H "Content-Type: application/json" \
+            -d '{
+                "jsonrpc": "2.0",
+                "id": 1,
+                "method": "eth_supportedEntryPoints",
+                "params": []
+            }' > /dev/null || true
+        sleep 1
+    done
+
+    echo -e "${GREEN}✓${NC} Generated 5 test requests"
+
+    # Give telemetry time to propagate
+    echo -e "${YELLOW}⏳${NC} Waiting 10s for telemetry to propagate..."
+    sleep 10
+else
+    echo -e "${YELLOW}⚠${NC} Skipping (Alto not running)"
+fi
+
+# Step 7: Validate Metrics
+echo -e "\n${BLUE}[7/10]${NC} Validating metrics collection..."
+
+if [ "$ALTO_RUNNING" = true ]; then
+    METRICS=$(curl -sf http://localhost:3000/metrics) || true
+
+    if echo "$METRICS" | grep -q "process_cpu_seconds_total"; then
+        echo -e "${GREEN}✓${NC} Alto exposing metrics"
+    else
+        echo -e "${YELLOW}⚠${NC} Alto not exposing metrics"
+    fi
+
+    PROM_TARGETS=$(curl -sf http://localhost:9090/api/v1/targets | jq -r '.data.activeTargets[] | select(.labels.job == "alto-bundler") | .health')
+
+    if [ "$PROM_TARGETS" = "up" ]; then
+        echo -e "${GREEN}✓${NC} Prometheus scraping Alto metrics"
+    else
+        echo -e "${YELLOW}⚠${NC} Prometheus not scraping Alto (Alto not running)"
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} Skipping (Alto not running)"
+    echo -e "${BLUE}ℹ${NC} Prometheus ready at http://localhost:9090"
+fi
+
+# Step 8: Validate Logs
+echo -e "\n${BLUE}[8/10]${NC} Validating logs collection..."
+
+if [ "$ALTO_RUNNING" = true ]; then
+    # Alloy stamps every stream it ships with the external label source="alloy"
+    # (observability/alloy.alloy). Use a range query: an instant query only
+    # evaluates a single point in time.
+    LOKI_QUERY=$(curl -sf -G "http://localhost:3100/loki/api/v1/query_range" \
+        --data-urlencode 'query={source="alloy"}' \
+        --data-urlencode "limit=10" | jq -r '.data.result | length')
+
+    if [ "${LOKI_QUERY:-0}" -gt 0 ]; then
+        echo -e "${GREEN}✓${NC} Loki collecting logs ($LOKI_QUERY streams found)"
+    else
+        echo -e "${YELLOW}⚠${NC} Loki not collecting logs yet (Alto just started)"
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} Skipping (Alto not running)"
+    echo -e "${BLUE}ℹ${NC} Loki ready at http://localhost:3100"
+fi
+
+# Step 9: Validate Traces
+echo -e "\n${BLUE}[9/10]${NC} Validating traces collection..."
+
+if [ "$ALTO_RUNNING" = true ]; then
+    TEMPO_TRACES=$(curl -sf "http://localhost:3200/api/search?limit=100" | jq -r '.traces | length')
+
+    if [ "${TEMPO_TRACES:-0}" -gt 0 ]; then
+        echo -e "${GREEN}✓${NC} Tempo collecting traces ($TEMPO_TRACES traces found)"
+
+        # Validate trace details
+        TRACE_ID=$(curl -sf "http://localhost:3200/api/search?limit=1" | jq -r '.traces[0].traceID')
+
+        if [ -n "$TRACE_ID" ] && [ "$TRACE_ID" != "null" ]; then
+            TRACE_DETAILS=$(curl -sf "http://localhost:3200/api/traces/$TRACE_ID") || true
+
+            if echo "$TRACE_DETAILS" | jq -e '.batches[0].resource.attributes[] | select(.key == "service.name" and .value.stringValue == "alto")' > /dev/null 2>&1; then
+                echo -e "${GREEN}✓${NC} Traces contain Alto service spans"
+            fi
+
+            SPAN_COUNT=$(echo "$TRACE_DETAILS" | jq -r '.batches[0].scopeSpans[0].spans | length' 2>/dev/null || echo "0")
+            if [ "${SPAN_COUNT:-0}" -gt 0 ]; then
+                echo -e "${GREEN}✓${NC} Trace contains $SPAN_COUNT spans"
+            fi
+        fi
+    else
+        echo -e "${YELLOW}⚠${NC} Tempo not collecting traces yet (Alto just started)"
+    fi
+else
+    echo -e "${YELLOW}⚠${NC} Skipping (Alto not running)"
+    echo -e "${BLUE}ℹ${NC} Tempo ready at http://localhost:3200"
+fi
+
+# Step 10: Validate Grafana Datasources
+echo -e "\n${BLUE}[10/10]${NC} Validating Grafana datasources..."
+
+DATASOURCES=$(curl -sf -u admin:admin http://localhost:3003/api/datasources | jq -r '.[].name')
+
+# Report whether a datasource named $1 is listed in $DATASOURCES; bump FAILED if not.
+check_datasource() {
+    local name=$1
+    if echo "$DATASOURCES" | grep -q "$name"; then
+        echo -e "${GREEN}✓${NC} $name datasource configured"
+    else
+        echo -e "${RED}✗${NC} $name datasource missing"
+        FAILED=$((FAILED + 1))
+    fi
+}
+
+check_datasource "Prometheus"
+check_datasource "Tempo"
+check_datasource "Loki"
+
+# Final Report
+echo -e "\n${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
+echo -e "${BLUE} Validation Results${NC}"
+echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
+
+if [ $FAILED -eq 0 ]; then
+    echo -e "${GREEN}✅ Observability stack validation complete!${NC}\n"
+
+    if [ "$ALTO_RUNNING" = true ]; then
+        echo -e "${GREEN}✓ Alto running with telemetry enabled${NC}"
+    else
+        echo -e "${YELLOW}⚠ Alto not running (requires valid RPC endpoint)${NC}"
+        echo -e " To test with Alto, set ALTO_RPC_URL and re-run\n"
+    fi
+
+    echo -e "Access the observability stack:"
+    echo -e " • Grafana: ${BLUE}http://localhost:3003${NC} (admin/admin)"
+    echo -e " • Prometheus: ${BLUE}http://localhost:9090${NC}"
+    echo -e " • Tempo: ${BLUE}http://localhost:3200${NC}"
+    echo -e " • Loki: ${BLUE}http://localhost:3100${NC}"
+
+    if [ "$ALTO_RUNNING" = true ]; then
+        echo -e " • Alto: ${BLUE}http://localhost:3000${NC}"
+    fi
+
+    echo -e "\n${YELLOW}Stack will remain running. Press Ctrl+C to stop and cleanup.${NC}\n"
+
+    # Keep running if Alto is running
+    if [ -n "$ALTO_PID" ]; then
+        wait "$ALTO_PID"
+    else
+        # Just wait indefinitely if Alto isn't running
+        echo "Press Ctrl+C to stop..."
+        while true; do sleep 1; done
+    fi
+    exit 0
+else
+    echo -e "${RED}❌ $FAILED check(s) failed${NC}\n"
+    echo -e "${YELLOW}Logs available at:${NC}"
+    echo -e " • Alto: /tmp/alto.log"
+    echo -e " • Build: /tmp/alto-build.log\n"
+    exit 1
+fi