From 9759156f334cd653d946e9871ab850115c1721d0 Mon Sep 17 00:00:00 2001
From: Amit Arora
Date: Sat, 6 Sep 2025 02:05:44 +0000
Subject: [PATCH 1/5] Remove token management from build script and add OAuth
 token refresher service
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove update_auth_tokens() function from build_and_run.sh to separate concerns
- Remove automatic token refresher startup from deployment script
- Add standalone token refresher service with MCP config generation
- Add token_refresher.pid to .gitignore
- Add support for no-auth services in token refresher alongside egress token services
- Clean up emoji usage in logging for better compatibility

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude
---
 .gitignore                                  |    1 +
 agents/agent.py                             |   70 +-
 build_and_run.sh                            |   93 +-
 credentials-provider/oauth/ingress_oauth.py |   12 +-
 credentials-provider/token_refresher.py     | 1062 +++++++++++++++++++
 pyproject.toml                              |    2 +
 registry/health/service.py                  |  128 ++-
 start_token_refresher.sh                    |   68 ++
 uv.lock                                     |  126 ++-
 9 files changed, 1441 insertions(+), 121 deletions(-)
 create mode 100755 credentials-provider/token_refresher.py
 create mode 100755 start_token_refresher.sh

diff --git a/.gitignore b/.gitignore
index 122f300..f12b7d0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -197,6 +197,7 @@ cookies.txt
 registry/server_state.json
 registry/nginx_mcp_revproxy.conf
 logs/
+token_refresher.pid
 
 # Secrets and API keys - never commit these!
 .keys.yml
diff --git a/agents/agent.py b/agents/agent.py
index ab27f45..968835f 100644
--- a/agents/agent.py
+++ b/agents/agent.py
@@ -771,6 +771,8 @@ def print_agent_response(response_dict: Dict[str, Any], verbose: bool = False) -
         response_dict: Dictionary containing the agent response with 'messages' key
         verbose: Whether to show detailed debug information
     """
+    # Debug: Log entry to function
+    logger.debug(f"print_agent_response called with verbose={verbose}, response_dict keys: {response_dict.keys() if response_dict else 'None'}")
     if verbose:
         # Define ANSI color codes for different message types
         COLORS = {
@@ -844,17 +846,73 @@ def print_agent_response(response_dict: Dict[str, Any], verbose: bool = False) -
             logger.info(f"{'=' * 20} END OF {msg_type} MESSAGE #{i} {'=' * 20}{reset}")
             logger.info("")
 
-    # Always show the final AI response
+    # Always show the final AI response (both in verbose and non-verbose mode)
+    # This section runs regardless of verbose flag
+    if not verbose:
+        logger.info("=== Attempting to print final response (non-verbose mode) ===")
+
     if response_dict and "messages" in response_dict and response_dict["messages"]:
+        # Debug: Log that we're looking for the final AI message
+        if not verbose:
+            logger.info(f"Found {len(response_dict['messages'])} messages in response")
+
         # Get the last AI message from the response
         for message in reversed(response_dict["messages"]):
             message_type = type(message).__name__
-            if "AIMessage" in message_type or "ai" in str(message).lower():
-                if isinstance(message, dict) and "content" in message:
-                    print("\n" + message["content"])
+
+            # Debug logging in non-verbose mode to understand what's happening
+            if not verbose:
+                logger.debug(f"Checking message type: {message_type}")
+
+            # Check if this is an AI message
+            if "AIMessage" in message_type or "ai" in str(type(message)).lower():
+                # Extract and print the content
+                content = None
+
+                # Try different ways to extract content
+                if hasattr(message, 'content'):
+                    content = message.content
+                elif isinstance(message, dict) and "content" in message:
+                    content = message["content"]
                 else:
-                    print("\n" + str(message.content))
+                    # Try to extract content from string representation as last resort
+                    try:
+                        content = str(message)
+                    except Exception:
+                        content = None
+
+                # Print the content if we found any
+                if content:
+                    # Force print the final response regardless of any conditions
+                    print("\n" + str(content), flush=True)
+
+                    if not verbose:
+                        logger.info(f"Final AI Response printed (length: {len(str(content))} chars)")
+                else:
+                    if not verbose:
+                        logger.warning(f"AI message found but no content extracted. Message type: {message_type}, Message attrs: {dir(message) if hasattr(message, '__dict__') else 'N/A'}")
+
+                # We found an AI message, stop looking
                 break
+        else:
+            # No AI message found - try to print the last message regardless
+            if not verbose:
+                logger.warning("No AI message found in response, attempting to print last message")
+                logger.debug(f"Messages in response: {[type(m).__name__ for m in response_dict['messages']]}")
+
+            # As a fallback, print the last message if it has content
+            if response_dict["messages"]:
+                last_message = response_dict["messages"][-1]
+                content = None
+
+                if hasattr(last_message, 'content'):
+                    content = last_message.content
+                elif isinstance(last_message, dict) and "content" in last_message:
+                    content = last_message["content"]
+
+                if content:
+                    print("\n[Response]\n" + str(content), flush=True)
+                    logger.info(f"Printed last message as fallback (type: {type(last_message).__name__})")
 
 
 class InteractiveAgent:
@@ -1232,6 +1290,8 @@ async def main():
         if not args.interactive:
             # Single-turn mode - just show the response and exit
             logger.info("\nResponse:" + "\n" + "-"*40)
+            logger.debug(f"Calling print_agent_response with verbose={args.verbose}")
+            logger.debug(f"Response has {len(response.get('messages', []))} messages")
             print_agent_response(response, args.verbose)
             return
         else:
diff --git a/build_and_run.sh b/build_and_run.sh
index 898ea26..7977db3 100755
--- a/build_and_run.sh
+++ b/build_and_run.sh
@@ -14,65 +14,6 @@ handle_error() {
     exit 1
 }
 
-# Update auth tokens from .oauth-tokens files
-update_auth_tokens() {
-    log "Updating auth tokens from .oauth-tokens directory..."
-
-    OAUTH_TOKENS_DIR="$PWD/.oauth-tokens"
-    if [ ! -d "$OAUTH_TOKENS_DIR" ]; then
-        log "No .oauth-tokens directory found at $OAUTH_TOKENS_DIR - skipping token updates"
-        return
-    fi
-
-    # Create backup of .env (overwrite previous backup)
-    cp .env .env.backup
-
-    # Process each egress.json file
-    for egress_file in "$OAUTH_TOKENS_DIR"/*egress.json; do
-        if [ ! -f "$egress_file" ]; then
-            continue
-        fi
-
-        # Extract server name from filename (remove egress.json suffix)
-        filename=$(basename "$egress_file")
-        server_name=$(echo "$filename" | sed 's/egress\.json$//')
-
-        # Map specific server names to expected environment variable names
-        case "$server_name" in
-            "atlassian-atlassian-")
-                env_var_name="ATLASSIAN_AUTH_TOKEN"
-                ;;
-            "bedrock-agentcore-sre-gateway-")
-                env_var_name="SRE_GATEWAY_AUTH_TOKEN"
-                ;;
-            *)
-                # Generic conversion: convert to uppercase and replace hyphens with underscores
-                env_var_name=$(echo "${server_name}" | sed 's/-$//' | tr '[:lower:]' '[:upper:]' | tr '-' '_')_AUTH_TOKEN
-                ;;
-        esac
-
-        # Extract access_token from JSON file
-        if command -v jq &> /dev/null; then
-            access_token=$(jq -r '.access_token // empty' "$egress_file" 2>/dev/null)
-        else
-            # Fallback method without jq
-            access_token=$(grep -o '"access_token"[[:space:]]*:[[:space:]]*"[^"]*"' "$egress_file" | sed 's/.*"access_token"[[:space:]]*:[[:space:]]*"\([^"]*\)".*/\1/')
-        fi
-
-        if [ -n "$access_token" ] && [ "$access_token" != "null" ]; then
-            log "Found token for $server_name -> $env_var_name"
-
-            # Remove existing token line if present
-            sed -i "/^${env_var_name}=/d" .env
-
-            # Add new token
-            echo "${env_var_name}=\"${access_token}\"" >> .env
-            log "✓ Updated $env_var_name in .env"
-        else
-            log "⚠ No valid access_token found in $egress_file"
-        fi
-    done
-}
 
 log "Starting MCP Gateway Docker Compose deployment script"
@@ -131,9 +72,6 @@ fi
 
 log "Found .env file"
 
-# Update auth tokens from .oauth-tokens files
-update_auth_tokens
-
 # Check if docker-compose is installed
 if ! command -v docker-compose &> /dev/null; then
     log "ERROR: docker-compose is not installed"
@@ -186,9 +124,9 @@ if [ -d "registry/servers" ]; then
 
     # Verify atlassian.json was copied
     if [ -f "$MCPGATEWAY_SERVERS_DIR/atlassian.json" ]; then
-        log "✓ atlassian.json copied successfully"
+        log "atlassian.json copied successfully"
     else
-        log "⚠ atlassian.json not found in copied files"
+        log "WARNING: atlassian.json not found in copied files"
     fi
 else
     log "No JSON files found in registry/servers"
@@ -206,7 +144,7 @@ if [ -f "auth_server/scopes.yml" ]; then
 
     # Copy scopes.yml
     sudo cp auth_server/scopes.yml "$AUTH_SERVER_DIR/"
-    log "✓ scopes.yml copied successfully to $AUTH_SERVER_DIR"
+    log "scopes.yml copied successfully to $AUTH_SERVER_DIR"
 else
     log "WARNING: auth_server/scopes.yml not found"
 fi
@@ -258,23 +196,23 @@ log "Verifying services are healthy..."
 
 # Check registry service
 if curl -f http://localhost:7860/health &>/dev/null; then
-    log "✓ Registry service is healthy"
+    log "Registry service is healthy"
 else
-    log "⚠ Registry service may still be starting up..."
+    log "WARNING: Registry service may still be starting up..."
 fi
 
 # Check auth service
 if curl -f http://localhost:8888/health &>/dev/null; then
-    log "✓ Auth service is healthy"
+    log "Auth service is healthy"
 else
-    log "⚠ Auth service may still be starting up..."
+    log "WARNING: Auth service may still be starting up..."
 fi
 
 # Check nginx is responding
 if curl -f http://localhost:80 &>/dev/null || curl -k -f https://localhost:443 &>/dev/null; then
-    log "✓ Nginx is responding"
+    log "Nginx is responding"
 else
-    log "⚠ Nginx may still be starting up..."
+    log "WARNING: Nginx may still be starting up..."
 fi
 
 # Verify FAISS index creation
@@ -282,22 +220,22 @@ log "Verifying FAISS index creation..."
 sleep 5  # Give registry service time to create the index
 
 if [ -f "$MCPGATEWAY_SERVERS_DIR/service_index.faiss" ]; then
-    log "✓ FAISS index created successfully at $MCPGATEWAY_SERVERS_DIR/service_index.faiss"
+    log "FAISS index created successfully at $MCPGATEWAY_SERVERS_DIR/service_index.faiss"
 
     # Check if metadata file also exists
     if [ -f "$MCPGATEWAY_SERVERS_DIR/service_index_metadata.json" ]; then
-        log "✓ FAISS index metadata created successfully"
+        log "FAISS index metadata created successfully"
     else
-        log "⚠ FAISS index metadata file not found"
+        log "WARNING: FAISS index metadata file not found"
     fi
 else
-    log "⚠ FAISS index not yet created. The registry service will create it on first access."
+    log "WARNING: FAISS index not yet created. The registry service will create it on first access."
 fi
 
 # Verify server list includes Atlassian
 log "Verifying server list..."
 if [ -f "$MCPGATEWAY_SERVERS_DIR/atlassian.json" ]; then
-    log "✓ Atlassian server configuration present"
+    log "Atlassian server configuration present"
 fi
 
 # List all available server JSON files
@@ -306,9 +244,10 @@ if ls "$MCPGATEWAY_SERVERS_DIR"/*.json 2>/dev/null | head -n 10; then
     TOTAL_SERVERS=$(ls "$MCPGATEWAY_SERVERS_DIR"/*.json 2>/dev/null | wc -l)
     log "Total server configurations: $TOTAL_SERVERS"
 else
-    log "⚠ No server configurations found in $MCPGATEWAY_SERVERS_DIR"
+    log "WARNING: No server configurations found in $MCPGATEWAY_SERVERS_DIR"
 fi
 
+
 log "Deployment completed successfully"
 log ""
 log "Services are available at:"
diff --git a/credentials-provider/oauth/ingress_oauth.py b/credentials-provider/oauth/ingress_oauth.py
index 5e6bcbe..3fab9ea 100644
--- a/credentials-provider/oauth/ingress_oauth.py
+++ b/credentials-provider/oauth/ingress_oauth.py
@@ -168,7 +168,7 @@ def _perform_m2m_authentication(
         "region": region
     }
 
-    logger.info("🎉 M2M token obtained successfully!")
+    logger.info("M2M token obtained successfully!")
 
     if expires_at:
         expires_in = int(expires_at - time.time())
@@ -214,7 +214,7 @@ def _save_ingress_tokens(token_data: Dict[str, Any]) -> str:
     # Secure the file
     ingress_path.chmod(0o600)
 
-    logger.info(f"📁 Saved ingress tokens to: {ingress_path}")
+    logger.info(f"Saved ingress tokens to: {ingress_path}")
 
     return str(ingress_path)
 
@@ -290,7 +290,7 @@ def main() -> int:
     user_pool_id = os.getenv("INGRESS_OAUTH_USER_POOL_ID")
     region = os.getenv("AWS_REGION", "us-east-1")
 
-    logger.info("🔐 Starting INGRESS OAuth authentication (Cognito M2M)")
+    logger.info("Starting INGRESS OAuth authentication (Cognito M2M)")
     logger.info(f"User Pool ID: {user_pool_id}")
     logger.info(f"Client ID: {client_id[:10]}...")
     logger.info(f"Region: {region}")
@@ -299,7 +299,7 @@ def main() -> int:
     if not args.force:
         existing_tokens = _load_existing_tokens()
         if existing_tokens:
-            logger.info("✅ Using existing valid ingress token")
+            logger.info("Using existing valid ingress token")
             logger.info(f"Token expires at: {existing_tokens.get('expires_at_human', 'Unknown')}")
             return 0
 
@@ -314,13 +314,13 @@ def main() -> int:
         # Save tokens
         saved_path = _save_ingress_tokens(token_data)
 
-        logger.info("✅ INGRESS OAuth authentication completed successfully!")
+        logger.info("INGRESS OAuth authentication completed successfully!")
         logger.info(f"Tokens saved to: {saved_path}")
 
         return 0
 
     except Exception as e:
-        logger.error(f"❌ INGRESS OAuth authentication failed: {e}")
+        logger.error(f"ERROR: INGRESS OAuth authentication failed: {e}")
         if args.verbose:
             import traceback
             logger.error(traceback.format_exc())
diff --git a/credentials-provider/token_refresher.py b/credentials-provider/token_refresher.py
new file mode 100755
index 0000000..2f544f1
--- /dev/null
+++ b/credentials-provider/token_refresher.py
@@ -0,0 +1,1062 @@
+#!/usr/bin/env python3
+"""
+OAuth Token Refresher Service
+
+This service monitors OAuth tokens in the .oauth-tokens directory and automatically
+refreshes them before they expire. It runs continuously in the background, checking
+tokens every configurable interval (default 5 minutes).
+
+Usage:
+    uv run python credentials-provider/token_refresher.py                   # Run with defaults
+    uv run python credentials-provider/token_refresher.py --interval 300    # Check every 5 minutes
+    uv run python credentials-provider/token_refresher.py --buffer 3600     # Refresh 1 hour before expiry
+    uv run python credentials-provider/token_refresher.py --once            # Run once and exit
+    uv run python credentials-provider/token_refresher.py --once --force    # Force refresh all tokens once and exit
+    nohup uv run python credentials-provider/token_refresher.py > token_refresher.log 2>&1 &  # Run in background
+"""
+
+import argparse
+import json
+import logging
+import os
+import psutil
+import signal
+import subprocess
+import sys
+import tempfile
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+# Configure logging with basicConfig
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s,p%(process)s,{%(filename)s:%(lineno)d},%(levelname)s,%(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+# Configuration constants
+DEFAULT_CHECK_INTERVAL = 300  # 5 minutes in seconds
+DEFAULT_EXPIRY_BUFFER = 3600  # 1 hour buffer before expiry
+
+# Process management
+PIDFILE_NAME = "token_refresher.pid"
+
+# Dynamically determine paths relative to this script's location
+SCRIPT_DIR = Path(__file__).parent
+PROJECT_ROOT = SCRIPT_DIR.parent
+OAUTH_TOKENS_DIR = PROJECT_ROOT / ".oauth-tokens"
+CREDENTIALS_PROVIDER_DIR = SCRIPT_DIR
+
+# Files to ignore during token refresh (derived files that get regenerated)
+IGNORED_FILES = {
+    "mcp.json",
+    "vscode_mcp.json",
+    "*readable*",  # Any file with "readable" in the name
+}
+
+
+def _should_ignore_file(filename: str) -> bool:
+    """
+    Check if a token file should be ignored.
+
+    Args:
+        filename: Name of the token file
+
+    Returns:
+        True if file should be ignored, False otherwise
+    """
+    # Check exact matches
+    if filename in {"mcp.json", "vscode_mcp.json"}:
+        return True
+
+    # Check for "readable" in filename
+    if "readable" in filename.lower():
+        return True
+
+    return False
+
+
+def _parse_token_file(filepath: Path) -> Optional[Dict]:
+    """
+    Parse a token JSON file and extract relevant information.
+
+    Args:
+        filepath: Path to the token file
+
+    Returns:
+        Token data dict or None if file cannot be parsed
+    """
+    try:
+        with open(filepath, 'r') as f:
+            data = json.load(f)
+
+        # Validate required fields
+        if 'expires_at' not in data:
+            logger.debug(f"No expires_at field in {filepath.name}")
+            return None
+
+        return data
+    except (json.JSONDecodeError, IOError) as e:
+        logger.warning(f"Failed to parse {filepath.name}: {e}")
+        return None
+
+
+def _get_all_tokens() -> List[Tuple[Path, Dict]]:
+    """
+    Get all valid token files regardless of expiration status.
+
+    Returns:
+        List of (filepath, token_data) tuples for all valid tokens
+    """
+    if not OAUTH_TOKENS_DIR.exists():
+        logger.error(f"OAuth tokens directory not found: {OAUTH_TOKENS_DIR}")
+        return []
+
+    all_tokens = []
+
+    for filepath in OAUTH_TOKENS_DIR.glob("*.json"):
+        # Skip ignored files
+        if _should_ignore_file(filepath.name):
+            logger.debug(f"Ignoring file: {filepath.name}")
+            continue
+
+        # Parse token file
+        token_data = _parse_token_file(filepath)
+        if not token_data:
+            continue
+
+        logger.info(f"Found token file: {filepath.name}")
+        logger.debug(f"Reading token from: {filepath.absolute()}")
+        all_tokens.append((filepath, token_data))
+
+    return all_tokens
+
+
+def _get_expiring_tokens(buffer_seconds: int = DEFAULT_EXPIRY_BUFFER) -> List[Tuple[Path, Dict]]:
+    """
+    Find all tokens that are expired or will expire within the buffer period.
+
+    Args:
+        buffer_seconds: Number of seconds before expiry to trigger refresh
+
+    Returns:
+        List of (filepath, token_data) tuples for expiring tokens
+    """
+    if not OAUTH_TOKENS_DIR.exists():
+        logger.error(f"OAuth tokens directory not found: {OAUTH_TOKENS_DIR}")
+        return []
+
+    current_time = time.time()
+    expiring_tokens = []
+
+    for filepath in OAUTH_TOKENS_DIR.glob("*.json"):
+        # Skip ignored files
+        if _should_ignore_file(filepath.name):
+            logger.debug(f"Ignoring file: {filepath.name}")
+            continue
+
+        # Parse token file
+        token_data = _parse_token_file(filepath)
+        if not token_data:
+            continue
+
+        logger.debug(f"Reading token from: {filepath.absolute()}")
+
+        # Check expiration
+        expires_at = token_data.get('expires_at', 0)
+        time_until_expiry = expires_at - current_time
+
+        if time_until_expiry <= buffer_seconds:
+            hours_until_expiry = time_until_expiry / 3600
+            if time_until_expiry <= 0:
+                logger.warning(f"Token EXPIRED: {filepath.name} (expired {-hours_until_expiry:.1f} hours ago)")
+            else:
+                logger.info(f"Token expiring soon: {filepath.name} (expires in {hours_until_expiry:.1f} hours)")
+            logger.debug(f"Will refresh token at: {filepath.absolute()}")
+            expiring_tokens.append((filepath, token_data))
+
+    return expiring_tokens
+
+
+def _determine_refresh_method(token_data: Dict, filename: str) -> Optional[str]:
+    """
+    Determine which refresh method to use based on token data.
+
+    Args:
+        token_data: Parsed token data
+        filename: Token filename
+
+    Returns:
+        Refresh method ('agentcore' or 'oauth') or None if cannot determine
+    """
+    provider = token_data.get('provider', '').lower()
+
+    # Check for AgentCore/Bedrock tokens
+    if 'bedrock' in provider or 'agentcore' in provider:
+        return 'agentcore'
+
+    # Check for OAuth providers (Atlassian, Google, GitHub, etc.)
+    oauth_providers = ['atlassian', 'google', 'github', 'microsoft', 'oauth']
+    if any(p in provider for p in oauth_providers):
+        return 'oauth'
+
+    # Try to infer from filename
+    if 'bedrock' in filename.lower() or 'agentcore' in filename.lower():
+        return 'agentcore'
+
+    if 'egress' in filename.lower() or 'ingress' in filename.lower():
+        return 'oauth'
+
+    logger.warning(f"Cannot determine refresh method for {filename} with provider '{provider}'")
+    return None
+
+
+def _refresh_agentcore_token(token_data: Dict, filename: str) -> bool:
+    """
+    Refresh a Bedrock AgentCore token using generate_access_token.py.
+
+    Args:
+        token_data: Current token data
+        filename: Token filename
+
+    Returns:
+        True if refresh successful, False otherwise
+    """
+    script_path = CREDENTIALS_PROVIDER_DIR / "agentcore-auth" / "generate_access_token.py"
+
+    if not script_path.exists():
+        logger.error(f"AgentCore refresh script not found: {script_path}")
+        return False
+
+    try:
+        # Extract server name from filename if possible
+        # Format: bedrock-agentcore-{server_name}-egress.json
+        server_name = None
+        if filename.startswith("bedrock-agentcore-") and filename.endswith("-egress.json"):
+            server_name = filename.replace("bedrock-agentcore-", "").replace("-egress.json", "")
+
+        logger.info(f"Refreshing AgentCore token for: {server_name or 'default'}")
+
+        # Run the refresh script using uv run
+        cmd = ["uv", "run", "python", str(script_path)]
+        if server_name:
+            # The script might accept server-specific parameters
+            # Check the script for available options
+            pass
+
+        logger.debug(f"Running AgentCore refresh command: {' '.join(cmd)}")
+        logger.debug(f"Working directory: {PROJECT_ROOT.absolute()}")
+
+        result = subprocess.run(
+            cmd,
+            cwd=PROJECT_ROOT,
+            capture_output=True,
+            text=True,
+            timeout=30
+        )
+
+        if result.returncode == 0:
+            logger.info(f"Successfully refreshed AgentCore token: {filename}")
+            return True
+        else:
+            logger.error(f"Failed to refresh AgentCore token: {result.stderr}")
+            return False
+
+    except subprocess.TimeoutExpired:
+        logger.error(f"Timeout refreshing AgentCore token: {filename}")
+        return False
+    except Exception as e:
+        logger.error(f"Error refreshing AgentCore token {filename}: {e}")
+        return False
+
+
+def _refresh_oauth_token(token_data: Dict, filename: str) -> bool:
+    """
+    Refresh a generic OAuth token using egress_oauth.py or ingress_oauth.py.
+
+    Args:
+        token_data: Current token data
+        filename: Token filename
+
+    Returns:
+        True if refresh successful, False otherwise
+    """
+    # Determine which OAuth script to use
+    if 'ingress' in filename.lower():
+        script_name = "ingress_oauth.py"
+        # Ingress uses Cognito M2M and doesn't accept --provider argument
+        use_provider_arg = False
+    else:
+        script_name = "egress_oauth.py"  # Default to egress
+        use_provider_arg = True
+
+    script_path = CREDENTIALS_PROVIDER_DIR / "oauth" / script_name
+
+    if not script_path.exists():
+        logger.error(f"OAuth refresh script not found: {script_path}")
+        return False
+
+    try:
+        provider = token_data.get('provider', 'atlassian')
+        logger.info(f"Refreshing OAuth token for provider: {provider}")
+
+        # Build command based on script type
+        cmd = ["uv", "run", "python", str(script_path)]
+
+        # Only add --provider for egress OAuth (not ingress)
+        if use_provider_arg:
+            cmd.extend(["--provider", provider])
+
+        logger.debug(f"Running OAuth refresh command: {' '.join(cmd)}")
+        logger.debug(f"Working directory: {PROJECT_ROOT.absolute()}")
+
+        # Check if we have a refresh token
+        if 'refresh_token' in token_data:
+            # The script should handle refresh token flow
+            pass
+
+        result = subprocess.run(
+            cmd,
+            cwd=PROJECT_ROOT,
+            capture_output=True,
+            text=True,
+            timeout=60  # OAuth flow might take longer
+        )
+
+        if result.returncode == 0:
+            logger.info(f"Successfully refreshed OAuth token: {filename}")
+            return True
+        else:
+            logger.error(f"Failed to refresh OAuth token: {result.stderr}")
+            return False
+
+    except subprocess.TimeoutExpired:
+        logger.error(f"Timeout refreshing OAuth token: {filename}")
+        return False
+    except Exception as e:
+        logger.error(f"Error refreshing OAuth token {filename}: {e}")
+        return False
+
+
+def _refresh_token(filepath: Path, token_data: Dict) -> bool:
+    """
+    Refresh a single token based on its type.
+
+    Args:
+        filepath: Path to the token file
+        token_data: Parsed token data
+
+    Returns:
+        True if refresh successful, False otherwise
+    """
+    filename = filepath.name
+    refresh_method = _determine_refresh_method(token_data, filename)
+
+    if not refresh_method:
+        logger.error(f"Cannot determine how to refresh {filename}")
+        return False
+
+    if refresh_method == 'agentcore':
+        return _refresh_agentcore_token(token_data, filename)
+    elif refresh_method == 'oauth':
+        return _refresh_oauth_token(token_data, filename)
+    else:
+        logger.error(f"Unknown refresh method: {refresh_method}")
+        return False
+
+
+def _scan_noauth_services() -> List[Dict]:
+    """
+    Scan registry servers and find services with auth_type: none.
+
+    Returns:
+        List of no-auth service configurations
+    """
+    registry_dir = PROJECT_ROOT / "registry" / "servers"
+    noauth_services = []
+
+    if not registry_dir.exists():
+        logger.warning(f"Registry servers directory not found: {registry_dir}")
+        return []
+
+    logger.debug(f"Scanning for no-auth services in: {registry_dir}")
+
+    for json_file in registry_dir.glob("*.json"):
+        # Skip server_state.json
+        if json_file.name == "server_state.json":
+            continue
+
+        try:
+            with open(json_file, 'r') as f:
+                server_config = json.load(f)
+
+            auth_type = server_config.get("auth_type")
+            if auth_type == "none":
+                # Extract relevant service information
+                service = {
+                    "server_name": server_config.get("server_name", "Unknown"),
+                    "path": server_config.get("path", ""),
+                    "proxy_pass_url": server_config.get("proxy_pass_url", ""),
+                    "supported_transports": server_config.get("supported_transports", ["streamable-http"]),
+                    "description": server_config.get("description", ""),
+                    "file_name": json_file.name
+                }
+                noauth_services.append(service)
+                logger.debug(f"Found no-auth service: {service['server_name']} ({service['path']})")
+        except (json.JSONDecodeError, IOError) as e:
+            logger.warning(f"Failed to parse {json_file.name}: {e}")
+            continue
+
+    return noauth_services
+
+
+def _regenerate_mcp_configs() -> bool:
+    """
+    Regenerate MCP configuration files (mcp.json and vscode_mcp.json) after token refresh.
+
+    Returns:
+        True if regeneration successful, False otherwise
+    """
+    logger.info("Regenerating MCP configuration files...")
+
+    try:
+        # Check for required files
+        ingress_file = OAUTH_TOKENS_DIR / "ingress.json"
+        has_ingress = ingress_file.exists()
+
+        # Find all egress token files
+        egress_files = []
+        for file_path in OAUTH_TOKENS_DIR.glob("*-egress.json"):
+            if file_path.is_file():
+                egress_files.append(file_path)
+                logger.debug(f"Found egress token file: {file_path.name}")
+
+        # Scan for no-auth services
+        noauth_services = _scan_noauth_services()
+        logger.info(f"Found {len(noauth_services)} no-auth services to include")
+
+        if not has_ingress and not egress_files and not noauth_services:
+            logger.warning("No token files or no-auth services found, skipping MCP configuration generation")
+            return True
+
+        # Generate both configurations
+        vscode_success = _generate_vscode_config(has_ingress, ingress_file, egress_files, noauth_services)
+        roocode_success = _generate_roocode_config(has_ingress, ingress_file, egress_files, noauth_services)
+
+        if vscode_success and roocode_success:
+            logger.info("MCP configuration files regenerated successfully")
+            return True
+        else:
+            logger.error("Failed to regenerate some MCP configuration files")
+            return False
+
+    except Exception as e:
+        logger.error(f"Error regenerating MCP configs: {e}")
+        return False
+
+
+def _get_ingress_headers(ingress_file: Path) -> Dict[str, str]:
+    """
+    Extract ingress authentication headers from token file.
+
+    Args:
+        ingress_file: Path to ingress token file
+
+    Returns:
+        Dictionary of ingress headers
+    """
+    headers = {}
+    if ingress_file.exists():
+        try:
+            with open(ingress_file, 'r') as f:
+                ingress_data = json.load(f)
+            headers = {
+                "X-Authorization": f"Bearer {ingress_data.get('access_token', '')}",
+                "X-User-Pool-Id": ingress_data.get('user_pool_id', ''),
+                "X-Client-Id": ingress_data.get('client_id', ''),
+                "X-Region": ingress_data.get('region', 'us-east-1')
+            }
+        except (json.JSONDecodeError, IOError) as e:
+            logger.warning(f"Failed to read ingress file: {e}")
+
+    return headers
+
+
+def _create_egress_server_config(
+    egress_file: Path,
+    ingress_headers: Dict[str, str],
+    registry_url: str,
+    config_type: str = "vscode"
+) -> Tuple[str, Dict]:
+    """
+    Create server configuration from egress token file.
+
+    Args:
+        egress_file: Path to egress token file
+        ingress_headers: Ingress authentication headers
+        registry_url: Base registry URL
+        config_type: Either "vscode" or "roocode"
+
+    Returns:
+        Tuple of (server_key, server_config)
+    """
+    try:
+        with open(egress_file, 'r') as f:
+            egress_data = json.load(f)
+    except (json.JSONDecodeError, IOError) as e:
+        logger.warning(f"Failed to read egress file {egress_file.name}: {e}")
+        return None, None
+
+    provider = egress_data.get('provider', '')
+    token = egress_data.get('access_token', '')
+    cloud_id = egress_data.get('cloud_id', '')
+
+    # Determine server key and URL
+    if provider == 'atlassian':
+        server_key = 'atlassian'
+        headers = {"Authorization": f"Bearer {token}"}
+        if cloud_id:
+            headers["X-Atlassian-Cloud-Id"] = cloud_id
+        if ingress_headers:
+            headers.update(ingress_headers)
+        url = f"{registry_url}/atlassian/mcp"
+
+    elif provider == 'bedrock-agentcore':
+        # Extract server name from filename
+        filename = egress_file.name
+        if filename.startswith("bedrock-agentcore-") and filename.endswith("-egress.json"):
+            server_key = filename.replace("bedrock-agentcore-", "").replace("-egress.json", "")
+        else:
+            server_key = "sre-gateway"
+
+        headers = {"Authorization": f"Bearer {token}"}
+        if ingress_headers:
+            headers.update(ingress_headers)
+        url = f"{registry_url}/{server_key}/mcp"
+
+    else:
+        # Generic provider
+        server_key = provider
+        headers = {"Authorization": f"Bearer {token}"}
+        if ingress_headers:
+            headers.update(ingress_headers)
+        url = f"{registry_url}/{provider}/mcp"
+
+    # Create config based on type
+    if config_type == "vscode":
+        server_config = {
+            "url": url,
+            "headers": headers
+        }
+    else:  # roocode
+        server_config = {
+            "type": "streamable-http",
+            "url": url,
+            "headers": headers,
+            "disabled": False,
+            "alwaysAllow": []
+        }
+
+    return server_key, server_config
+
+
+def _create_noauth_server_config(
+    service: Dict,
+    ingress_headers: Dict[str, str],
+    registry_url: str,
+    config_type: str = "vscode"
+) -> Tuple[str, Dict]:
+    """
+    Create server configuration for no-auth service.
+
+    Args:
+        service: No-auth service information
+        ingress_headers: Ingress authentication headers
+        registry_url: Base registry URL
+        config_type: Either "vscode" or "roocode"
+
+    Returns:
+        Tuple of (server_key, server_config)
+    """
+    # Use path as server key (remove leading and trailing slashes)
+    server_key = service["path"].strip("/")
+    if not server_key:
+        return None, None
+
+    # Construct service URL
+    path = service["path"].rstrip("/")
+    service_url = f"{registry_url}{path}/mcp"
+
+    # Create config based on type
+    if config_type == "vscode":
+        server_config = {
+            "url": service_url
+        }
+        if ingress_headers:
+            server_config["headers"] = ingress_headers
+    else:  # roocode
+        # Determine transport type
+        supported_transports = service.get("supported_transports", ["streamable-http"])
+        transport_type = supported_transports[0] if supported_transports else "streamable-http"
+
+        server_config = {
+            "type": transport_type,
+            "url": service_url,
+            "disabled": False,
+            "alwaysAllow": []
+        }
+        if ingress_headers:
+            server_config["headers"] = ingress_headers
+
+    return server_key, server_config
+
+
+def _generate_vscode_config(
+    has_ingress: bool,
+    ingress_file: Path,
+    egress_files: List[Path],
+    noauth_services: List[Dict] = None
+) -> bool:
+    """
+    Generate VS Code MCP configuration file.
+
+    Args:
+        has_ingress: Whether ingress token is available
+        ingress_file: Path to ingress token file
+        egress_files: List of egress token file paths
+        noauth_services: List of no-auth service configurations
+
+    Returns:
+        True if generation successful, False otherwise
+    """
+    config_file = OAUTH_TOKENS_DIR / "vscode_mcp.json"
+
+    try:
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
+            temp_path = temp_file.name
+
+            # Default registry URL
+            registry_url = os.getenv('REGISTRY_URL', 'https://mcpgateway.ddns.net')
+
+            # Initialize configuration
+            config = {"mcp": {"servers": {}}}
+
+            # Get ingress headers
+            ingress_headers = _get_ingress_headers(ingress_file) if has_ingress else {}
+
+            # Process egress files
+            for egress_file in egress_files:
+                server_key, server_config = _create_egress_server_config(
+                    egress_file, ingress_headers, registry_url, "vscode"
+                )
+                if server_key and server_config:
+                    config["mcp"]["servers"][server_key] = server_config
+                    logger.debug(f"Added egress service {server_key} to VS Code config")
+
+            # Process no-auth services
+            if noauth_services:
+                for service in noauth_services:
+                    server_key, server_config = _create_noauth_server_config(
+                        service, ingress_headers, registry_url, "vscode"
+                    )
+
+                    # Skip if already added or invalid
+                    if not server_key or server_key in config["mcp"]["servers"]:
+                        continue
+
+                    config["mcp"]["servers"][server_key] = server_config
+                    logger.debug(f"Added no-auth service {server_key} to VS Code config")
+
+            # Write JSON to temp file
+            json.dump(config, temp_file, indent=2)
+
+        # Move temp file to final location and set permissions
+        os.rename(temp_path, config_file)
+        os.chmod(config_file, 0o600)
+
+        logger.info(f"Generated VS Code MCP config: {config_file}")
+        logger.debug(f"VS Code config written to: {config_file.absolute()}")
+        return True
+
+    except Exception as e:
+        logger.error(f"Error generating VS Code MCP config: {e}")
+        if 'temp_path' in locals():
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass
+        return False
+
+
+def _generate_roocode_config(
+    has_ingress: bool,
+    ingress_file: Path,
+    egress_files: List[Path],
+    noauth_services: List[Dict] = None
+) -> bool:
+    """
+    Generate Roocode MCP configuration file.
+
+    Args:
+        has_ingress: Whether ingress token is available
+        ingress_file: Path to ingress token file
+        egress_files: List of egress token file paths
+        noauth_services: List of no-auth service configurations
+
+    Returns:
+        True if generation successful, False otherwise
+    """
+    config_file = OAUTH_TOKENS_DIR / "mcp.json"
+
+    try:
+        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file:
+            temp_path = temp_file.name
+
+            # Default registry URL
+            registry_url = os.getenv('REGISTRY_URL', 'https://mcpgateway.ddns.net')
+
+            # Initialize configuration
+            config = {"mcpServers": {}}
+
+            # Get ingress headers
+            ingress_headers = _get_ingress_headers(ingress_file) if has_ingress else {}
+
+            # Process egress files
+            for egress_file in egress_files:
+                server_key, server_config = _create_egress_server_config(
+                    egress_file, ingress_headers, registry_url, "roocode"
+                )
+                if server_key and server_config:
+                    config["mcpServers"][server_key] = server_config
+                    logger.debug(f"Added egress service {server_key} to Roocode config")
+
+            # Process no-auth services
+            if noauth_services:
+                for service in noauth_services:
+                    server_key, server_config = _create_noauth_server_config(
+                        service, ingress_headers, registry_url, "roocode"
+                    )
+
+                    # Skip if already added or invalid
+                    if not server_key or server_key in config["mcpServers"]:
+                        continue
+
+                    config["mcpServers"][server_key] = server_config
+                    logger.debug(f"Added no-auth service {server_key} to Roocode config")
+
+            # Write JSON to temp file
+            json.dump(config, temp_file, indent=2)
+
+        # Move temp file to final location and set permissions
+        os.rename(temp_path, config_file)
+        os.chmod(config_file, 0o600)
+
+        logger.info(f"Generated Roocode MCP config: {config_file}")
+        logger.debug(f"Roocode config written to: {config_file.absolute()}")
+        return True
+
+    except Exception as e:
+        logger.error(f"Error generating Roocode MCP config: {e}")
+        if 'temp_path' in locals():
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass
+        return False
+
+
+def _run_refresh_cycle(
+    buffer_seconds: int = DEFAULT_EXPIRY_BUFFER,
+    force_refresh: bool = False
+) -> None:
+    """
+    Run a single refresh cycle, checking and refreshing expiring tokens.
+
+    Args:
+        buffer_seconds: Number of seconds before expiry to trigger refresh
+        force_refresh: If True, refresh all tokens regardless of expiration
+    """
+    logger.info("Starting token refresh cycle...")
+    logger.debug(f"Token directory: {OAUTH_TOKENS_DIR.absolute()}")
+
+    # Find expiring tokens
+    if force_refresh:
+        expiring_tokens = _get_all_tokens()
+        logger.info("Force refresh enabled - will refresh all tokens")
+    else:
+        expiring_tokens = _get_expiring_tokens(buffer_seconds)
+
+    if not expiring_tokens:
+        logger.info("No tokens need refreshing")
+        return
+
+    logger.info(f"Found {len(expiring_tokens)} token(s) needing refresh")
+
+    # Refresh each expiring token
+    success_count = 0
+    for filepath, token_data in expiring_tokens:
+        logger.info(f"Attempting to refresh: {filepath.name}")
+        logger.debug(f"Processing token file: {filepath.absolute()}")
+
+        if _refresh_token(filepath, token_data):
+            success_count += 1
+            logger.info(f"Token successfully updated at: {filepath.absolute()}")
+        else:
+            logger.error(f"Failed to refresh: {filepath.name}")
+            logger.error(f"Failed token location: {filepath.absolute()}")
+
+    logger.info(f"Refresh cycle complete: {success_count}/{len(expiring_tokens)} tokens refreshed successfully")
+
+    # Regenerate MCP configuration files if any tokens were refreshed
+    if success_count > 0:
+        logger.info("Regenerating MCP configuration files after token refresh...")
+        if _regenerate_mcp_configs():
+            logger.info("MCP configuration files updated successfully")
+        else:
+            logger.error("Failed to update MCP configuration files")
+
+
+def _get_pidfile_path() -> Path:
+    """
+    Get the path to the PID file for the token refresher service.
+
+    Returns:
+        Path to the PID file
+    """
+    return PROJECT_ROOT / PIDFILE_NAME
+
+
+def _write_pidfile() -> None:
+    """
+    Write the current process PID to the PID file.
+    """
+    pidfile = _get_pidfile_path()
+    with open(pidfile, 'w') as f:
+        f.write(str(os.getpid()))
+    logger.debug(f"PID file written: {pidfile}")
+
+
+def _remove_pidfile() -> None:
+    """
+    Remove the PID file if it exists.
+    """
+    pidfile = _get_pidfile_path()
+    try:
+        if pidfile.exists():
+            pidfile.unlink()
+            logger.debug(f"PID file removed: {pidfile}")
+    except Exception as e:
+        logger.warning(f"Failed to remove PID file: {e}")
+
+
+def _kill_existing_instance() -> bool:
+    """
+    Kill any existing token refresher instance if running.
+
+    Returns:
+        True if an existing instance was killed, False if none was found
+    """
+    pidfile = _get_pidfile_path()
+
+    if not pidfile.exists():
+        logger.debug("No PID file found, no existing instance to kill")
+        return False
+
+    try:
+        with open(pidfile, 'r') as f:
+            old_pid = int(f.read().strip())
+
+        # Check if process exists and is a token refresher
+        if psutil.pid_exists(old_pid):
+            try:
+                process = psutil.Process(old_pid)
+                cmdline = ' '.join(process.cmdline())
+
+                # Check if it's actually our token refresher process
+                if 'token_refresher.py' in cmdline:
+                    logger.info(f"Found existing token refresher instance (PID: {old_pid})")
+                    logger.info(f"Killing existing instance: {cmdline}")
+
+                    # Try graceful shutdown first
+                    process.terminate()
+                    try:
+                        process.wait(timeout=5)
+                        logger.info(f"Gracefully terminated existing instance (PID: {old_pid})")
+                    except psutil.TimeoutExpired:
+                        # Force kill if graceful shutdown fails
+                        logger.warning(f"Graceful shutdown failed, force killing PID: {old_pid}")
+                        process.kill()
+                        process.wait()
+                        logger.info(f"Force killed existing instance (PID: {old_pid})")
+
+                    return True
+                else:
+                    logger.debug(f"PID {old_pid} exists but is not a token refresher process")
+
+            except (psutil.NoSuchProcess, psutil.AccessDenied) as e:
+                logger.debug(f"Could not access process {old_pid}: {e}")
+        else:
+            logger.debug(f"PID {old_pid} no longer exists")
+
+        # Clean up stale PID file
+        _remove_pidfile()
+        return False
+
+    except (ValueError, FileNotFoundError) as e:
+        logger.debug(f"Invalid or missing PID file: {e}")
+        _remove_pidfile()
+        return False
+    except Exception as e:
+        logger.error(f"Error checking for existing instance: {e}")
+        return False
+
+
+def _setup_signal_handlers() -> None:
+    """
+    Set up signal handlers for graceful shutdown.
+    """
+    def signal_handler(signum, frame):
+        logger.info(f"Received signal {signum}, shutting down gracefully...")
+        _remove_pidfile()
+        sys.exit(0)
+
+    signal.signal(signal.SIGTERM, signal_handler)
+    signal.signal(signal.SIGINT, signal_handler)
+
+
+def main():
+    """Main entry point for the token refresher service."""
+    parser = argparse.ArgumentParser(
+        description="OAuth Token Refresher Service",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    # Run with default settings (check every 5 minutes, refresh 1 hour before expiry)
+    uv run python credentials-provider/token_refresher.py
+
+    # Check every 10 minutes
+    uv run python credentials-provider/token_refresher.py --interval 600
+
+    # Refresh tokens 2 hours before expiry
+    uv run python credentials-provider/token_refresher.py --buffer 7200
+
+    # Run once and exit (for testing)
+    uv run python credentials-provider/token_refresher.py --once
+
+    # Force refresh all tokens once and exit
+    uv run python credentials-provider/token_refresher.py --once --force
+
+    # Run in background with logging
+    nohup uv run python credentials-provider/token_refresher.py > token_refresher.log 2>&1 &
+"""
+    )
+
+    parser.add_argument(
+        "--interval",
+        type=int,
+        default=DEFAULT_CHECK_INTERVAL,
+        help=f"Check interval in seconds (default: {DEFAULT_CHECK_INTERVAL})"
+    )
+
+    parser.add_argument(
+        "--buffer",
+        type=int,
+        default=DEFAULT_EXPIRY_BUFFER,
+        help=f"Refresh tokens this many seconds before expiry (default: {DEFAULT_EXPIRY_BUFFER})"
+    )
+
+    parser.add_argument(
+        "--once",
+        action="store_true",
+        help="Run once and exit (for testing)"
+    )
+
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Force refresh all tokens regardless of expiration status"
+    )
+
+    parser.add_argument(
+        "--debug",
+        action="store_true",
+        help="Enable debug logging"
+    )
+
+    parser.add_argument(
+        "--no-kill",
+        action="store_true",
+        help="Do not kill existing instance (will exit if one is running)"
+    )
+
+    args = parser.parse_args()
+
+    # Set debug logging if requested
+    if args.debug:
+        logging.getLogger().setLevel(logging.DEBUG)
+
+    # Handle existing instances
+    if not args.once:  # Only check for existing instances in continuous mode
+        if args.no_kill:
+            pidfile = _get_pidfile_path()
+            if pidfile.exists():
+                try:
+                    with open(pidfile, 'r') as f:
+                        existing_pid = int(f.read().strip())
+                    if psutil.pid_exists(existing_pid):
+                        logger.error(f"Another token refresher instance is already running (PID: {existing_pid})")
+                        logger.error("Stop the existing instance first, or rerun without --no-kill to replace it automatically")
+                        sys.exit(1)
+                except (ValueError, OSError):
+                    pass  # Invalid PID file, continue
+        else:
+            # Kill existing instance if found
+            killed = _kill_existing_instance()
+            if killed:
+                logger.info("Existing instance terminated, starting new instance")
+                time.sleep(1)  # Brief pause to ensure cleanup
+
+    logger.info("=" * 60)
+    logger.info("OAuth Token Refresher Service Starting")
+    logger.info(f"Check interval: {args.interval} seconds")
+    logger.info(f"Expiry buffer: {args.buffer} seconds ({args.buffer / 3600:.1f} hours)")
+    logger.info(f"OAuth tokens directory: {OAUTH_TOKENS_DIR}")
+    logger.info("=" * 60)
+
+    # Set up signal handlers and PID file for continuous mode
+    if not args.once:
+        _setup_signal_handlers()
+        _write_pidfile()
+
+    try:
+        # Run once or continuously
+        if args.once:
+            logger.info("Running single refresh cycle...")
+            _run_refresh_cycle(args.buffer, args.force)
+        else:
+            logger.info("Starting continuous monitoring...")
+            while True:
+                try:
+                    _run_refresh_cycle(args.buffer, args.force)
+                    logger.info(f"Sleeping for {args.interval} seconds...")
+                    time.sleep(args.interval)
+                except KeyboardInterrupt:
+                    logger.info("Received interrupt signal, shutting down...")
+                    break
+                except Exception as e:
+                    logger.error(f"Unexpected error in refresh cycle: {e}")
+                    logger.info(f"Continuing after error, sleeping for {args.interval} seconds...")
+                    time.sleep(args.interval)
+    finally:
+        # Clean up PID file
+        if not args.once:
+            _remove_pidfile()
+
+        logger.info("Token Refresher Service stopped")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index e102856..31f37e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,8 @@ dependencies = [
     "httpcore[asyncio]>=1.0.9",
     "pyyaml>=6.0.0",
     "langchain-anthropic>=0.3.17",
+    "matplotlib>=3.10.5",
+    "psutil>=6.1.0",
 ]
 
 [project.optional-dependencies]
diff --git a/registry/health/service.py b/registry/health/service.py
index 9cc4140..d130be7 100644
--- a/registry/health/service.py
+++ b/registry/health/service.py
@@ -363,16 +363,17 @@ async def _check_single_service(self, client: httpx.AsyncClient, service_path: s
 
         try:
             # Try to reach the service endpoint using transport-aware checking
-            success = await self._check_server_endpoint_transport_aware(client, proxy_pass_url, server_info)
+            is_healthy, status_detail = await self._check_server_endpoint_transport_aware(client, proxy_pass_url, server_info)
 
-            if success:
-                new_status = "healthy"
+            if is_healthy:
+                new_status = status_detail  # Could be "healthy" or "healthy-auth-expired"
 
-                # If service transitioned to healthy, fetch tool list (but don't block)
-                if previous_status != "healthy":
+                # If service transitioned to healthy (including auth-expired), fetch tool list (but don't block)
+                # Only do this for fully healthy status, not auth-expired
+                if previous_status != "healthy" and status_detail == "healthy":
                     asyncio.create_task(self._update_tools_background(service_path, proxy_pass_url))
             else:
-                new_status = "unhealthy: endpoint check failed"
+                new_status = status_detail  # Detailed error message from transport check
 
         except httpx.TimeoutException:
             new_status = "unhealthy: timeout"
@@ -416,10 +417,55 @@ def _build_headers_for_server(self, server_info: Dict) -> Dict[str, str]:
 
         return headers
 
-    async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient, proxy_pass_url: str, server_info: Dict) -> bool:
-        """Check server endpoint using transport-aware logic."""
-        if not proxy_pass_url:
+    async def _try_ping_without_auth(self, client: httpx.AsyncClient, endpoint: str) -> bool:
+        """
+        Try a simple ping without authentication headers.
+        Used as fallback when auth fails to determine if server is reachable.
+
+        Args:
+            client: httpx AsyncClient instance
+            endpoint: The MCP endpoint URL to ping
+
+        Returns:
+            bool: True if server responds (indicating it's reachable but auth expired)
+        """
+        try:
+            # Minimal headers without auth
+            headers = {
+                'Accept': 'application/json',
+                'Content-Type': 'application/json'
+            }
+            ping_payload = '{ "jsonrpc": "2.0", "id": "0", "method": "ping" }'
+
+            response = await client.post(
+                endpoint,
+                headers=headers,
+                content=ping_payload,
+                timeout=httpx.Timeout(5.0),
+                follow_redirects=True
+            )
+
+            # Check if we get any valid response (even auth errors indicate server is up)
+            if response.status_code in [200, 400, 401, 403]:
+                logger.info(f"Ping without auth succeeded for {endpoint} - server is reachable but auth may have expired")
+                return True
+            else:
+                logger.warning(f"Ping without auth failed for {endpoint}: Status {response.status_code}")
+                return False
+
+        except Exception as e:
+            logger.warning(f"Ping without auth failed for {endpoint}: {type(e).__name__} - {e}")
             return False
+
+
+    async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient, proxy_pass_url: str, server_info: Dict) -> tuple[bool, str]:
+        """Check server endpoint using transport-aware logic.
+
+        Returns:
+            tuple[bool, str]: (is_healthy, status_detail)
+        """
+        if not proxy_pass_url:
+            return False, "unhealthy: missing proxy URL"
 
         # Get transport information from server_info
         supported_transports = server_info.get("supported_transports", ["streamable-http"])
@@ -444,19 +490,22 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient
                 # If we can extract status code from response, check if it was 200
                 if hasattr(e, 'response') and e.response and e.response.status_code == 200:
                     logger.debug(f"SSE endpoint {proxy_pass_url} returned 200 OK before timeout - considering healthy")
-                    return True
+                    return True, "healthy"
                 # For SSE, timeout after initial connection usually means server is responding
-                return True
+                return True, "healthy"
             except Exception as e:
                 logger.warning(f"SSE endpoint {proxy_pass_url} failed with exception: {type(e).__name__} - {e}")
-                return False
+                return False, f"unhealthy: {type(e).__name__}"
         else:
             logger.info(f"[TRACE] Detected MCP endpoint in URL, using standard HTTP handling")
             response = await client.get(proxy_pass_url, headers=headers, follow_redirects=True)
-            return self._is_mcp_endpoint_healthy(response)
+            if self._is_mcp_endpoint_healthy(response):
+                return True, "healthy"
+            else:
+                return False, f"unhealthy: status {response.status_code}"
     except Exception as e:
         logger.warning(f"Health check failed for {proxy_pass_url}: {type(e).__name__} - {e}")
-        return False
+        return False, f"unhealthy: {type(e).__name__}"
 
     # Try endpoints based on supported transports, prioritizing streamable-http
     logger.info(f"[TRACE] No transport endpoint in URL: {proxy_pass_url}")
@@ -477,13 +526,26 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient
                 logger.info(f"[TRACE] Headers being sent: {headers}")
                 response = await client.post(endpoint, headers=headers, content=ping_payload, follow_redirects=True)
                 logger.info(f"[TRACE] Response status: {response.status_code}")
+
+                # Check for auth failures first
+                if response.status_code in [401, 403]:
+                    logger.info(f"[TRACE] Auth failure detected ({response.status_code}) for {endpoint}, trying ping without auth")
+                    if await self._try_ping_without_auth(client, endpoint):
+                        return True, "healthy-auth-expired"
+                    else:
+                        return False, "unhealthy: auth failed and ping without auth failed"
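+                # "healthy-auth-expired" is deliberately distinct from plain "healthy":
+                # the unauthenticated ping proved the endpoint is reachable, but the
+                # stored egress token was rejected, so callers keep the server listed
+                # and skip tool-list fetches instead of marking it down.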
+
+                # Check normal health status
                 if self._is_mcp_endpoint_healthy_streamable(response):
                     logger.info(f"Health check succeeded at {endpoint}")
-                    return True
+                    return True, "healthy"
                 else:
                     logger.warning(f"Health check failed for {endpoint}: Status {response.status_code}, Response: {response.text}")
+                    return False, f"unhealthy: status {response.status_code}"
+
             except Exception as e:
                 logger.warning(f"Health check failed for {endpoint}: {type(e).__name__} - {e}")
+                return False, f"unhealthy: {type(e).__name__}"
 
         # Fallback to SSE
         if "sse" in supported_transports:
@@ -496,16 +558,16 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient
                 timeout = httpx.Timeout(connect=5.0, read=2.0, write=5.0, pool=5.0)
                 response = await client.get(sse_endpoint, headers=headers, follow_redirects=True, timeout=timeout)
                 if self._is_mcp_endpoint_healthy(response):
-                    return True
+                    return True, "healthy"
             except (httpx.TimeoutException, asyncio.TimeoutError) as e:
                 # For SSE endpoints, timeout while reading streaming response is normal after getting 200 OK
                 logger.info(f"SSE endpoint {sse_endpoint} timed out while streaming (expected): {e}")
                 # If we can extract status code from response, check if it was 200
                 if hasattr(e, 'response') and e.response and e.response.status_code == 200:
                     logger.info(f"SSE endpoint {sse_endpoint} returned 200 OK before timeout - considering healthy")
-                    return True
+                    return True, "healthy"
                 # For SSE, timeout after initial connection usually means server is responding
-                return True
+                return True, "healthy"
             except Exception as e:
                 logger.error(f"SSE endpoint {sse_endpoint} failed with exception: {type(e).__name__} - {e}")
                 pass
@@ -526,9 +588,10 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient
                 logger.info(f"[TRACE] Response status: {response.status_code}")
                 if self._is_mcp_endpoint_healthy_streamable(response):
                     logger.info(f"Health check succeeded at {endpoint}")
-                    return True
+                    return True, "healthy"
                 else:
                     logger.warning(f"Health check failed for {endpoint}: Status {response.status_code}, Response: {response.text}")
+                    return False, f"unhealthy: status {response.status_code}"
 
             except Exception as e:
                 logger.warning(f"Health check failed for {endpoint}: {type(e).__name__} - {e}")
@@ -540,21 +603,21 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient
                 timeout = httpx.Timeout(connect=5.0, read=2.0, write=5.0, pool=5.0)
                 response = await client.get(sse_endpoint, headers=headers, follow_redirects=True, timeout=timeout)
                 if self._is_mcp_endpoint_healthy(response):
-                    return True
+                    return True, "healthy"
             except (httpx.TimeoutException, asyncio.TimeoutError) as e:
                 # For SSE endpoints, timeout while reading streaming response is normal after getting 200 OK
                 logger.info(f"SSE endpoint {sse_endpoint} timed out while streaming (expected): {e}")
                 # If we can extract status code from response, check if it was 200
                 if hasattr(e, 'response') and e.response and e.response.status_code == 200:
                     logger.info(f"SSE endpoint {sse_endpoint} returned 200 OK before timeout - considering healthy")
-                    return True
+                    return True, "healthy"
                 # For SSE, timeout after initial connection usually means server is responding
-                return True
+                return True, "healthy"
             except Exception as e:
                 logger.error(f"SSE endpoint {sse_endpoint} failed with exception: {type(e).__name__} - {e}")
                 pass
 
-        return False
+        return False, "unhealthy: all transport checks failed"
 
 
     def _is_mcp_endpoint_healthy_streamable(self, response) -> bool:
@@ -714,18 +777,21 @@ async def perform_immediate_health_check(self, service_path: str) -> tuple[str,
         try:
             async with httpx.AsyncClient(timeout=httpx.Timeout(settings.health_check_timeout_seconds)) as client:
                 # Use transport-aware endpoint checking
-                success = await self._check_server_endpoint_transport_aware(client, proxy_pass_url, server_info)
+                is_healthy, status_detail = await self._check_server_endpoint_transport_aware(client, proxy_pass_url, server_info)
 
-                if success:
-                    current_status = "healthy"
-                    logger.info(f"Health check successful for {service_path} ({proxy_pass_url}).")
+                if is_healthy:
+                    current_status = status_detail  # Could be "healthy" or "healthy-auth-expired"
+                    logger.info(f"Health check successful for {service_path} ({proxy_pass_url}): {status_detail}")
 
-                    # Schedule tool list fetch in background (don't block the response)
-                    asyncio.create_task(self._update_tools_background(service_path, proxy_pass_url))
+                    # Schedule tool list fetch in background only for fully healthy status
+                    if status_detail == "healthy":
+                        asyncio.create_task(self._update_tools_background(service_path, proxy_pass_url))
+                    elif status_detail == "healthy-auth-expired":
+                        logger.warning(f"Auth token expired for {service_path} but server is reachable")
                 else:
-                    current_status = "unhealthy: endpoint check failed"
-                    logger.info(f"Health check failed for {service_path} ({proxy_pass_url}).")
+                    current_status = status_detail  # Detailed error from transport check
+                    logger.info(f"Health check failed for {service_path} ({proxy_pass_url}): {status_detail}")
 
         except httpx.TimeoutException:
             current_status = "unhealthy: timeout"
diff --git a/start_token_refresher.sh b/start_token_refresher.sh
new file mode 100755
index 0000000..1c21366
--- /dev/null
+++ b/start_token_refresher.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+# Token Refresher Launcher Script
+# This script starts the OAuth token refresher service in the background
+
+set -e
+
+# Get the directory where this script is located
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+TOKEN_REFRESHER_SCRIPT="$SCRIPT_DIR/credentials-provider/token_refresher.py"
+
+# Configuration
+CHECK_INTERVAL=${TOKEN_REFRESH_INTERVAL:-300}   # 5 minutes default
+EXPIRY_BUFFER=${TOKEN_EXPIRY_BUFFER:-3600}      # 1 hour default
+
+# Log file location
+LOG_FILE="$SCRIPT_DIR/token_refresher.log"
+
+echo "Starting OAuth Token Refresher Service..."
+echo "Check interval: ${CHECK_INTERVAL} seconds"
+echo "Expiry buffer: ${EXPIRY_BUFFER} seconds"
+echo "Log file: ${LOG_FILE}"
+
+# Check if token refresher is already running
+if pgrep -f "token_refresher.py" > /dev/null; then
+    echo "WARNING: Token refresher service appears to be already running"
+    echo "Existing processes:"
+    pgrep -fl "token_refresher.py"
+
+    read -p "Kill existing processes and restart? (y/N): " -n 1 -r
+    echo
+    if [[ $REPLY =~ ^[Yy]$ ]]; then
+        echo "Killing existing token refresher processes..."
+        pkill -f "token_refresher.py" || true
+        sleep 2
+    else
+        echo "ERROR: Aborted - token refresher service already running"
+        exit 1
+    fi
+fi
+
+# Start the token refresher service in background
+echo "Starting token refresher service..."
+nohup uv run python "$TOKEN_REFRESHER_SCRIPT" \
+    --interval "$CHECK_INTERVAL" \
+    --buffer "$EXPIRY_BUFFER" \
+    > "$LOG_FILE" 2>&1 &
+
+TOKEN_REFRESHER_PID=$!
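+# $! holds the PID of the nohup'd background job started above; the kill -0
+# check below sends no signal, it only tests that the process is still alive.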
+echo "Token refresher service started with PID: $TOKEN_REFRESHER_PID"
+
+# Wait a moment and check if it's still running
+sleep 2
+if kill -0 "$TOKEN_REFRESHER_PID" 2>/dev/null; then
+    echo "Service is running successfully"
+    echo "Monitor logs with: tail -f $LOG_FILE"
+    echo "Stop service with: pkill -f token_refresher.py"
+else
+    echo "ERROR: Service failed to start - check logs:"
+    tail "$LOG_FILE"
+    exit 1
+fi
+
+# Show first few lines of output
+echo ""
+echo "Recent log output:"
+echo "===================="
+tail -n 10 "$LOG_FILE" || echo "No log output yet"
\ No newline at end of file
diff --git a/uv.lock b/uv.lock
index 0632a2d..d503e4d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = "==3.12.*"
 resolution-markers = [
     "python_full_version >= '3.12.4'",
@@ -197,6 +197,28 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" },
 ]
 
+[[package]]
+name = "contourpy"
+version = "1.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "numpy" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/58/01/1253e6698a07380cd31a736d248a3f2a50a7c88779a1813da27503cadc2a/contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880", size = 13466174, upload-time = "2025-07-26T12:03:12.549Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/be/45/adfee365d9ea3d853550b2e735f9d66366701c65db7855cd07621732ccfc/contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb", size = 293419, upload-time = "2025-07-26T12:01:21.16Z" },
+    { url = "https://files.pythonhosted.org/packages/53/3e/405b59cfa13021a56bba395a6b3aca8cec012b45bf177b0eaf7a202cde2c/contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6", size = 273979, upload-time = "2025-07-26T12:01:22.448Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/1c/a12359b9b2ca3a845e8f7f9ac08bdf776114eb931392fcad91743e2ea17b/contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7", size = 332653, upload-time = "2025-07-26T12:01:24.155Z" },
+    { url = "https://files.pythonhosted.org/packages/63/12/897aeebfb475b7748ea67b61e045accdfcf0d971f8a588b67108ed7f5512/contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8", size = 379536, upload-time = "2025-07-26T12:01:25.91Z" },
+    { url = "https://files.pythonhosted.org/packages/43/8a/a8c584b82deb248930ce069e71576fc09bd7174bbd35183b7943fb1064fd/contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea", size = 384397, upload-time = "2025-07-26T12:01:27.152Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/8f/ec6289987824b29529d0dfda0d74a07cec60e54b9c92f3c9da4c0ac732de/contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1", size = 362601, upload-time = "2025-07-26T12:01:28.808Z" },
+    { url = "https://files.pythonhosted.org/packages/05/0a/a3fe3be3ee2dceb3e615ebb4df97ae6f3828aa915d3e10549ce016302bd1/contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7", size = 1331288, upload-time = "2025-07-26T12:01:31.198Z" },
+    { url = "https://files.pythonhosted.org/packages/33/1d/acad9bd4e97f13f3e2b18a3977fe1b4a37ecf3d38d815333980c6c72e963/contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411", size = 1403386, upload-time = "2025-07-26T12:01:33.947Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/8f/5847f44a7fddf859704217a99a23a4f6417b10e5ab1256a179264561540e/contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69", size = 185018, upload-time = "2025-07-26T12:01:35.64Z" },
+    { url = "https://files.pythonhosted.org/packages/19/e8/6026ed58a64563186a9ee3f29f41261fd1828f527dd93d33b60feca63352/contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b", size = 226567, upload-time = "2025-07-26T12:01:36.804Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/e2/f05240d2c39a1ed228d8328a78b6f44cd695f7ef47beb3e684cf93604f86/contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc", size = 193655, upload-time = "2025-07-26T12:01:37.999Z" },
+]
+
 [[package]]
 name = "coverage"
 version = "7.8.2"
@@ -223,6 +245,15 @@ name = "csscompressor"
 version = "0.9.5"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/f1/2a/8c3ac3d8bc94e6de8d7ae270bb5bc437b210bb9d6d9e46630c98f4abd20c/csscompressor-0.9.5.tar.gz", hash = "sha256:afa22badbcf3120a4f392e4d22f9fff485c044a1feda4a950ecc5eba9dd31a05", size = 237808, upload-time = "2017-11-26T21:13:08.238Z" }
 
+[[package]]
+name = "cycler"
+version = "0.12.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a9/95/a3dbbb5028f35eafb79008e7522a75244477d2838f38cbb722248dabc2a8/cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c", size = 7615, upload-time = "2023-10-07T05:32:18.335Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/05/c19819d5e3d95294a6f5947fb9b9629efb316b96de511b418c53d245aae6/cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30", size = 8321, upload-time = "2023-10-07T05:32:16.783Z" },
+]
+
 [[package]]
 name = "deprecated"
 version = "1.2.18"
@@ -291,7 +322,6 @@ dependencies = [
     { name = "numpy" },
     { name = "packaging" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/e7/9a/e33fc563f007924dd4ec3c5101fe5320298d6c13c158a24a9ed849058569/faiss_cpu-1.11.0.tar.gz", hash = "sha256:44877b896a2b30a61e35ea4970d008e8822545cb340eca4eff223ac7f40a1db9", size = 70218, upload-time = "2025-04-28T07:48:30.459Z" }
 wheels = [
     { url = "https://files.pythonhosted.org/packages/3b/d3/7178fa07047fd770964a83543329bb5e3fc1447004cfd85186ccf65ec3ee/faiss_cpu-1.11.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:356437b9a46f98c25831cdae70ca484bd6c05065af6256d87f6505005e9135b9", size = 3313807, upload-time = "2025-04-28T07:47:54.533Z" },
     { url = "https://files.pythonhosted.org/packages/9e/71/25f5f7b70a9f22a3efe19e7288278da460b043a3b60ad98e4e47401ed5aa/faiss_cpu-1.11.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c4a3d35993e614847f3221c6931529c0bac637a00eff0d55293e1db5cb98c85f", size = 7913537, upload-time = "2025-04-28T07:47:56.723Z" },
@@ -335,6 +365,23 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" },
 ]
 
+[[package]]
+name = "fonttools"
+version = "4.59.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/11/7f/29c9c3fe4246f6ad96fee52b88d0dc3a863c7563b0afc959e36d78b965dc/fonttools-4.59.1.tar.gz", hash = "sha256:74995b402ad09822a4c8002438e54940d9f1ecda898d2bb057729d7da983e4cb", size = 3534394, upload-time = "2025-08-14T16:28:14.266Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ac/fe/6e069cc4cb8881d164a9bd956e9df555bc62d3eb36f6282e43440200009c/fonttools-4.59.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:43ab814bbba5f02a93a152ee61a04182bb5809bd2bc3609f7822e12c53ae2c91", size = 2769172, upload-time = "2025-08-14T16:26:45.729Z" },
+    { url = "https://files.pythonhosted.org/packages/b9/98/ec4e03f748fefa0dd72d9d95235aff6fef16601267f4a2340f0e16b9330f/fonttools-4.59.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4f04c3ffbfa0baafcbc550657cf83657034eb63304d27b05cff1653b448ccff6", size = 2337281, upload-time = "2025-08-14T16:26:47.921Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/b1/890360a7e3d04a30ba50b267aca2783f4c1364363797e892e78a4f036076/fonttools-4.59.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:d601b153e51a5a6221f0d4ec077b6bfc6ac35bfe6c19aeaa233d8990b2b71726", size = 4909215, upload-time = "2025-08-14T16:26:49.682Z" },
+    { url = "https://files.pythonhosted.org/packages/8a/ec/2490599550d6c9c97a44c1e36ef4de52d6acf742359eaa385735e30c05c4/fonttools-4.59.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c735e385e30278c54f43a0d056736942023c9043f84ee1021eff9fd616d17693", size = 4951958, upload-time = "2025-08-14T16:26:51.616Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/40/bd053f6f7634234a9b9805ff8ae4f32df4f2168bee23cafd1271ba9915a9/fonttools-4.59.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1017413cdc8555dce7ee23720da490282ab7ec1cf022af90a241f33f9a49afc4", size = 4894738, upload-time = "2025-08-14T16:26:53.836Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/a1/3cd12a010d288325a7cfcf298a84825f0f9c29b01dee1baba64edfe89257/fonttools-4.59.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5c6d8d773470a5107052874341ed3c487c16ecd179976d81afed89dea5cd7406", size = 5045983, upload-time = "2025-08-14T16:26:56.153Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/af/8a2c3f6619cc43cf87951405337cc8460d08a4e717bb05eaa94b335d11dc/fonttools-4.59.1-cp312-cp312-win32.whl", hash = "sha256:2a2d0d33307f6ad3a2086a95dd607c202ea8852fa9fb52af9b48811154d1428a", size = 2203407, upload-time = "2025-08-14T16:26:58.165Z" },
+    { url = "https://files.pythonhosted.org/packages/8e/f2/a19b874ddbd3ebcf11d7e25188ef9ac3f68b9219c62263acb34aca8cde05/fonttools-4.59.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:0b9e4fa7eaf046ed6ac470f6033d52c052481ff7a6e0a92373d14f556f298dc0", size = 2251561, upload-time = "2025-08-14T16:27:00.646Z" }, + { url = "https://files.pythonhosted.org/packages/0f/64/9d606e66d498917cd7a2ff24f558010d42d6fd4576d9dd57f0bd98333f5a/fonttools-4.59.1-py3-none-any.whl", hash = "sha256:647db657073672a8330608970a984d51573557f328030566521bc03415535042", size = 1130094, upload-time = "2025-08-14T16:28:12.048Z" }, +] + [[package]] name = "freezegun" version = "1.5.2" @@ -646,6 +693,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" }, ] +[[package]] +name = "kiwisolver" +version = "1.4.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/3c/85844f1b0feb11ee581ac23fe5fce65cd049a200c1446708cc1b7f922875/kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d", size = 97564, upload-time = "2025-08-10T21:27:49.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/86/c9/13573a747838aeb1c76e3267620daa054f4152444d1f3d1a2324b78255b5/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999", size = 123686, upload-time = "2025-08-10T21:26:10.034Z" }, + { url = "https://files.pythonhosted.org/packages/51/ea/2ecf727927f103ffd1739271ca19c424d0e65ea473fbaeea1c014aea93f6/kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2", size = 66460, upload-time = "2025-08-10T21:26:11.083Z" }, + { url = "https://files.pythonhosted.org/packages/5b/5a/51f5464373ce2aeb5194508298a508b6f21d3867f499556263c64c621914/kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14", size = 64952, upload-time = "2025-08-10T21:26:12.058Z" }, + { url = "https://files.pythonhosted.org/packages/70/90/6d240beb0f24b74371762873e9b7f499f1e02166a2d9c5801f4dbf8fa12e/kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04", size = 1474756, upload-time = "2025-08-10T21:26:13.096Z" }, + { url = "https://files.pythonhosted.org/packages/12/42/f36816eaf465220f683fb711efdd1bbf7a7005a2473d0e4ed421389bd26c/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752", size = 1276404, upload-time = "2025-08-10T21:26:14.457Z" }, + { url = "https://files.pythonhosted.org/packages/2e/64/bc2de94800adc830c476dce44e9b40fd0809cddeef1fde9fcf0f73da301f/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77", size = 1294410, upload-time = "2025-08-10T21:26:15.73Z" }, + { url = "https://files.pythonhosted.org/packages/5f/42/2dc82330a70aa8e55b6d395b11018045e58d0bb00834502bf11509f79091/kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198", size = 1343631, upload-time = "2025-08-10T21:26:17.045Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/fd/f4c67a6ed1aab149ec5a8a401c323cee7a1cbe364381bb6c9c0d564e0e20/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d", size = 2224963, upload-time = "2025-08-10T21:26:18.737Z" }, + { url = "https://files.pythonhosted.org/packages/45/aa/76720bd4cb3713314677d9ec94dcc21ced3f1baf4830adde5bb9b2430a5f/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab", size = 2321295, upload-time = "2025-08-10T21:26:20.11Z" }, + { url = "https://files.pythonhosted.org/packages/80/19/d3ec0d9ab711242f56ae0dc2fc5d70e298bb4a1f9dfab44c027668c673a1/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2", size = 2487987, upload-time = "2025-08-10T21:26:21.49Z" }, + { url = "https://files.pythonhosted.org/packages/39/e9/61e4813b2c97e86b6fdbd4dd824bf72d28bcd8d4849b8084a357bc0dd64d/kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145", size = 2291817, upload-time = "2025-08-10T21:26:22.812Z" }, + { url = "https://files.pythonhosted.org/packages/a0/41/85d82b0291db7504da3c2defe35c9a8a5c9803a730f297bd823d11d5fb77/kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54", size = 73895, upload-time = "2025-08-10T21:26:24.37Z" }, + { url = "https://files.pythonhosted.org/packages/e2/92/5f3068cf15ee5cb624a0c7596e67e2a0bb2adee33f71c379054a491d07da/kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60", size = 64992, upload-time = "2025-08-10T21:26:25.732Z" }, +] + [[package]] name = "langchain-anthropic" version = "0.3.17" @@ -819,6 +887,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601, upload-time = "2024-10-18T15:21:23.499Z" }, ] +[[package]] +name = "matplotlib" +version = "3.10.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "contourpy" }, + { name = "cycler" }, + { name = "fonttools" }, + { name = "kiwisolver" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "pyparsing" }, + { name = "python-dateutil" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/43/91/f2939bb60b7ebf12478b030e0d7f340247390f402b3b189616aad790c366/matplotlib-3.10.5.tar.gz", hash = "sha256:352ed6ccfb7998a00881692f38b4ca083c691d3e275b4145423704c34c909076", size = 34804044, upload-time = "2025-07-31T18:09:33.805Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/66/1e/c6f6bcd882d589410b475ca1fc22e34e34c82adff519caf18f3e6dd9d682/matplotlib-3.10.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:00b6feadc28a08bd3c65b2894f56cf3c94fc8f7adcbc6ab4516ae1e8ed8f62e2", size = 8253056, upload-time = "2025-07-31T18:08:05.385Z" }, + { url = "https://files.pythonhosted.org/packages/53/e6/d6f7d1b59413f233793dda14419776f5f443bcccb2dfc84b09f09fe05dbe/matplotlib-3.10.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ee98a5c5344dc7f48dc261b6ba5d9900c008fc12beb3fa6ebda81273602cc389", size = 8110131, upload-time = "2025-07-31T18:08:07.293Z" }, + { 
url = "https://files.pythonhosted.org/packages/66/2b/bed8a45e74957549197a2ac2e1259671cd80b55ed9e1fe2b5c94d88a9202/matplotlib-3.10.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a17e57e33de901d221a07af32c08870ed4528db0b6059dce7d7e65c1122d4bea", size = 8669603, upload-time = "2025-07-31T18:08:09.064Z" }, + { url = "https://files.pythonhosted.org/packages/7e/a7/315e9435b10d057f5e52dfc603cd353167ae28bb1a4e033d41540c0067a4/matplotlib-3.10.5-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97b9d6443419085950ee4a5b1ee08c363e5c43d7176e55513479e53669e88468", size = 9508127, upload-time = "2025-07-31T18:08:10.845Z" }, + { url = "https://files.pythonhosted.org/packages/7f/d9/edcbb1f02ca99165365d2768d517898c22c6040187e2ae2ce7294437c413/matplotlib-3.10.5-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ceefe5d40807d29a66ae916c6a3915d60ef9f028ce1927b84e727be91d884369", size = 9566926, upload-time = "2025-07-31T18:08:13.186Z" }, + { url = "https://files.pythonhosted.org/packages/3b/d9/6dd924ad5616c97b7308e6320cf392c466237a82a2040381163b7500510a/matplotlib-3.10.5-cp312-cp312-win_amd64.whl", hash = "sha256:c04cba0f93d40e45b3c187c6c52c17f24535b27d545f757a2fffebc06c12b98b", size = 8107599, upload-time = "2025-07-31T18:08:15.116Z" }, + { url = "https://files.pythonhosted.org/packages/0e/f3/522dc319a50f7b0279fbe74f86f7a3506ce414bc23172098e8d2bdf21894/matplotlib-3.10.5-cp312-cp312-win_arm64.whl", hash = "sha256:a41bcb6e2c8e79dc99c5511ae6f7787d2fb52efd3d805fff06d5d4f667db16b2", size = 7978173, upload-time = "2025-07-31T18:08:21.518Z" }, +] + [[package]] name = "mcp" version = "1.9.3" @@ -856,7 +950,9 @@ dependencies = [ { name = "langchain-aws" }, { name = "langchain-mcp-adapters" }, { name = "langgraph" }, + { name = "matplotlib" }, { name = "mcp" }, + { name = "psutil" }, { name = "pydantic" }, { name = "pydantic-settings" }, { name = "pyjwt" }, @@ -916,11 +1012,13 @@ requires-dist = [ { name = "langchain-aws", specifier = ">=0.2.23" }, { name = "langchain-mcp-adapters", specifier = ">=0.0.11" }, { name = "langgraph", specifier = ">=0.4.3" }, + { name = "matplotlib", specifier = ">=3.10.5" }, { name = "mcp", specifier = ">=1.9.3" }, { name = "mkdocs", marker = "extra == 'docs'", specifier = ">=1.5.0" }, { name = "mkdocs-git-revision-date-localized-plugin", marker = "extra == 'docs'", specifier = ">=1.2.0" }, { name = "mkdocs-material", marker = "extra == 'docs'", specifier = ">=9.4.0" }, { name = "mkdocs-minify-plugin", marker = "extra == 'docs'", specifier = ">=0.7.0" }, + { name = "psutil", specifier = ">=6.1.0" }, { name = "pydantic", specifier = ">=2.11.3" }, { name = "pydantic-settings", specifier = ">=2.0.0" }, { name = "pyjwt", specifier = ">=2.10.1" }, @@ -1466,6 +1564,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/cc/7e77861000a0691aeea8f4566e5d3aa716f2b1dece4a24439437e41d3d25/protobuf-5.29.5-py3-none-any.whl", hash = "sha256:6cf42630262c59b2d8de33954443d94b746c952b01434fc58a417fdbd2e84bd5", size = 172823, upload-time = "2025-05-28T23:51:58.157Z" }, ] +[[package]] +name = "psutil" +version = "7.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2a/80/336820c1ad9286a4ded7e845b2eccfcb27851ab8ac6abece774a6ff4d3de/psutil-7.0.0.tar.gz", hash = "sha256:7be9c3eba38beccb6495ea33afd982a44074b78f28c434a1f51cc07fd315c456", size = 497003, upload-time = "2025-02-13T21:54:07.946Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/ed/e6/2d26234410f8b8abdbf891c9da62bee396583f713fb9f3325a4760875d22/psutil-7.0.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:101d71dc322e3cffd7cea0650b09b3d08b8e7c4109dd6809fe452dfd00e58b25", size = 238051, upload-time = "2025-02-13T21:54:12.36Z" }, + { url = "https://files.pythonhosted.org/packages/04/8b/30f930733afe425e3cbfc0e1468a30a18942350c1a8816acfade80c005c4/psutil-7.0.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:39db632f6bb862eeccf56660871433e111b6ea58f2caea825571951d4b6aa3da", size = 239535, upload-time = "2025-02-13T21:54:16.07Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ed/d362e84620dd22876b55389248e522338ed1bf134a5edd3b8231d7207f6d/psutil-7.0.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1fcee592b4c6f146991ca55919ea3d1f8926497a713ed7faaf8225e174581e91", size = 275004, upload-time = "2025-02-13T21:54:18.662Z" }, + { url = "https://files.pythonhosted.org/packages/bf/b9/b0eb3f3cbcb734d930fdf839431606844a825b23eaf9a6ab371edac8162c/psutil-7.0.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b1388a4f6875d7e2aff5c4ca1cc16c545ed41dd8bb596cefea80111db353a34", size = 277986, upload-time = "2025-02-13T21:54:21.811Z" }, + { url = "https://files.pythonhosted.org/packages/eb/a2/709e0fe2f093556c17fbafda93ac032257242cabcc7ff3369e2cb76a97aa/psutil-7.0.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a5f098451abc2828f7dc6b58d44b532b22f2088f4999a937557b603ce72b1993", size = 279544, upload-time = "2025-02-13T21:54:24.68Z" }, + { url = "https://files.pythonhosted.org/packages/50/e6/eecf58810b9d12e6427369784efe814a1eec0f492084ce8eb8f4d89d6d61/psutil-7.0.0-cp37-abi3-win32.whl", hash = "sha256:ba3fcef7523064a6c9da440fc4d6bd07da93ac726b5733c29027d7dc95b39d99", size = 241053, upload-time = "2025-02-13T21:54:34.31Z" }, + { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" }, +] + [[package]] name = "pycparser" version = "2.22" @@ -1560,6 +1673,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e4/06/43084e6cbd4b3bc0e80f6be743b2e79fbc6eed8de9ad8c629939fa55d972/pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d", size = 266178, upload-time = "2025-07-28T16:19:31.401Z" }, ] +[[package]] +name = "pyparsing" +version = "3.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bb/22/f1129e69d94ffff626bdb5c835506b3a5b4f3d070f17ea295e12c2c6f60f/pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be", size = 1088608, upload-time = "2025-03-25T05:01:28.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" }, +] + [[package]] name = "pytest" version = "8.4.0" From 7ee2a93f5a06166dccbc966a41c2a896c1f40127 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Sat, 6 Sep 2025 15:19:44 +0000 Subject: [PATCH 2/5] 
Implement healthy-auth-expired status support and constants refactoring

- Add HealthStatus enum with Pydantic model for type-safe status management
- Fix nginx service to include services with expired auth tokens
- Update React frontend to display 'Healthy (Auth Expired)' status with orange styling
- Replace hardcoded status strings throughout codebase with constants
- Add comprehensive health check ping fallback for expired authentication
- Improve service availability by including reachable services regardless of auth status
---
 .gitignore                             |   1 +
 README.md                              |   4 +-
 docs/auth.md                           |  21 ++
 docs/jwt-token-vending.md              |  41 ++-
 docs/token-refresh-service.md          | 446 +++++++++++++++++++++++++
 frontend/src/components/ServerCard.tsx |  10 +-
 frontend/src/hooks/useServerStats.ts   |   2 +-
 frontend/src/pages/Dashboard.tsx       |   2 +-
 registry/constants.py                  |  60 ++++
 registry/core/nginx_service.py         |   8 +-
 registry/health/service.py             |  41 +--
 11 files changed, 608 insertions(+), 28 deletions(-)
 create mode 100644 docs/token-refresh-service.md
 create mode 100644 registry/constants.py

diff --git a/.gitignore b/.gitignore
index f12b7d0..a4df334 100644
--- a/.gitignore
+++ b/.gitignore
@@ -198,6 +198,7 @@ registry/server_state.json
 registry/nginx_mcp_revproxy.conf
 logs/
 token_refresher.pid
+token_refresher.log
 
 # Secrets and API keys - never commit these!
 .keys.yml
diff --git a/README.md b/README.md
index 05075bf..d8af84e 100644
--- a/README.md
+++ b/README.md
@@ -335,6 +335,7 @@ Transform how both autonomous AI agents and development teams access enterprise
 - **Amazon Bedrock AgentCore Integration** - Direct access to AWS services through managed MCP endpoints
 - **Three-Legged OAuth (3LO) Support** - External service integration (Atlassian, Google, GitHub)
 - **JWT Token Vending Service** - Self-service token generation for automation
+- **Automated Token Refresh Service** - Background token refresh to maintain continuous authentication
 - **Modern React Frontend** - Complete UI overhaul with TypeScript and real-time updates
 - **Dynamic Tool Discovery** - AI agents autonomously find and execute specialized tools
 - **Fine-Grained Access Control** - Granular permissions for servers, methods, and individual tools
@@ -347,7 +348,8 @@ Transform how both autonomous AI agents and development teams access enterprise
 |------------------|-------------------|------------------------|
 | [Installation Guide](docs/installation.md)<br>Complete setup instructions for EC2 and EKS | [Authentication Guide](docs/auth.md)<br>OAuth and identity provider integration | [AI Coding Assistants Setup](docs/ai-coding-assistants-setup.md)<br>VS Code, Cursor, Claude Code integration |
 | [Quick Start Tutorial](docs/quick-start.md)<br>Get running in 5 minutes | [Amazon Cognito Setup](docs/cognito.md)<br>Step-by-step IdP configuration | [API Reference](docs/registry_api.md)<br>Programmatic registry management |
-| [Configuration Reference](docs/configuration.md)<br>Environment variables and settings | [Fine-Grained Access Control](docs/scopes.md)<br>Permission management and security | [Dynamic Tool Discovery](docs/dynamic-tool-discovery.md)<br>Autonomous agent capabilities |
+| [Configuration Reference](docs/configuration.md)<br>Environment variables and settings | [Fine-Grained Access Control](docs/scopes.md)<br>Permission management and security | [Token Refresh Service](docs/token-refresh-service.md)<br>Automated token refresh and lifecycle management |
+| | | [Dynamic Tool Discovery](docs/dynamic-tool-discovery.md)<br>Autonomous agent capabilities |
 | | | [Production Deployment](docs/installation.md)<br>Complete setup for production environments |
 | | | [Troubleshooting Guide](docs/FAQ.md)<br>Common issues and solutions |
diff --git a/docs/auth.md b/docs/auth.md
index ee3b0ca..0f2ac4d 100644
--- a/docs/auth.md
+++ b/docs/auth.md
@@ -618,6 +618,27 @@ with open(config_path) as f:
 - **Ingress tokens**: 1-hour expiry, auto-refresh via client credentials
 - **Egress tokens**: Provider-specific, refresh tokens where available
 - **Session management**: Handled automatically by OAuth scripts
+- **Automated refresh service**: Background service monitors and refreshes all tokens
+
+#### Token Refresh Service
+
+The MCP Gateway includes an [Automated Token Refresh Service](token-refresh-service.md) that provides:
+
+- **Continuous monitoring** of all OAuth tokens for expiration
+- **Proactive refresh** before tokens expire (configurable 1-hour buffer)
+- **Automatic MCP config generation** for coding assistants
+- **Service discovery** for both OAuth and no-auth services
+- **Background operation** with comprehensive logging
+
+Start the token refresh service:
+```bash
+./start_token_refresher.sh
+```
+
+The service automatically generates MCP configurations for:
+- **VS Code extensions** (`.oauth-tokens/vscode_mcp.json`)
+- **Claude Code/Roocode** (`.oauth-tokens/mcp.json`)
+- **Custom MCP clients** (standard configuration format)
 
 ---
diff --git a/docs/jwt-token-vending.md b/docs/jwt-token-vending.md
index bd604cf..a22a195 100644
--- a/docs/jwt-token-vending.md
+++ b/docs/jwt-token-vending.md
@@ -405,4 +405,43 @@ The token generation interface provides:
 - **Usage Instructions**: Clear examples of how to use the generated token
 - **Security Warnings**: Prominent warnings about token storage and sharing
 
-By implementing the JWT Token Vending Service, organizations can provide their users with a secure, user-friendly way to generate programmatic access tokens while maintaining enterprise-grade security controls and comprehensive audit capabilities. The service seamlessly integrates with existing MCP Gateway infrastructure and provides a foundation for advanced token management features.
\ No newline at end of file
+By implementing the JWT Token Vending Service, organizations can provide their users with a secure, user-friendly way to generate programmatic access tokens while maintaining enterprise-grade security controls and comprehensive audit capabilities. The service seamlessly integrates with existing MCP Gateway infrastructure and provides a foundation for advanced token management features.
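+
+As a concrete illustration of that foundation, the snippet below shows how a
+script might consume a vended token through the generated MCP configuration.
+This is a minimal sketch: the file path follows the `.oauth-tokens/` convention
+and the field names follow the Roocode example in the
+[Token Refresh Service](token-refresh-service.md) documentation, so adjust both
+to your deployment.
+
+```python
+import json
+from pathlib import Path
+
+# The token refresh service rewrites this file on every refresh cycle.
+config = json.loads(Path(".oauth-tokens/mcp.json").read_text())
+
+# Each server entry carries its gateway URL and a pre-refreshed auth header value.
+server = config["mcpServers"]["sre-gateway"]
+url = server["env"]["MCP_SERVER_URL"]
+token = server["env"]["MCP_SERVER_AUTH_TOKEN"]
+
+# Log only the token's presence, never its value.
+print(f"calling {url} (auth header set: {bool(token)})")
+```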
+ +## Integration with Token Refresh Service + +The JWT Token Vending Service works seamlessly with the [Automated Token Refresh Service](token-refresh-service.md) to provide comprehensive token lifecycle management: + +### Automatic Token Monitoring + +Once tokens are generated through the vending service, the token refresh service automatically: + +- **Monitors expiration times** for all generated tokens +- **Proactively refreshes** tokens before they expire (configurable buffer time) +- **Updates MCP client configurations** with fresh tokens +- **Maintains continuous authentication** without user intervention + +### MCP Client Configuration + +The token refresh service automatically generates MCP client configurations that include tokens from the vending service: + +- **VS Code Extensions** - Automatically configured with refreshed tokens +- **Claude Code/Roocode** - Real-time token updates for coding assistants +- **Custom MCP Clients** - Standard configuration format for any MCP client + +### Enhanced Security Model + +The combination of both services provides: + +- **Short-lived primary tokens** from the vending service (1-24 hours) +- **Automatic refresh capability** using secure refresh tokens +- **Zero-downtime token rotation** for continuous service availability +- **Centralized token lifecycle management** with comprehensive audit trails + +### Usage Pattern + +1. **Generate Initial Token** - Use the JWT Token Vending Service web interface +2. **Automatic Refresh** - Token refresh service monitors and refreshes tokens +3. **Client Integration** - MCP clients automatically use refreshed tokens +4. **Continuous Operation** - No manual intervention required for token management + +For detailed setup and configuration of the token refresh service, see the [Token Refresh Service Documentation](token-refresh-service.md). \ No newline at end of file diff --git a/docs/token-refresh-service.md b/docs/token-refresh-service.md new file mode 100644 index 0000000..f17f592 --- /dev/null +++ b/docs/token-refresh-service.md @@ -0,0 +1,446 @@ +# Token Refresh Service + +The MCP Gateway Registry includes an automated token refresh service that maintains continuous authentication by monitoring token expiration and proactively refreshing them. This service ensures uninterrupted access to external services and generates MCP client configurations for coding assistants. + +## Overview + +The token refresh service provides: + +- **Automated Token Monitoring** - Continuously monitors OAuth tokens for expiration +- **Proactive Token Refresh** - Refreshes tokens before they expire using configurable buffer times +- **MCP Configuration Generation** - Creates client configs for VS Code, Cursor, and other coding assistants +- **Service Discovery** - Automatically includes both OAuth-authenticated and no-auth services +- **Background Operation** - Runs as a daemon service with comprehensive logging + +## Architecture + +```mermaid +graph TB + A[Token Refresher Service] --> B[OAuth Token Monitor] + A --> C[No-Auth Service Scanner] + A --> D[MCP Config Generator] + + B --> E[.oauth-tokens/*.json] + C --> F[registry/servers/*.json] + D --> G[.oauth-tokens/mcp.json] + D --> H[.oauth-tokens/vscode_mcp.json] + + E --> I[External OAuth Services] + F --> J[Local MCP Servers] + G --> K[Roocode/Claude Code] + H --> L[VS Code Extensions] +``` + +The service integrates with: +- **External OAuth services** (Atlassian, SRE Gateway, etc.) +- **Local MCP servers** (Current Time, Real Server Fake Tools, etc.) 
+- **MCP clients** (VS Code extensions, Claude Code, etc.) + +## Setup and Configuration + +### Prerequisites + +- Python 3.11+ with `uv` package manager +- Valid OAuth tokens in `.oauth-tokens/` directory +- MCP server configurations in `registry/servers/` + +### Environment Variables + +| Variable | Description | Default | +|----------|-------------|---------| +| `TOKEN_REFRESH_INTERVAL` | Check interval in seconds | 300 (5 minutes) | +| `TOKEN_EXPIRY_BUFFER` | Refresh buffer time in seconds | 3600 (1 hour) | + +### Starting the Service + +#### Option 1: Using the Launch Script (Recommended) + +```bash +# Start with interactive prompts +./start_token_refresher.sh + +# Start with custom configuration +export TOKEN_REFRESH_INTERVAL=180 # 3 minutes +export TOKEN_EXPIRY_BUFFER=1800 # 30 minutes +./start_token_refresher.sh +``` + +#### Option 2: Direct Python Execution + +```bash +# Start with default settings +uv run python credentials-provider/token_refresher.py + +# Start with custom settings +uv run python credentials-provider/token_refresher.py \ + --interval 300 \ + --buffer 3600 +``` + +### Command Line Options + +``` +usage: token_refresher.py [-h] [--interval INTERVAL] [--buffer BUFFER] + [--log-level {DEBUG,INFO,WARNING,ERROR}] + +MCP Gateway OAuth Token Refresher Service + +options: + -h, --help show this help message and exit + --interval INTERVAL Token check interval in seconds (default: 300) + --buffer BUFFER Token expiry buffer in seconds (default: 3600) + --log-level {DEBUG,INFO,WARNING,ERROR} + Set the logging level (default: INFO) +``` + +## Service Management + +### Monitoring Service Status + +```bash +# Check if service is running +pgrep -f "token_refresher.py" + +# View recent logs +tail -f token_refresher.log + +# Monitor real-time activity +tail -f token_refresher.log | grep -E "(REFRESH|CONFIG|ERROR)" +``` + +### Stopping the Service + +```bash +# Graceful shutdown +pkill -f "token_refresher.py" + +# Force kill if needed +pkill -9 -f "token_refresher.py" +``` + +### Service Health Checks + +The service creates a PID file (`token_refresher.pid`) for process management and logs all activities to `token_refresher.log`. + +## Generated Configurations + +### MCP Client Configurations + +The service automatically generates two MCP configuration files: + +#### Roocode/Claude Code Configuration +**File**: `.oauth-tokens/mcp.json` +```json +{ + "mcpServers": { + "sre-gateway": { + "command": "uv", + "args": ["--directory", "/path/to/project", "run", "mcp"], + "env": { + "MCP_SERVER_URL": "https://gateway.example.com/mcp/sre-gateway/mcp", + "MCP_SERVER_AUTH_TOKEN": "Bearer " + } + } + } +} +``` + +#### VS Code Extension Configuration +**File**: `.oauth-tokens/vscode_mcp.json` +```json +{ + "mcpServers": { + "sre-gateway": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-fetch"], + "env": { + "FETCH_BASE_URL": "https://gateway.example.com/mcp/sre-gateway/mcp", + "FETCH_HEADERS": "{\"Authorization\": \"Bearer \"}" + } + } + } +} +``` + +### Service Types + +The service automatically includes: + +1. **OAuth Services** - Services requiring external authentication (e.g., Atlassian, SRE Gateway) +2. **No-Auth Services** - Local services with `auth_type: "none"` (e.g., Current Time, Real Server Fake Tools) + +## Integration Examples + +### With JWT Token Vending Service + +The token refresh service complements the [JWT Token Vending Service](jwt-token-vending.md) by: + +1. **Monitoring vended tokens** for expiration +2. 
**Automatically refreshing** tokens using stored refresh tokens +3. **Updating MCP configurations** with new tokens +4. **Maintaining continuous service** without manual intervention + +### With Existing Authentication Flow + +```mermaid +sequenceDiagram + participant User as User/Script + participant Vending as Token Vending Service + participant Refresher as Token Refresh Service + participant External as External Service + participant MCP as MCP Client + + User->>Vending: Request JWT token + Vending->>User: Return token + refresh token + Vending->>Refresher: Save tokens to .oauth-tokens/ + + loop Every 5 minutes + Refresher->>Refresher: Check token expiration + alt Token expires within buffer time + Refresher->>External: Refresh token + External->>Refresher: New token + Refresher->>Refresher: Update .oauth-tokens/ + Refresher->>Refresher: Regenerate MCP configs + end + end + + MCP->>Refresher: Read latest MCP config + MCP->>External: Use refreshed token +``` + +## Monitoring and Logging + +### Log Levels + +- **INFO** - Normal operations, token refreshes, config generation +- **WARNING** - Token refresh failures, missing services +- **ERROR** - Critical failures, authentication errors +- **DEBUG** - Detailed trace information for troubleshooting + +### Sample Log Output + +``` +2024-09-06 15:30:00,123 - Token refresh check starting... +2024-09-06 15:30:00,124 - Found 2 egress token files to check +2024-09-06 15:30:00,125 - bedrock-agentcore-sre-gateway-egress.json: expires in 2 hours, no refresh needed +2024-09-06 15:30:00,126 - atlassian-atlassian-egress.json: expires in 45 minutes, refreshing... +2024-09-06 15:30:01,234 - Successfully refreshed token for atlassian-atlassian- +2024-09-06 15:30:01,235 - Scanning for no-auth services... +2024-09-06 15:30:01,236 - Found 3 no-auth services: mcpgw, currenttime, realserverfaketools +2024-09-06 15:30:01,237 - Generating MCP configurations... +2024-09-06 15:30:01,345 - Generated Roocode config with 5 servers +2024-09-06 15:30:01,346 - Generated VSCode config with 5 servers +2024-09-06 15:30:01,347 - Token refresh cycle completed successfully +``` + +## Troubleshooting + +### Common Issues + +#### Service Won't Start + +**Symptoms**: Service exits immediately or fails to start +**Causes**: +- Missing dependencies +- Invalid OAuth token files +- Permission issues + +**Solutions**: +```bash +# Check dependencies +uv run python -c "import httpx, json, time, argparse, asyncio" + +# Verify token files +ls -la .oauth-tokens/*.json + +# Check permissions +chmod +x credentials-provider/token_refresher.py +chmod +x start_token_refresher.sh +``` + +#### Token Refresh Failures + +**Symptoms**: Tokens not being refreshed, authentication errors +**Causes**: +- Expired refresh tokens +- Invalid OAuth configuration +- Network connectivity issues + +**Solutions**: +```bash +# Check token validity +cat .oauth-tokens/*egress.json | jq '.expires_at' + +# Test network connectivity +curl -v https://your-oauth-provider.com/token + +# Re-run initial OAuth flow +./credentials-provider/oauth/egress_oauth.py +``` + +#### MCP Configuration Issues + +**Symptoms**: MCP clients can't connect, missing services +**Causes**: +- Invalid service configurations +- Missing environment variables +- Incorrect file paths + +**Solutions**: +```bash +# Validate generated configs +cat .oauth-tokens/mcp.json | jq '.' +cat .oauth-tokens/vscode_mcp.json | jq '.' 
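+
+# Optional deeper check (a sketch; the `env` key names follow the generated
+# examples shown earlier): list each server with the URL a client would call.
+jq -r '.mcpServers | to_entries[] | "\(.key) -> \(.value.env.MCP_SERVER_URL // "no URL configured")"' \
+  .oauth-tokens/mcp.json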
+
+# Check service definitions
+ls -la registry/servers/*.json
+
+# Verify environment variables
+env | grep -E "(MCP|TOKEN)"
+```
+
+### Debug Mode
+
+Enable detailed logging for troubleshooting:
+
+```bash
+# Start with debug logging
+uv run python credentials-provider/token_refresher.py --log-level DEBUG
+
+# Or set environment variable
+export LOG_LEVEL=DEBUG
+./start_token_refresher.sh
+```
+
+## Security Considerations
+
+### Token Storage
+
+- Token files are stored in `.oauth-tokens/` directory (excluded from Git)
+- File permissions are set to `600` (owner read/write only)
+- Refresh tokens are encrypted in transit and at rest
+
+### Network Security
+
+- All OAuth communication uses HTTPS/TLS
+- Tokens are transmitted using secure headers
+- Failed authentication attempts are logged and monitored
+
+### Access Control
+
+- Service runs with minimal required permissions
+- No network listeners (outbound connections only)
+- Process isolation using dedicated service account (recommended in production)
+
+## Production Deployment
+
+### Systemd Service (Linux)
+
+Create `/etc/systemd/system/token-refresher.service`:
+
+```ini
+[Unit]
+Description=MCP Gateway Token Refresh Service
+After=network.target
+Wants=network.target
+
+[Service]
+Type=simple
+User=mcp-gateway
+WorkingDirectory=/opt/mcp-gateway-registry
+Environment=TOKEN_REFRESH_INTERVAL=300
+Environment=TOKEN_EXPIRY_BUFFER=3600
+ExecStart=/opt/mcp-gateway-registry/.venv/bin/python credentials-provider/token_refresher.py
+Restart=always
+RestartSec=10
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Enable and start:
+```bash
+sudo systemctl enable token-refresher
+sudo systemctl start token-refresher
+sudo systemctl status token-refresher
+```
+
+### Docker Deployment
+
+```dockerfile
+FROM python:3.12-slim
+
+WORKDIR /app
+COPY . .
+RUN pip install uv && uv sync
+
+CMD ["uv", "run", "python", "credentials-provider/token_refresher.py"]
+```
+
+### Health Monitoring
+
+Set up monitoring for production:
+
+```bash
+# Create health check script
+cat > /opt/scripts/check-token-refresher.sh << 'EOF'
+#!/bin/bash
+if ! pgrep -f "token_refresher.py" > /dev/null; then
+    echo "CRITICAL: Token refresher service is not running"
+    exit 2
+fi
+echo "OK: Token refresher service is running"
+exit 0
+EOF
+```
+
+## API Reference
+
+### Service Methods
+
+The token refresher service provides these internal methods:
+
+- `_check_token_expiry()` - Check if token needs refresh
+- `_refresh_oauth_token()` - Refresh an expired token
+- `_scan_noauth_services()` - Discover no-auth services
+- `_generate_mcp_configs()` - Generate MCP client configurations
+- `_save_configurations()` - Write config files to disk
+
+### Configuration Schema
+
+#### Egress Token File Format
+```json
+{
+  "access_token": "eyJ...",
+  "refresh_token": "eyJ...",
+  "expires_at": 1725634800,
+  "token_type": "Bearer",
+  "scope": "read write"
+}
+```
+
+#### MCP Server Configuration Format
+```json
+{
+  "server_name": "example-service",
+  "auth_type": "oauth" | "none",
+  "path": "/mcp/example-service/mcp",
+  "supported_transports": ["streamable-http", "sse"]
+}
+```
+
+## Related Documentation
+
+- [Authentication Guide](auth.md) - OAuth setup and configuration
+- [JWT Token Vending](jwt-token-vending.md) - Token generation and management
+- [AI Coding Assistants Setup](ai-coding-assistants-setup.md) - Client configuration
+- [Configuration Reference](configuration.md) - Environment variables and settings
+
+## Support
+
+For issues with the token refresh service:
+
+1.
Check the [Troubleshooting Guide](FAQ.md) +2. Enable debug logging to gather detailed information +3. Search existing [GitHub Issues](https://github.com/agentic-community/mcp-gateway-registry/issues) +4. Create a new issue with logs and configuration details \ No newline at end of file diff --git a/frontend/src/components/ServerCard.tsx b/frontend/src/components/ServerCard.tsx index abbf0d5..1303197 100644 --- a/frontend/src/components/ServerCard.tsx +++ b/frontend/src/components/ServerCard.tsx @@ -22,7 +22,7 @@ interface Server { last_checked_time?: string; usersCount?: number; rating?: number; - status?: 'healthy' | 'unhealthy' | 'unknown'; + status?: 'healthy' | 'healthy-auth-expired' | 'unhealthy' | 'unknown'; num_tools?: number; } @@ -96,6 +96,8 @@ const ServerCard: React.FC = ({ server, onToggle, onEdit, canMo switch (server.status) { case 'healthy': return ; + case 'healthy-auth-expired': + return ; case 'unhealthy': return ; default: @@ -107,6 +109,8 @@ const ServerCard: React.FC = ({ server, onToggle, onEdit, canMo switch (server.status) { case 'healthy': return 'bg-green-100 text-green-800 dark:bg-green-900/30 dark:text-green-400'; + case 'healthy-auth-expired': + return 'bg-orange-100 text-orange-800 dark:bg-orange-900/30 dark:text-orange-400'; case 'unhealthy': return 'bg-red-100 text-red-800 dark:bg-red-900/30 dark:text-red-400'; default: @@ -146,6 +150,7 @@ const ServerCard: React.FC = ({ server, onToggle, onEdit, canMo if (onServerUpdate && response.data) { const updates: Partial = { status: response.data.status === 'healthy' ? 'healthy' : + response.data.status === 'healthy-auth-expired' ? 'healthy-auth-expired' : response.data.status === 'unhealthy' ? 'unhealthy' : 'unknown', last_checked_time: response.data.last_checked_iso, num_tools: response.data.num_tools @@ -294,12 +299,15 @@ const ServerCard: React.FC = ({ server, onToggle, onEdit, canMo
{server.status === 'healthy' ? 'Healthy' : + server.status === 'healthy-auth-expired' ? 'Healthy (Auth Expired)' : server.status === 'unhealthy' ? 'Unhealthy' : 'Unknown'}
diff --git a/frontend/src/hooks/useServerStats.ts b/frontend/src/hooks/useServerStats.ts index 49ab3e1..7804554 100644 --- a/frontend/src/hooks/useServerStats.ts +++ b/frontend/src/hooks/useServerStats.ts @@ -11,7 +11,7 @@ interface Server { last_checked_time?: string; usersCount?: number; rating?: number; - status?: 'healthy' | 'unhealthy' | 'unknown'; + status?: 'healthy' | 'healthy-auth-expired' | 'unhealthy' | 'unknown'; num_tools?: number; } diff --git a/frontend/src/pages/Dashboard.tsx b/frontend/src/pages/Dashboard.tsx index 243d1bb..e28b3d6 100644 --- a/frontend/src/pages/Dashboard.tsx +++ b/frontend/src/pages/Dashboard.tsx @@ -16,7 +16,7 @@ interface Server { last_checked_time?: string; usersCount?: number; rating?: number; - status?: 'healthy' | 'unhealthy' | 'unknown'; + status?: 'healthy' | 'healthy-auth-expired' | 'unhealthy' | 'unknown'; num_tools?: number; proxy_pass_url?: string; license?: string; diff --git a/registry/constants.py b/registry/constants.py new file mode 100644 index 0000000..4580832 --- /dev/null +++ b/registry/constants.py @@ -0,0 +1,60 @@ +""" +Constants and enums for the MCP Gateway Registry. +""" + +from enum import Enum +from typing import List +from pydantic import BaseModel + + +class HealthStatus(str, Enum): + """Health status constants for services.""" + + HEALTHY = "healthy" + HEALTHY_AUTH_EXPIRED = "healthy-auth-expired" + UNHEALTHY_TIMEOUT = "unhealthy: timeout" + UNHEALTHY_CONNECTION_ERROR = "unhealthy: connection error" + UNHEALTHY_ENDPOINT_CHECK_FAILED = "unhealthy: endpoint check failed" + UNHEALTHY_MISSING_PROXY_URL = "unhealthy: missing proxy URL" + CHECKING = "checking" + UNKNOWN = "unknown" + + @classmethod + def get_healthy_statuses(cls) -> List[str]: + """Get list of statuses that should be considered healthy for nginx inclusion.""" + return [cls.HEALTHY, cls.HEALTHY_AUTH_EXPIRED] + + @classmethod + def is_healthy(cls, status: str) -> bool: + """Check if a status should be considered healthy.""" + return status in cls.get_healthy_statuses() + + +class TransportType(str, Enum): + """Supported transport types for MCP servers.""" + + STREAMABLE_HTTP = "streamable-http" + SSE = "sse" + + +class RegistryConstants(BaseModel): + """Registry configuration constants.""" + + class Config: + """Pydantic config.""" + frozen = True + + # Health check settings + DEFAULT_HEALTH_CHECK_TIMEOUT: int = 30 + HEALTH_CHECK_INTERVAL: int = 30 + + # Nginx settings + NGINX_CONFIG_PATH: str = "/etc/nginx/conf.d/nginx_rev_proxy.conf" + + # Server settings + DEFAULT_TRANSPORT: str = TransportType.STREAMABLE_HTTP + SUPPORTED_TRANSPORTS: List[str] = [TransportType.STREAMABLE_HTTP, TransportType.SSE] + + +# Global instance +REGISTRY_CONSTANTS = RegistryConstants() \ No newline at end of file diff --git a/registry/core/nginx_service.py b/registry/core/nginx_service.py index eac03a9..9f0a240 100644 --- a/registry/core/nginx_service.py +++ b/registry/core/nginx_service.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse from .config import settings +from registry.constants import HealthStatus logger = logging.getLogger(__name__) @@ -107,10 +108,11 @@ async def generate_config_async(self, servers: Dict[str, Dict[str, Any]]) -> boo for path, server_info in servers.items(): proxy_pass_url = server_info.get("proxy_pass_url") if proxy_pass_url: - # Check if server is healthy - health_status = health_service.server_health_status.get(path, "unknown") + # Check if server is healthy (including auth-expired which is still reachable) + health_status = 
health_service.server_health_status.get(path, HealthStatus.UNKNOWN) - if health_status == "healthy": + # Include servers that are healthy or just have expired auth (server is up) + if HealthStatus.is_healthy(health_status): # Generate transport-aware location blocks transport_blocks = self._generate_transport_location_blocks(path, server_info) location_blocks.extend(transport_blocks) diff --git a/registry/health/service.py b/registry/health/service.py index d130be7..7e181bb 100644 --- a/registry/health/service.py +++ b/registry/health/service.py @@ -9,6 +9,7 @@ from time import time from ..core.config import settings +from registry.constants import HealthStatus logger = logging.getLogger(__name__) @@ -358,7 +359,7 @@ async def _check_single_service(self, client: httpx.AsyncClient, service_path: s from ..services.server_service import server_service proxy_pass_url = server_info.get("proxy_pass_url") - previous_status = self.server_health_status.get(service_path, "unknown") + previous_status = self.server_health_status.get(service_path, HealthStatus.UNKNOWN) new_status = previous_status try: @@ -370,15 +371,15 @@ async def _check_single_service(self, client: httpx.AsyncClient, service_path: s # If service transitioned to healthy (including auth-expired), fetch tool list (but don't block) # Only do this for fully healthy status, not auth-expired - if previous_status != "healthy" and status_detail == "healthy": + if previous_status != HealthStatus.HEALTHY and status_detail == HealthStatus.HEALTHY: asyncio.create_task(self._update_tools_background(service_path, proxy_pass_url)) else: new_status = status_detail # Detailed error message from transport check except httpx.TimeoutException: - new_status = "unhealthy: timeout" + new_status = HealthStatus.UNHEALTHY_TIMEOUT except httpx.ConnectError: - new_status = "unhealthy: connection failed" + new_status = HealthStatus.UNHEALTHY_CONNECTION_ERROR except Exception as e: new_status = f"error: {type(e).__name__}" @@ -465,7 +466,7 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient tuple[bool, str]: (is_healthy, status_detail) """ if not proxy_pass_url: - return False, "unhealthy: missing proxy URL" + return False, HealthStatus.UNHEALTHY_MISSING_PROXY_URL # Get transport information from server_info supported_transports = server_info.get("supported_transports", ["streamable-http"]) @@ -490,9 +491,9 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient # If we can extract status code from response, check if it was 200 if hasattr(e, 'response') and e.response and e.response.status_code == 200: logger.debug(f"SSE endpoint {proxy_pass_url} returned 200 OK before timeout - considering healthy") - return True, "healthy" + return True, HealthStatus.HEALTHY # For SSE, timeout after initial connection usually means server is responding - return True, "healthy" + return True, HealthStatus.HEALTHY except Exception as e: logger.warning(f"SSE endpoint {proxy_pass_url} failed with exception: {type(e).__name__} - {e}") return False, f"unhealthy: {type(e).__name__}" @@ -500,7 +501,7 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient logger.info(f"[TRACE] Detected MCP endpoint in URL, using standard HTTP handling") response = await client.get(proxy_pass_url, headers=headers, follow_redirects=True) if self._is_mcp_endpoint_healthy(response): - return True, "healthy" + return True, HealthStatus.HEALTHY else: return False, f"unhealthy: status {response.status_code}" except 
Exception as e: @@ -531,14 +532,14 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient if response.status_code in [401, 403]: logger.info(f"[TRACE] Auth failure detected ({response.status_code}) for {endpoint}, trying ping without auth") if await self._try_ping_without_auth(client, endpoint): - return True, "healthy-auth-expired" + return True, HealthStatus.HEALTHY_AUTH_EXPIRED else: return False, f"unhealthy: auth failed and ping without auth failed" # Check normal health status if self._is_mcp_endpoint_healthy_streamable(response): logger.info(f"Health check succeeded at {endpoint}") - return True, "healthy" + return True, HealthStatus.HEALTHY else: logger.warning(f"Health check failed for {endpoint}: Status {response.status_code}, Response: {response.text}") return False, f"unhealthy: status {response.status_code}" @@ -558,14 +559,14 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient timeout = httpx.Timeout(connect=5.0, read=2.0, write=5.0, pool=5.0) response = await client.get(sse_endpoint, headers=headers, follow_redirects=True, timeout=timeout) if self._is_mcp_endpoint_healthy(response): - return True, "healthy" + return True, HealthStatus.HEALTHY except (httpx.TimeoutException, asyncio.TimeoutError) as e: # For SSE endpoints, timeout while reading streaming response is normal after getting 200 OK logger.info(f"SSE endpoint {sse_endpoint} timed out while streaming (expected): {e}") # If we can extract status code from response, check if it was 200 if hasattr(e, 'response') and e.response and e.response.status_code == 200: logger.info(f"SSE endpoint {sse_endpoint} returned 200 OK before timeout - considering healthy") - return True, "healthy" + return True, HealthStatus.HEALTHY # For SSE, timeout after initial connection usually means server is responding return True, "healthy" except Exception as e: @@ -588,7 +589,7 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient logger.info(f"[TRACE] Response status: {response.status_code}") if self._is_mcp_endpoint_healthy_streamable(response): logger.info(f"Health check succeeded at {endpoint}") - return True, "healthy" + return True, HealthStatus.HEALTHY else: logger.warning(f"Health check failed for {endpoint}: Status {response.status_code}, Response: {response.text}") return False, f"unhealthy: status {response.status_code}" @@ -603,14 +604,14 @@ async def _check_server_endpoint_transport_aware(self, client: httpx.AsyncClient timeout = httpx.Timeout(connect=5.0, read=2.0, write=5.0, pool=5.0) response = await client.get(sse_endpoint, headers=headers, follow_redirects=True, timeout=timeout) if self._is_mcp_endpoint_healthy(response): - return True, "healthy" + return True, HealthStatus.HEALTHY except (httpx.TimeoutException, asyncio.TimeoutError) as e: # For SSE endpoints, timeout while reading streaming response is normal after getting 200 OK logger.info(f"SSE endpoint {sse_endpoint} timed out while streaming (expected): {e}") # If we can extract status code from response, check if it was 200 if hasattr(e, 'response') and e.response and e.response.status_code == 200: logger.info(f"SSE endpoint {sse_endpoint} returned 200 OK before timeout - considering healthy") - return True, "healthy" + return True, HealthStatus.HEALTHY # For SSE, timeout after initial connection usually means server is responding return True, "healthy" except Exception as e: @@ -770,9 +771,9 @@ async def perform_immediate_health_check(self, service_path: str) -> tuple[str, 
return current_status, last_checked_time # Set status to 'checking' before performing the check - logger.info(f"Setting status to 'checking' for {service_path} ({proxy_pass_url})...") - previous_status = self.server_health_status.get(service_path, "unknown") - self.server_health_status[service_path] = "checking" + logger.info(f"Setting status to '{HealthStatus.CHECKING}' for {service_path} ({proxy_pass_url})...") + previous_status = self.server_health_status.get(service_path, HealthStatus.UNKNOWN) + self.server_health_status[service_path] = HealthStatus.CHECKING try: async with httpx.AsyncClient(timeout=httpx.Timeout(settings.health_check_timeout_seconds)) as client: @@ -842,8 +843,8 @@ def _get_service_health_data_fast(self, service_path: str, server_info: Dict) -> # Use cached status, only update if transitioning from disabled cached_status = self.server_health_status.get(service_path, "unknown") if cached_status == "disabled": - status = "checking" - self.server_health_status[service_path] = "checking" + status = HealthStatus.CHECKING + self.server_health_status[service_path] = HealthStatus.CHECKING else: status = cached_status From 6c43ada4e0d4b16ebb3bc91b74cfc0b35726eaa1 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Sat, 6 Sep 2025 15:22:30 +0000 Subject: [PATCH 3/5] Update AI coding assistants documentation - Fix Roo Code configuration file name to mcp_settings.json (with underscore) - Add comprehensive token refresh service integration documentation - Remove emojis for professional appearance - Remove multi-gateway support section - Add symbolic link setup option for automatic configuration updates - Enhance troubleshooting with token refresh service specific guidance --- docs/ai-coding-assistants-setup.md | 70 ++++++++++++++++++++++-------- 1 file changed, 53 insertions(+), 17 deletions(-) diff --git a/docs/ai-coding-assistants-setup.md b/docs/ai-coding-assistants-setup.md index 6d146bb..eca0b03 100644 --- a/docs/ai-coding-assistants-setup.md +++ b/docs/ai-coding-assistants-setup.md @@ -54,7 +54,16 @@ Roo Code demonstrates the power of enterprise governance for AI development tool **Setup:** ```bash # Copy Roo Code configuration -cp .oauth-tokens/mcp.json ~/.vscode/mcp-settings.json +cp .oauth-tokens/mcp.json ~/.vscode/mcp_settings.json +``` + +**Alternative Setup Options:** +```bash +# Option 1: Direct copy (recommended) +cp .oauth-tokens/mcp.json ~/.vscode/mcp_settings.json + +# Option 2: Create symbolic link for automatic updates +ln -sf "$(pwd)/.oauth-tokens/mcp.json" ~/.vscode/mcp_settings.json ``` **Enterprise Use Case:** @@ -88,22 +97,22 @@ cp .oauth-tokens/mcp.json ~/.vscode/mcp-settings.json **Key Enterprise Benefits:** -🎯 **Centralized Control** +**Centralized Control** - IT teams manage approved MCP servers across all development environments - Consistent tool availability regardless of developer setup - Rapid deployment of new tools to entire organization -🔐 **Secure Authentication** +**Secure Authentication** - All tool access routes through enterprise identity systems (Amazon Cognito) - No individual API key management required -- Automatic token refresh and rotation +- Automatic token refresh and rotation via [Token Refresh Service](token-refresh-service.md) -📊 **Usage Analytics & Compliance** +**Usage Analytics & Compliance** - Track which developers use which tools and when - Generate compliance reports for audit requirements - Monitor tool adoption and usage patterns across teams -⚡ **Developer Productivity** +**Developer Productivity** - Zero configuration 
required for approved tools
- Instant access to new enterprise tools as they're approved
- Same experience across VS Code, Cursor, Claude Code, and other assistants
@@ -189,9 +198,30 @@ async with sse_client('https://gateway.com/mcpgw/sse', headers=headers) as (read
 
 ## Configuration Management
 
-### Automatic Updates
+### Automatic Token Refresh
+
+The MCP Gateway includes an [Automated Token Refresh Service](token-refresh-service.md) that provides continuous token management:
+
+```bash
+# Start the token refresh service (runs in background)
+./start_token_refresher.sh
+
+# Service automatically:
+# - Monitors token expiration (1-hour buffer by default)
+# - Refreshes tokens before they expire
+# - Updates all MCP client configurations
+# - Generates fresh configs for all AI assistants
+```
+
+**Key Benefits:**
+- **Zero Downtime**: Tokens refresh automatically before expiration
+- **Continuous Operation**: AI assistants never lose access due to expired tokens
+- **Multiple Client Support**: Updates configurations for VS Code, Roo Code, Claude Code, etc.
+- **Background Operation**: Runs as a service with comprehensive logging
+
+### Manual Configuration Updates
 
-The credential generation script automatically updates configurations when run:
+If you need to manually regenerate configurations:
 
 ```bash
 # Regenerate all configurations
@@ -201,6 +231,8 @@ The credential generation script automatically updates configurations when run:
 ./scripts/update-ai-assistants.sh  # Custom script you can create
 ```
 
+**For AI assistants configured via the symbolic link option**, configuration updates are automatic since the links point to the live `.oauth-tokens/` files.
+
 ### Environment-Specific Configurations
 
 **Development Environment:**
@@ -217,21 +249,25 @@ ENVIRONMENT=prod ./credentials-provider/generate_creds.sh
 cp .oauth-tokens/prod-* ~/.vscode/
 ```
 
-### Multi-Gateway Support
+## Troubleshooting
 
-For organizations with multiple MCP Gateway instances:
+### Authentication Issues
+
+**Token Expired:**
+
+*If using the Token Refresh Service (recommended):*
 ```bash
-# Configure multiple gateways
-cp .oauth-tokens/gateway-us-east-1.json ~/.vscode/settings-us-east-1.json
-cp .oauth-tokens/gateway-eu-west-1.json ~/.vscode/settings-eu-west-1.json
-```
+# Check if token refresh service is running
+ps aux | grep token_refresher
 
-## Troubleshooting
+# Restart token refresh service if needed
+./start_token_refresher.sh
 
-### Authentication Issues
+# Check service logs
+tail -f token_refresher.log
+```
 
-**Token Expired:**
+*Manual token refresh:*
```bash
# Regenerate credentials
./credentials-provider/generate_creds.sh

From 22db214cf9fac66da11f892ae9c56a66879392c7 Mon Sep 17 00:00:00 2001
From: Amit Arora
Date: Sat, 6 Sep 2025 11:27:12 -0400
Subject: [PATCH 4/5] Potential fix for code scanning alert no.
61: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- credentials-provider/token_refresher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/credentials-provider/token_refresher.py b/credentials-provider/token_refresher.py index 2f544f1..a6a0808 100755 --- a/credentials-provider/token_refresher.py +++ b/credentials-provider/token_refresher.py @@ -112,7 +112,7 @@ def _get_all_tokens() -> List[Tuple[Path, Dict]]: List of (filepath, token_data) tuples for all valid tokens """ if not OAUTH_TOKENS_DIR.exists(): - logger.error(f"OAuth tokens directory not found: {OAUTH_TOKENS_DIR}") + logger.error("OAuth tokens directory not found") return [] all_tokens = [] From ed5cb32835d750ce89081a0265671c70815035a2 Mon Sep 17 00:00:00 2001 From: Amit Arora Date: Sat, 6 Sep 2025 11:36:19 -0400 Subject: [PATCH 5/5] Potential fix for code scanning alert no. 70: Clear-text logging of sensitive information Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- credentials-provider/token_refresher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/credentials-provider/token_refresher.py b/credentials-provider/token_refresher.py index a6a0808..2641576 100755 --- a/credentials-provider/token_refresher.py +++ b/credentials-provider/token_refresher.py @@ -1023,7 +1023,7 @@ def main(): logger.info("OAuth Token Refresher Service Starting") logger.info(f"Check interval: {args.interval} seconds") logger.info(f"Expiry buffer: {args.buffer} seconds ({args.buffer / 3600:.1f} hours)") - logger.info(f"OAuth tokens directory: {OAUTH_TOKENS_DIR}") + logger.info("OAuth tokens directory is configured") logger.info("=" * 60) # Set up signal handlers and PID file for continuous mode
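For reference, a minimal sketch of the `HealthStatus` constants that the health-check hunks above assume. The member names and values are taken directly from the diff; modeling them as plain string constants rather than an `Enum` is an assumption, chosen so the remaining string comparisons in the file (e.g. `cached_status == "disabled"`) keep working unchanged:

```python
# Hypothetical sketch; the real definition lives elsewhere in the registry
# package and may differ. Plain string constants keep equality checks
# against the previously cached status strings working without migration.
class HealthStatus:
    HEALTHY = "healthy"
    HEALTHY_AUTH_EXPIRED = "healthy-auth-expired"
    CHECKING = "checking"
    DISABLED = "disabled"
    UNKNOWN = "unknown"
```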
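The SSE branches in those same hunks treat a read timeout after a successful connect as a healthy signal, because an SSE endpoint holds its response open indefinitely. A condensed sketch of that pattern, reusing the constants sketched above; the helper name is illustrative, not part of the actual service:

```python
import asyncio

import httpx


async def check_sse_endpoint(url: str) -> tuple[bool, str]:
    # Short read timeout: a live SSE server streams indefinitely, so the
    # read phase is expected to time out after a successful connect.
    timeout = httpx.Timeout(connect=5.0, read=2.0, write=5.0, pool=5.0)
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(url, follow_redirects=True, timeout=timeout)
        except (httpx.TimeoutException, asyncio.TimeoutError):
            # Timing out while reading the stream usually means the server
            # accepted the connection and is responding.
            return True, HealthStatus.HEALTHY
    if response.status_code == 200:
        return True, HealthStatus.HEALTHY
    return False, f"unhealthy: status {response.status_code}"
```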
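Finally, a minimal sketch of the refresh loop behind `start_token_refresher.sh`, inferred from the `--interval`/`--buffer` flags and the PID-file handling visible in the PATCH 5/5 hunk. The token-file schema (`expires_at` as an epoch timestamp), the default interval, and the refresh stub are illustrative assumptions, not the service's actual implementation:

```python
import json
import os
import time
from pathlib import Path

OAUTH_TOKENS_DIR = Path(".oauth-tokens")
PID_FILE = Path("token_refresher.pid")


def expires_within(token_data: dict, buffer_seconds: int) -> bool:
    # Assumed schema: an absolute epoch expiry; files without one are skipped.
    expires_at = token_data.get("expires_at")
    return expires_at is not None and expires_at - time.time() < buffer_seconds


def refresh_token_file(path: Path) -> None:
    # Placeholder for the provider-specific refresh flow; the real service
    # rewrites the token file and regenerates the MCP client configs.
    print(f"would refresh {path.name}")


def run(interval: int = 300, buffer: int = 3600) -> None:
    # Record the PID so the shell wrapper can find and stop the service.
    PID_FILE.write_text(str(os.getpid()))
    try:
        while True:
            for path in sorted(OAUTH_TOKENS_DIR.glob("*.json")):
                token_data = json.loads(path.read_text())
                if expires_within(token_data, buffer):
                    refresh_token_file(path)
            time.sleep(interval)
    finally:
        PID_FILE.unlink(missing_ok=True)
```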