Commit cd24d70

feat: token compression at 80 % of max_context_tokens
Signed-off-by: Pierric Buchez <[email protected]>
1 parent 5d0f9e5

File tree

11 files changed: +691, -13 lines


README.md

Lines changed: 44 additions & 0 deletions
````diff
@@ -29,6 +29,50 @@ the `shai` binary will be installed in `$HOME/.local/bin`
 
 ## Configure a provider and Run!
 
+### Configuration files
+
+Shai can be configured via **configuration files** written in JSON. By default, the configuration file is `auth.config` located in `~/.config/shai/`. The file defines the list of LLM providers, the selected provider, model, and tool call method.
+
+#### Example `.shai.config`
+```json
+{
+  "providers": [
+    {
+      "provider": "ovhcloud",
+      "env_vars": {
+        "OVH_BASE_URL": "https://gpt-oss-120b.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1"
+      },
+      "model": "gpt-oss-120b",
+      "tool_method": "FunctionCall",
+      "max_context_tokens": 8192
+    }
+  ],
+  "selected_provider": 0
+}
+```
+
+- **providers**: an array of provider definitions. Each provider can specify environment variables (`env_vars`), the model name, the tool call method (`FunctionCall` or `Chat`), and optionally `max_context_tokens` to limit the context size.
+- **selected_provider**: the index of the provider to use (starting at `0`).
+- **max_context_tokens** (optional, per provider): maximum number of tokens that can be sent in the context to the LLM. If omitted, the default for the model is used.
+
+You can create multiple configuration files for different agents (see the *Custom Agent* section). To use a specific configuration, place the file in `~/.config/shai/agents/` and run the agent by its filename (without the `.config` extension):
+```
+shai my_custom_agent
+```
+
+Shai will automatically load the configuration, set the required environment variables, and use the selected provider for all subsequent interactions.
+
+### Using the configuration
+
+- **Automatic loading**: If a `.shai.config` file is present in the current directory, Shai will load it automatically.
+- **Explicit loading**: Use the `--config <path>` flag to specify a custom configuration file:
+```
+shai --config ~/.config/shai/agents/example.config
+```
+
+The configuration system allows you to switch providers, models, or tool call methods without recompiling the binary.
+
 By default `shai` uses OVHcloud as an anonymous user meaning you will be rate limited! If you want to sign in with your account or select another provider, run:
 
 ```
````
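The JSON above maps one-to-one onto the `ProviderConfig`/`ShaiConfig` serde structs changed in `shai-core/src/config/config.rs` (last file below). As a minimal, self-contained sketch of what parsing such a file involves — assuming `serde`/`serde_json` dependencies, and simplifying `tool_method` to a `String` instead of the real `ToolCallMethod` enum:

```rust
use serde::Deserialize;
use std::collections::HashMap;

// Field names mirror ProviderConfig in config.rs; tool_method is
// simplified to String here (the real type is the ToolCallMethod enum).
#[derive(Debug, Deserialize)]
struct ProviderConfig {
    provider: String,
    env_vars: HashMap<String, String>,
    model: String,
    tool_method: String,
    #[serde(default)]
    max_context_tokens: Option<u32>,
}

#[derive(Debug, Deserialize)]
struct ShaiConfig {
    providers: Vec<ProviderConfig>,
    selected_provider: usize,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // In Shai this file would live under ~/.config/shai/
    let raw = std::fs::read_to_string("auth.config")?;
    let cfg: ShaiConfig = serde_json::from_str(&raw)?;
    let p = &cfg.providers[cfg.selected_provider];
    println!("provider={} model={} max_context_tokens={:?}",
             p.provider, p.model, p.max_context_tokens);
    Ok(())
}
```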

shai-core/src/agent/actions/brain.rs

Lines changed: 69 additions & 2 deletions
```diff
@@ -47,7 +47,7 @@ impl AgentCore {
 
     /// Process a brain task result
     pub async fn process_next_step(&mut self, result: Result<ThinkerDecision, AgentError>) -> Result<(), AgentError> {
-        let ThinkerDecision{message, flow, token_usage} = self.handle_brain_error(result).await?;
+        let ThinkerDecision{message, flow, token_usage, compression_info} = self.handle_brain_error(result).await?;
         let ChatMessage::Assistant { content, reasoning_content, tool_calls, .. } = message.clone() else {
             return self.handle_brain_error::<ThinkerDecision>(
                 Err(AgentError::InvalidResponse(format!("ChatMessage::Assistant expected, but got {:?} instead", message)))).await.map(|_| ()
@@ -72,6 +72,18 @@ impl AgentCore {
                 output_tokens
             }).await;
         }
+
+        // Emit context compression event if available
+        if let Some(compression_info) = compression_info {
+            let _ = self.emit_event(AgentEvent::ContextCompressed {
+                original_message_count: compression_info.original_message_count,
+                compressed_message_count: compression_info.compressed_message_count,
+                tokens_before: compression_info.tokens_before,
+                current_tokens: compression_info.current_tokens,
+                max_tokens: compression_info.max_tokens,
+                ai_summary: compression_info.ai_summary,
+            }).await;
+        }
 
         // run tool call if any
         let tool_calls_from_brain = tool_calls.unwrap_or(vec![]);
@@ -86,19 +98,74 @@ impl AgentCore {
                 self.set_state(InternalAgentState::Running).await;
             }
             ThinkerFlowControl::AgentPause => {
+                // Check if we need to compress context when task is complete
+                self.check_and_compress_context().await?;
                 self.set_state(InternalAgentState::Paused).await;
             }
         }
         Ok(())
     }
 
+    /// Check if context compression is needed and apply it when task is complete
+    async fn check_and_compress_context(&mut self) -> Result<(), AgentError> {
+        // Extract compression logic from the brain if it's a CoderBrain
+        let brain = self.brain.clone();
+        let brain_read = brain.read().await;
+
+        // This is a bit hacky but we need to check if the brain has a compressor
+        // We'll use Any trait to downcast to CoderBrain
+        use std::any::Any;
+
+        if let Some(coder_brain) = (&**brain_read as &dyn Any).downcast_ref::<crate::runners::coder::coder::CoderBrain>() {
+            if let Some(compressor) = &coder_brain.context_compressor {
+                let compressor_clone = compressor.clone();
+                drop(brain_read); // Release the read lock
+
+                let trace = self.trace.read().await.clone();
+                let mut compressor_clone = compressor_clone;
+
+                if compressor_clone.should_compress_conversation(&trace) {
+                    let (compressed_trace, compression_info) = compressor_clone.compress_messages(trace).await;
+
+                    // Update the trace with compressed version
+                    {
+                        let mut trace_write = self.trace.write().await;
+                        *trace_write = compressed_trace;
+                    }
+
+                    // Update the compressor in the brain
+                    {
+                        let mut brain_write = brain.write().await;
+                        if let Some(coder_brain_mut) = (&mut **brain_write as &mut dyn Any).downcast_mut::<crate::runners::coder::coder::CoderBrain>() {
+                            coder_brain_mut.context_compressor = Some(compressor_clone);
+                        }
+                    }
+
+                    // Emit compression event if compression occurred
+                    if let Some(compression_info) = compression_info {
+                        let _ = self.emit_event(AgentEvent::ContextCompressed {
+                            original_message_count: compression_info.original_message_count,
+                            compressed_message_count: compression_info.compressed_message_count,
+                            tokens_before: compression_info.tokens_before,
+                            current_tokens: compression_info.current_tokens,
+                            max_tokens: compression_info.max_tokens,
+                            ai_summary: compression_info.ai_summary,
+                        }).await;
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+
     // Helper method that emits error events before returning the error
     async fn handle_brain_error<T>(&mut self, result: Result<T, AgentError>) -> Result<T, AgentError> {
         match result {
             Ok(value) => Ok(value),
             Err(error) => {
                 self.set_state(InternalAgentState::Paused).await;
-                let _ = self.emit_event(AgentEvent::BrainResult {
+                let _ = self.emit_event(AgentEvent::BrainResult {
                     timestamp: Utc::now(),
                     thought: Err(error.clone())
                 }).await;
```
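The compressor itself (`should_compress_conversation`, `compress_messages`) lives in `shai-core/src/runners/compacter.rs`, which is not shown in this excerpt. Going by the commit title, compression triggers once the context crosses 80% of `max_context_tokens`. A minimal sketch of that check, where the struct name and the characters-per-token estimate are assumptions for illustration:

```rust
// Hypothetical sketch of the 80% threshold named in the commit title;
// the real compressor in runners/compacter.rs is not part of this diff.
struct ContextCompressor {
    max_context_tokens: u32,
}

impl ContextCompressor {
    /// Rough heuristic: ~4 characters per token (an assumption,
    /// not the crate's actual token counting).
    fn estimate_tokens(conversation_chars: usize) -> u32 {
        (conversation_chars / 4) as u32
    }

    /// Compress once the estimated context reaches 80% of the budget.
    fn should_compress(&self, conversation_chars: usize) -> bool {
        let estimated = Self::estimate_tokens(conversation_chars);
        estimated >= (self.max_context_tokens as f64 * 0.8) as u32
    }
}

fn main() {
    let c = ContextCompressor { max_context_tokens: 8192 };
    // 80% of 8192 = 6553 tokens, i.e. ~26,214 chars under this heuristic
    assert!(!c.should_compress(20_000));
    assert!(c.should_compress(30_000));
}
```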

shai-core/src/agent/brain.rs

Lines changed: 26 additions & 1 deletion
```diff
@@ -4,6 +4,7 @@ use shai_llm::{ChatMessage, ToolCallMethod};
 use tokio::sync::RwLock;
 
 use crate::tools::types::AnyToolBox;
+use crate::runners::compacter::CompressionInfo;
 use super::error::AgentError;
 
 
@@ -29,6 +30,7 @@ pub struct ThinkerDecision {
     pub message: ChatMessage,
     pub flow: ThinkerFlowControl,
     pub token_usage: Option<(u32, u32)>, // (input_tokens, output_tokens)
+    pub compression_info: Option<CompressionInfo>,
 }
 
 impl ThinkerDecision {
@@ -37,6 +39,7 @@ impl ThinkerDecision {
             message,
             flow: ThinkerFlowControl::AgentPause,
             token_usage: None,
+            compression_info: None,
         }
     }
 
@@ -45,6 +48,7 @@ impl ThinkerDecision {
             message,
             flow: ThinkerFlowControl::AgentContinue,
             token_usage: None,
+            compression_info: None,
         }
     }
 
@@ -53,6 +57,7 @@ impl ThinkerDecision {
             message,
             flow: ThinkerFlowControl::AgentPause,
             token_usage: None,
+            compression_info: None,
         }
     }
 
@@ -61,6 +66,7 @@ impl ThinkerDecision {
             message,
             flow: ThinkerFlowControl::AgentContinue,
             token_usage: Some((input_tokens, output_tokens)),
+            compression_info: None,
         }
     }
 
@@ -69,6 +75,25 @@ impl ThinkerDecision {
             message,
             flow: ThinkerFlowControl::AgentPause,
             token_usage: Some((input_tokens, output_tokens)),
+            compression_info: None,
+        }
+    }
+
+    pub fn agent_continue_with_compression(message: ChatMessage, input_tokens: u32, output_tokens: u32, compression_info: CompressionInfo) -> Self {
+        ThinkerDecision{
+            message,
+            flow: ThinkerFlowControl::AgentContinue,
+            token_usage: Some((input_tokens, output_tokens)),
+            compression_info: Some(compression_info),
+        }
+    }
+
+    pub fn agent_pause_with_compression(message: ChatMessage, input_tokens: u32, output_tokens: u32, compression_info: CompressionInfo) -> Self {
+        ThinkerDecision{
+            message,
+            flow: ThinkerFlowControl::AgentPause,
+            token_usage: Some((input_tokens, output_tokens)),
+            compression_info: Some(compression_info),
         }
     }
 
@@ -79,7 +104,7 @@ impl ThinkerDecision {
 
 /// Core thinking interface - pure decision making
 #[async_trait]
-pub trait Brain: Send + Sync {
+pub trait Brain: Send + Sync + std::any::Any {
     /// This method is called at every step of the agent to decide next step
     /// note that if the message contains toolcall, it will always continue
     async fn next_step(&mut self, context: ThinkerContext) -> Result<ThinkerDecision, AgentError>;
```
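The new `std::any::Any` supertrait on `Brain` is what allows `check_and_compress_context` in `actions/brain.rs` to upcast a `&dyn Brain` to `&dyn Any` and downcast it to the concrete `CoderBrain`. The imported `CompressionInfo` type is defined in `runners/compacter.rs`, which is not part of this excerpt; judging from the fields copied one-to-one into `AgentEvent::ContextCompressed` below, its shape is presumably close to this sketch:

```rust
// Presumed shape of CompressionInfo, inferred from the fields that
// actions/brain.rs forwards into AgentEvent::ContextCompressed; the
// actual definition in runners/compacter.rs may differ.
#[derive(Debug, Clone)]
pub struct CompressionInfo {
    pub original_message_count: usize,
    pub compressed_message_count: usize,
    pub tokens_before: Option<u32>,
    pub current_tokens: Option<u32>,
    pub max_tokens: u32,
    pub ai_summary: Option<String>,
}
```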

shai-core/src/agent/events.rs

Lines changed: 19 additions & 0 deletions
```diff
@@ -101,6 +101,15 @@ pub enum AgentEvent {
         input_tokens: u32,
         output_tokens: u32
     },
+    /// Context compression notification
+    ContextCompressed {
+        original_message_count: usize,
+        compressed_message_count: usize,
+        tokens_before: Option<u32>,
+        current_tokens: Option<u32>,
+        max_tokens: u32,
+        ai_summary: Option<String>,
+    },
 }
 
 /// Types of user input that an agent can request
@@ -274,6 +283,16 @@ impl std::fmt::Debug for AgentEvent {
                     .field("output_tokens", output_tokens)
                     .finish()
             }
+            AgentEvent::ContextCompressed { original_message_count, compressed_message_count, tokens_before, current_tokens, max_tokens, ai_summary } => {
+                f.debug_struct("ContextCompressed")
+                    .field("original_message_count", original_message_count)
+                    .field("compressed_message_count", compressed_message_count)
+                    .field("tokens_before", tokens_before)
+                    .field("current_tokens", current_tokens)
+                    .field("max_tokens", max_tokens)
+                    .field("ai_summary", ai_summary)
+                    .finish()
+            }
         }
     }
 }
```

shai-core/src/agent/output/log.rs

Lines changed: 16 additions & 0 deletions
```diff
@@ -56,6 +56,22 @@ impl FileEventLogger {
             AgentEvent::TokenUsage { input_tokens, output_tokens } => {
                 format!("Token Usage: input={} output={} total={}", input_tokens, output_tokens, input_tokens + output_tokens)
             }
+            AgentEvent::ContextCompressed { original_message_count, compressed_message_count, tokens_before, current_tokens, max_tokens, ai_summary } => {
+                let summary_text = if let Some(summary) = ai_summary {
+                    format!(" | Summary: {}", summary)
+                } else {
+                    "".to_string()
+                };
+
+                let token_info = match (tokens_before, current_tokens) {
+                    (Some(before), Some(after)) => format!(", tokens: {} → {}", before, after),
+                    (Some(before), None) => format!(", tokens before: {}", before),
+                    _ => "".to_string(),
+                };
+
+                format!("Context Compressed with AI Summary: {} → {} messages{}{}",
+                    original_message_count, compressed_message_count, token_info, summary_text)
+            }
         };
 
         let log_line = format!("[{}] {}\n", timestamp.format("%Y-%m-%d %H:%M:%S%.3f"), event_str);
```

shai-core/src/agent/output/pretty.rs

Lines changed: 41 additions & 0 deletions
```diff
@@ -114,6 +114,47 @@ impl PrettyFormatter {
                 // Don't display token usage in the main output - it's handled by /tokens command
                 None
             },
+            AgentEvent::ContextCompressed { original_message_count, compressed_message_count, tokens_before, current_tokens, max_tokens, ai_summary } => {
+                let net_change = if original_message_count > compressed_message_count {
+                    original_message_count - compressed_message_count
+                } else {
+                    0
+                };
+
+                let markdown = match (tokens_before, current_tokens) {
+                    (Some(before), Some(after)) => {
+                        if net_change > 0 {
+                            format!(
+                                "● **Context Compressed with AI Summary** - Summarized {} message(s) to stay within token limits ({} → {} tokens)",
+                                net_change, before, after
+                            )
+                        } else {
+                            format!(
+                                "● **Context Compression Applied** - Added AI summary to optimize token usage ({} → {} tokens)",
+                                before, after
+                            )
+                        }
+                    }
+                    _ => {
+                        if net_change > 0 {
+                            format!(
+                                "● **Context Compressed with AI Summary** - Summarized {} message(s) to stay within token limits",
+                                net_change
+                            )
+                        } else {
+                            format!(
+                                "● **Context Compression Applied** - Added AI summary to optimize token usage"
+                            )
+                        }
+                    }
+                };
+
+                let mut compression_skin = self.skin.clone();
+                compression_skin.paragraph.set_fg(rgb(100, 200, 255)); // Blue for AI compression
+                compression_skin.bold.set_fg(rgb(120, 220, 255)); // Light blue for bold
+
+                Some(compression_skin.term_text(&markdown).to_string())
+            },
         }.map(|s| format!("\n{}", s))
     }
```

shai-core/src/config/config.rs

Lines changed: 21 additions & 4 deletions
```diff
@@ -12,7 +12,9 @@ pub struct ProviderConfig {
     pub provider: String,
     pub env_vars: std::collections::HashMap<String, String>,
     pub model: String,
-    pub tool_method: ToolCallMethod
+    pub tool_method: ToolCallMethod,
+    #[serde(default)]
+    pub max_context_tokens: Option<u32>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -36,9 +38,23 @@ impl ShaiConfig {
             provider,
             env_vars,
             model,
-            tool_method: ToolCallMethod::FunctionCall
+            tool_method: ToolCallMethod::FunctionCall,
+            max_context_tokens: None,
         };
-
+
+        self.providers.push(provider_config);
+        self.providers.len() - 1
+    }
+
+    pub fn add_provider_with_context(&mut self, provider: String, env_vars: std::collections::HashMap<String, String>, model: String, max_context_tokens: Option<u32>) -> usize {
+        let provider_config = ProviderConfig {
+            provider,
+            env_vars,
+            model,
+            tool_method: ToolCallMethod::FunctionCall,
+            max_context_tokens,
+        };
+
         self.providers.push(provider_config);
         self.providers.len() - 1
     }
@@ -228,7 +244,8 @@ impl Default for ShaiConfig {
                 (String::from("OVH_BASE_URL"), String::from("https://qwen-3-32b.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1"))
             ]),
             model: "Qwen3-32B".to_string(),
-            tool_method: ToolCallMethod::FunctionCall
+            tool_method: ToolCallMethod::FunctionCall,
+            max_context_tokens: Some(32768), // Qwen3-32B has 32k context
         }],
         selected_provider: 0,
         mcp_configs: HashMap::new(),
```
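A hedged usage sketch of the new `add_provider_with_context` constructor, assuming the `ShaiConfig` fields are public as the `Default` impl above suggests:

```rust
use std::collections::HashMap;

// Assumes ShaiConfig/ProviderConfig from config.rs are in scope and that
// selected_provider is a public usize field, as the Default literal suggests.
fn configure() -> ShaiConfig {
    let mut cfg = ShaiConfig::default();
    let idx = cfg.add_provider_with_context(
        "ovhcloud".to_string(),
        HashMap::from([(
            "OVH_BASE_URL".to_string(),
            "https://gpt-oss-120b.endpoints.kepler.ai.cloud.ovh.net/api/openai_compat/v1".to_string(),
        )]),
        "gpt-oss-120b".to_string(),
        Some(8192), // compression should kick in near 80% of this (~6553 tokens)
    );
    cfg.selected_provider = idx;
    cfg
}
```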
