From 05c18786cc7b1ad9bffd574b0fcc03c18e115529 Mon Sep 17 00:00:00 2001 From: Brian H Date: Fri, 30 May 2025 07:43:16 +0000 Subject: [PATCH 01/22] sessionId= should not be required --- README.md | 22 ++++ justfile | 6 + .../http_sse_server/http_sse_server.rs | 119 ++++++++++++++---- 3 files changed, 120 insertions(+), 27 deletions(-) create mode 100644 justfile diff --git a/README.md b/README.md index 2c90eea..38f4864 100644 --- a/README.md +++ b/README.md @@ -141,6 +141,28 @@ Example: This server implements the Model Context Protocol (MCP) which allows it to be easily integrated with LLM clients that support the protocol. For more information about MCP, visit [the MCP repository](https://github.com/modelcontextprotocol/mcp). +### VScode MCP, RooCode example + +```json +// Roo Code, use bunx or npx, sessionId= +{ + "mcpServers":{ + "rust-crate-docs": { + "command": "bunx", + "args": [ + "-y", + "mcp-remote@latest", + "http://127.0.0.1:3000/sse?sessionId=", + "--allow-http", + "--transport sse-only", + "--debug" + ] + } + } +} +``` + + ## License MIT License diff --git a/justfile b/justfile new file mode 100644 index 0000000..0ca0d5c --- /dev/null +++ b/justfile @@ -0,0 +1,6 @@ +run: + cargo run --bin cratedocs http --address 0.0.0.0:3000 --debug + +debug-mcp-remote: + # use bunx or npx to see how the mcp-remote proxy connects + bunx mcp-remote@latest "http://127.0.0.1:3000/sse" --allow-http --transport sse-only --debug diff --git a/src/transport/http_sse_server/http_sse_server.rs b/src/transport/http_sse_server/http_sse_server.rs index 8d2d884..4811811 100644 --- a/src/transport/http_sse_server/http_sse_server.rs +++ b/src/transport/http_sse_server/http_sse_server.rs @@ -52,47 +52,112 @@ fn session_id() -> SessionId { #[derive(Debug, serde::Deserialize)] #[serde(rename_all = "camelCase")] pub struct PostEventQuery { - pub session_id: String, + #[serde(default)] // Use None if session_id is not present in query + pub session_id: Option, } async fn post_event_handler( 
State(app): State, - Query(PostEventQuery { session_id }): Query, + Query(query_params): Query, body: Body, ) -> Result { + tracing::debug!(?query_params, "Received POST request"); const BODY_BYTES_LIMIT: usize = 1 << 22; - let write_stream = { - let rg = app.txs.read().await; - rg.get(session_id.as_str()) - .ok_or(StatusCode::NOT_FOUND)? - .clone() - }; - let mut write_stream = write_stream.lock().await; - let mut body = body.into_data_stream(); - if let (_, Some(size)) = body.size_hint() { - if size > BODY_BYTES_LIMIT { + const BUFFER_SIZE: usize = 1 << 12; // For new sessions + + let (session_id_arc, c2s_writer_for_body): (SessionId, C2SWriter) = + match query_params.session_id { + Some(id_str) => { + tracing::debug!(session_id = %id_str, "sessionId provided in query"); + // Convert String to Arc for map lookup + let session_arc: SessionId = Arc::from(id_str.as_str()); + let rg = app.txs.read().await; + match rg.get(&session_arc) { + Some(writer) => { + tracing::debug!(session_id = %session_arc, "Found existing session writer"); + (session_arc, writer.clone()) + } + None => { + tracing::warn!(session_id = %session_arc, "sessionId provided but not found in active sessions"); + return Err(StatusCode::NOT_FOUND); + } + } + } + None => { + tracing::info!("sessionId not provided, creating new session for POST request"); + let new_session_id_arc = session_id(); // fn session_id() -> Arc + tracing::info!(new_session_id = %new_session_id_arc, "Generated new session ID"); + + let (c2s_read, c2s_write_half) = tokio::io::simplex(BUFFER_SIZE); + // s2c_read/write are also needed for the ByteTransport and Server::run + // _s2c_read is not directly used by this POST handler but needed for the spawned server task. 
+ let (_s2c_read, s2c_write_half) = tokio::io::simplex(BUFFER_SIZE); + + let new_c2s_writer_for_map = Arc::new(Mutex::new(c2s_write_half)); + app.txs + .write() + .await + .insert(new_session_id_arc.clone(), new_c2s_writer_for_map.clone()); + tracing::info!(session_id = %new_session_id_arc, "Inserted new session writer into app.txs"); + + // Spawn the server task for the new session + let app_clone = app.clone(); + let task_session_id = new_session_id_arc.clone(); + tokio::spawn(async move { + let router = RouterService(DocRouter::new()); + let server = Server::new(router); + let bytes_transport = ByteTransport::new(c2s_read, s2c_write_half); + tracing::info!(session_id = %task_session_id, "Spawning server task for new POST session"); + let _result = server + .run(bytes_transport) + .await + .inspect_err(|e| { + tracing::error!(?e, session_id = %task_session_id, "Server run error for new POST session") + }); + app_clone.txs.write().await.remove(&task_session_id); + tracing::info!(session_id = %task_session_id, "Cleaned up new POST session from app.txs after server task completion"); + }); + (new_session_id_arc, new_c2s_writer_for_map) + } + }; + + // Process the request body using c2s_writer_for_body + let mut write_stream_locked = c2s_writer_for_body.lock().await; + let mut body_data_stream = body.into_data_stream(); + + if let (_, Some(size_hint)) = body_data_stream.size_hint() { + if size_hint > BODY_BYTES_LIMIT { + tracing::warn!(%session_id_arc, body_size_hint = size_hint, limit = BODY_BYTES_LIMIT, "Payload too large based on hint"); return Err(StatusCode::PAYLOAD_TOO_LARGE); } } - // calculate the body size - let mut size = 0; - while let Some(chunk) = body.next().await { - let Ok(chunk) = chunk else { - return Err(StatusCode::BAD_REQUEST); + + let mut actual_size = 0; + while let Some(chunk_result) = body_data_stream.next().await { + let chunk = match chunk_result { + Ok(c) => c, + Err(e) => { + tracing::error!(%session_id_arc, ?e, "Error reading chunk from 
body stream"); + return Err(StatusCode::BAD_REQUEST); + } }; - size += chunk.len(); - if size > BODY_BYTES_LIMIT { + actual_size += chunk.len(); + if actual_size > BODY_BYTES_LIMIT { + tracing::warn!(%session_id_arc, actual_body_size = actual_size, limit = BODY_BYTES_LIMIT, "Payload too large during streaming"); return Err(StatusCode::PAYLOAD_TOO_LARGE); } - write_stream - .write_all(&chunk) - .await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + if let Err(e) = write_stream_locked.write_all(&chunk).await { + tracing::error!(%session_id_arc, ?e, "Error writing chunk to session stream"); + return Err(StatusCode::INTERNAL_SERVER_ERROR); + } } - write_stream - .write_u8(b'\n') - .await - .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?; + + if let Err(e) = write_stream_locked.write_u8(b'\n').await { + tracing::error!(%session_id_arc, ?e, "Error writing newline to session stream"); + return Err(StatusCode::INTERNAL_SERVER_ERROR); + } + + tracing::info!(%session_id_arc, "Successfully processed POST request body"); Ok(StatusCode::ACCEPTED) } From 30545c182044bc18c27ce32a3c1b768c2ee9f5b8 Mon Sep 17 00:00:00 2001 From: Brian H Date: Fri, 30 May 2025 08:02:00 +0000 Subject: [PATCH 02/22] example with stdio --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 38f4864..0d33c64 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,27 @@ Example: This server implements the Model Context Protocol (MCP) which allows it to be easily integrated with LLM clients that support the protocol. For more information about MCP, visit [the MCP repository](https://github.com/modelcontextprotocol/mcp). -### VScode MCP, RooCode example +### Vscode MCP, RooCode local example + +```bash +# compile & install cratedocs in ~/.cargo/bin +cargo install --path . 
+``` +in `mcp_settings.json` +```json +{ + "mcpServers":{ + "rust-crate-local": { + "command": "cratedocs", + "args": [ + "stdio" + ], + } + } +} +``` + +### VScode MCP, RooCode hosted example ```json // Roo Code, use bunx or npx, sessionId= From d92a961c9f610fab53dfbc38ecce680da8dafc66 Mon Sep 17 00:00:00 2001 From: Brian H Date: Sat, 31 May 2025 06:16:58 +0000 Subject: [PATCH 03/22] added install --- justfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/justfile b/justfile index 0ca0d5c..caa54c8 100644 --- a/justfile +++ b/justfile @@ -1,6 +1,10 @@ +install: + cargo install --git https://github.com/PromptExecution/cratedocs-mcp --locked + run: cargo run --bin cratedocs http --address 0.0.0.0:3000 --debug debug-mcp-remote: # use bunx or npx to see how the mcp-remote proxy connects bunx mcp-remote@latest "http://127.0.0.1:3000/sse" --allow-http --transport sse-only --debug + From faa44e7b86e4eb739db88f6ca5c3a2495bb0bf41 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 05:45:08 +0000 Subject: [PATCH 04/22] --tldr checkpoint 1 --- .gitignore | 1 + Cargo.lock | 3 +- Cargo.toml | 6 ++- src/bin/cratedocs.rs | 114 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 110 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index aa8d4f1..b197cae 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,4 @@ Thumbs.db *.swp *.swo output_tests +target/* diff --git a/Cargo.lock b/Cargo.lock index 849d4b1..541bcbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -375,7 +375,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cratedocs-mcp" -version = "0.1.0" +version = "0.2.0" dependencies = [ "anyhow", "axum", @@ -388,6 +388,7 @@ dependencies = [ "mcp-server", "mockito", "rand 0.8.5", + "regex", "reqwest", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index c4a2f9f..c0feadd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,10 @@ [package] name = "cratedocs-mcp" 
-version = "0.1.0" +version = "0.2.0" edition = "2021" description = "Rust Documentation MCP Server for LLM crate assistance" -authors = ["Claude "] +authors = ["Brian Horakh ", +"Claude "] license = "MIT" repository = "https://github.com/d6e/cratedocs-mcp" @@ -42,6 +43,7 @@ futures = "0.3" rand = "0.8" clap = { version = "4.4", features = ["derive"] } html2md = "0.2.14" +regex = "1" [dev-dependencies] # Testing utilities diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 977c346..a4aba00 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -9,9 +9,10 @@ use std::net::SocketAddr; use tokio::io::{stdin, stdout}; use tracing_appender::rolling::{RollingFileAppender, Rotation}; use tracing_subscriber::{self, EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; +use regex::Regex; #[derive(Parser)] -#[command(author, version = "0.1.0", about, long_about = None)] +#[command(author, version = "0.2.0", about, long_about = None)] #[command(propagate_version = true)] #[command(disable_version_flag = true)] struct Cli { @@ -70,6 +71,10 @@ enum Commands { /// Output file path (if not specified, results will be printed to stdout) #[arg(long)] output: Option, + + /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) + #[arg(long)] + tldr: bool, /// Enable debug logging #[arg(short, long)] @@ -84,16 +89,17 @@ async fn main() -> Result<()> { match cli.command { Commands::Stdio { debug } => run_stdio_server(debug).await, Commands::Http { address, debug } => run_http_server(address, debug).await, - Commands::Test { - tool, - crate_name, - item_path, - query, - version, + Commands::Test { + tool, + crate_name, + item_path, + query, + version, limit, format, output, - debug + tldr, + debug } => run_test_tool(TestToolConfig { tool, crate_name, @@ -103,6 +109,7 @@ async fn main() -> Result<()> { limit, format, output, + tldr, debug }).await, } @@ -163,6 +170,41 @@ async fn run_http_server(address: String, debug: bool) -> Result<()> { 
Ok(()) } +// --- TLDR Helper Function --- +fn apply_tldr(input: &str) -> String { + // Remove LICENSE and VERSION(S) sections by skipping lines between those headings and the next heading or EOF. + let mut output = Vec::new(); + let mut skip = false; + + let license_re = Regex::new(r"(?i)^\s*#+\s*license\b").unwrap(); + let version_re = Regex::new(r"(?i)^\s*#+\s*version(s)?\b").unwrap(); + let heading_re = Regex::new(r"^\s*#+\s*\S+").unwrap(); + + let mut just_skipped_section = false; + for line in input.lines() { + // Start skipping if we hit a LICENSE or VERSION(S) heading + if !skip && (license_re.is_match(line) || version_re.is_match(line)) { + skip = true; + just_skipped_section = true; + continue; // skip the heading line itself + } + // If we just skipped a section heading, also skip blank lines and lines containing only "license" or "versions" + if just_skipped_section && (line.trim().is_empty() || line.trim().eq_ignore_ascii_case("license") || line.trim().eq_ignore_ascii_case("versions") || line.trim().eq_ignore_ascii_case("version")) { + continue; + } + // Stop skipping at the next heading (but do not skip the heading itself) + if skip && heading_re.is_match(line) { + skip = false; + just_skipped_section = false; + } + if !skip { + output.push(line); + } + } + // If the section to skip is at the end, skip will remain true and those lines will be omitted. 
+ output.join("\n") +} + /// Configuration for the test tool struct TestToolConfig { tool: String, @@ -173,6 +215,7 @@ struct TestToolConfig { limit: Option, format: Option, output: Option, + tldr: bool, debug: bool, } @@ -187,6 +230,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { limit, format, output, + tldr, debug, } = config; // Print help information if the tool is "help" @@ -210,6 +254,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { println!("\nOutput options:"); println!(" --format - Output format: markdown (default), text, json"); println!(" --output - Write output to a file instead of stdout"); + println!(" --tldr - Summarize output by stripping LICENSE and VERSION sections"); return Ok(()); } @@ -289,7 +334,13 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { if !result.is_empty() { for content in result { if let Content::Text(text) = content { - let content_str = text.text; + let mut content_str = text.text; + + // TL;DR processing: strip LICENSE and VERSION(S) sections if --tldr is set + if tldr { + content_str = apply_tldr(&content_str); + } + let formatted_output = match format.as_str() { "json" => { // For search_crates, which may return JSON content @@ -321,7 +372,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { let description = crate_info.get("description").and_then(|v| v.as_str()).unwrap_or("No description"); let downloads = crate_info.get("downloads").and_then(|v| v.as_u64()).unwrap_or(0); - text_output.push_str(&format!("{}. {} - {} (Downloads: {})\n", + text_output.push_str(&format!("{}. {} - {} (Downloads: {})\n", i + 1, name, description, downloads)); } text_output @@ -384,4 +435,45 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { } Ok(()) -} \ No newline at end of file +} +#[cfg(test)] +mod tldr_tests { + use super::apply_tldr; + + #[test] + fn test_apply_tldr_removes_license_and_versions() { + let input = r#" +# Versions +This is version info. 
+ +# LICENSE +MIT License text. + +# Usage +Some real documentation here. + +# Another Section +More docs. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); + assert!(output.contains("Usage")); + assert!(output.contains("Another Section")); + assert!(output.contains("Some real documentation here.")); + // Debug print for failure analysis + if output.to_lowercase().contains("license") { + println!("DEBUG OUTPUT:\n{}", output); + } + } + + #[test] + fn test_apply_tldr_handles_no_license_or_versions() { + let input = r#" +# Usage +Some real documentation here. +"#; + let output = apply_tldr(input); + assert_eq!(output.trim(), input.trim()); + } +} From 304ff1dea5e42aa35eefcb3278ed608e12ecf610 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 05:56:35 +0000 Subject: [PATCH 05/22] --tldr checkpoint 2 --- README.md | 2 ++ src/bin/cratedocs.rs | 52 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/README.md b/README.md index 2c90eea..35bd8d6 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,8 @@ cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --format t # Save output to a file cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --output tokio-docs.md +# Summarize output by stripping LICENSE and VERSION sections +cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --tldr ``` By default, the HTTP server will listen on `http://127.0.0.1:8080/sse`. diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index a4aba00..2cfba55 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -476,4 +476,56 @@ Some real documentation here. let output = apply_tldr(input); assert_eq!(output.trim(), input.trim()); } +#[test] +fn test_apply_tldr_no_headings() { + let input = r#" +This is plain text without any headings. 
+It should remain unchanged after processing. +"#; + let output = apply_tldr(input); + assert_eq!(output.trim(), input.trim()); +} + +#[test] +fn test_apply_tldr_malformed_markdown() { + let input = r#" +#LICENSE +This is a malformed license heading. +#VERSION +This is a malformed version heading. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); +} + +#[test] +fn test_apply_tldr_large_input() { + let input = r#" +# Versions +Version 1.0.0 +Version 2.0.0 + +# LICENSE +MIT License text. + +# Usage +Some real documentation here. + +# Another Section +More docs. + +# LICENSE +Another license section. + +# Versions +Another version section. +"#; + let output = apply_tldr(input); + assert!(!output.to_lowercase().contains("license")); + assert!(!output.to_lowercase().contains("version")); + assert!(output.contains("Usage")); + assert!(output.contains("Another Section")); + assert!(output.contains("Some real documentation here.")); +} } From 4afe1e71bfff722b427410d71f563e019f721cc3 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 06:23:43 +0000 Subject: [PATCH 06/22] added count_tokens --- Cargo.lock | 462 +++++++++++++++++++++++++++++++++++++++++++++-- Cargo.toml | 3 + src/tools/mod.rs | 14 +- 3 files changed, 464 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 541bcbc..52bd701 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,20 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom 0.3.1", + "once_cell", + "serde", + "version_check", + "zerocopy 0.8.26", +] + [[package]] name = 
"aho-corasick" version = "1.1.3" @@ -211,6 +225,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + [[package]] name = "base64" version = "0.21.7" @@ -247,6 +267,15 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +[[package]] +name = "castaway" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0abae9be0aaf9ea96a3b1b8b1b55c602ca751eba1b1500220cea4ecbafe7c0d5" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.16" @@ -348,6 +377,34 @@ dependencies = [ "memchr", ] +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width", + "windows-sys 0.59.0", +] + [[package]] name = "convert_case" version = "0.6.0" @@ -392,6 +449,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "tokenizers", "tokio", "tokio-util", "tower 0.4.13", @@ -410,12 +468,75 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] 
+name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "darling" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "dary_heap" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728" +dependencies = [ + "serde", +] + [[package]] name = "deranged" version = "0.3.11" @@ -425,6 +546,37 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -442,6 +594,18 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "encoding_rs" version = "0.8.35" @@ -467,6 +631,15 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -991,6 +1164,12 @@ dependencies = [ "syn", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.3" @@ -1022,6 +1201,19 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + 
"console", + "number_prefix", + "portable-atomic", + "unicode-width", + "web-time", +] + [[package]] name = "ipnet" version = "2.11.0" @@ -1034,6 +1226,15 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.15" @@ -1050,7 +1251,7 @@ dependencies = [ "combine", "jni-sys", "log", - "thiserror", + "thiserror 1.0.69", "walkdir", ] @@ -1116,6 +1317,22 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + [[package]] name = "markup5ever" version = "0.12.1" @@ -1164,12 +1381,12 @@ source = "git+https://github.com/modelcontextprotocol/rust-sdk?rev=c0bd94dd85a35 dependencies = [ "anyhow", "async-trait", - "base64", + "base64 0.21.7", "chrono", "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "url", ] @@ -1203,7 +1420,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tower 0.4.13", "tower-service", @@ -1224,6 +1441,12 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.8.5" @@ -1268,6 +1491,27 @@ dependencies = [ "tokio", ] +[[package]] +name = "monostate" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafe1be9d0c75642e3e50fedc7ecadf1ef1cbce6eb66462153fc44245343fbee" +dependencies = [ + "monostate-impl", + "serde", +] + +[[package]] +name = "monostate-impl" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c402a4092d5e204f32c9e155431046831fa712637043c58cb73bc6bc6c9663b5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "native-tls" version = "0.2.14" @@ -1291,6 +1535,16 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1316,6 +1570,12 @@ dependencies = [ "autocfg", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "object" version = "0.36.7" @@ -1331,6 +1591,28 @@ version = "1.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" +[[package]] +name = "onig" +version = "6.5.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +dependencies = [ + "bitflags 2.9.0", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +dependencies = [ + "cc", + "pkg-config", +] + [[package]] name = "openssl" version = "0.10.71" @@ -1404,6 +1686,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1486,6 +1774,12 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +[[package]] +name = "portable-atomic" +version = "1.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" + [[package]] name = "powerfmt" version = "0.2.0" @@ -1544,7 +1838,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha 0.9.0", "rand_core 0.9.3", - "zerocopy 0.8.23", + "zerocopy 0.8.26", ] [[package]] @@ -1585,6 +1879,37 @@ dependencies = [ "getrandom 0.3.1", ] +[[package]] +name = "rayon" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies 
= [ + "either", + "itertools", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.10" @@ -1644,7 +1969,7 @@ version = "0.11.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" dependencies = [ - "base64", + "base64 0.21.7", "bytes", "encoding_rs", "futures-core", @@ -1703,7 +2028,7 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" dependencies = [ - "base64", + "base64 0.21.7", ] [[package]] @@ -1915,12 +2240,30 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "string_cache" version = "0.8.8" @@ -2038,7 +2381,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = 
"2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708" +dependencies = [ + "thiserror-impl 2.0.12", ] [[package]] @@ -2052,6 +2404,17 @@ dependencies = [ "syn", ] +[[package]] +name = "thiserror-impl" +version = "2.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "thread_local" version = "1.1.8" @@ -2103,6 +2466,40 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tokenizers" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3846d8588abed0daba25a0e47edd58ea15e450a6088b2575f5116fdb0b27ca" +dependencies = [ + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "getrandom 0.3.1", + "indicatif", + "itertools", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.0", + "rayon", + "rayon-cond", + "regex", + "regex-syntax 0.8.5", + "serde", + "serde_json", + "spm_precompiled", + "thiserror 2.0.12", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", +] + [[package]] name = "tokio" version = "1.43.0" @@ -2218,7 +2615,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3566e8ce28cc0a3fe42519fc80e6b4c943cc4c8cef275620eb8dac2d3d4e06cf" dependencies = [ "crossbeam-channel", - "thiserror", + "thiserror 1.0.69", "time", "tracing-subscriber", ] @@ -2285,12 +2682,33 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + [[package]] name = "unicode-segmentation" version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a1a07cc7db3810833284e8d372ccdc6da29741639ecc70c9ec107df0fa6154c" + +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "url" version = "2.5.4" @@ -2338,6 +2756,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + [[package]] name = "walkdir" version = "2.5.0" @@ -2453,6 +2877,16 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "web-time" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2725,11 +3159,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.23" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd97444d05a4328b90e75e503a34bad781f14e28a823ad3557f0750df1ebcbc6" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ - "zerocopy-derive 0.8.23", + "zerocopy-derive 0.8.26", ] [[package]] @@ -2745,9 +3179,9 @@ 
dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.23" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6352c01d0edd5db859a63e2605f4ea3183ddbd15e2c4a9e7d32184df75e4f154" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c0feadd..d627e6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,9 @@ mcp-server = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = " mcp-core = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0bd94dd85a3535cb1580424465140d51bab2a17", package = "mcp-core" } mcp-macros = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0bd94dd85a3535cb1580424465140d51bab2a17", package = "mcp-macros" } +# Tokenizer dependency for token count functionality +tokenizers = { version = "0.21.2" } + # HTTP and networking tokio = { version = "1", features = ["full"] } reqwest = { version = "0.11", features = ["json"] } diff --git a/src/tools/mod.rs b/src/tools/mod.rs index b407b15..c44179c 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,4 +1,16 @@ pub mod docs; pub use docs::DocRouter; -pub use docs::docs::DocCache; \ No newline at end of file +pub use docs::docs::DocCache; + +use tokenizers::Tokenizer; +use tokenizers::models::wordpiece::WordPiece; + +// Function to count tokens in a given text +pub fn count_tokens(text: &str) -> Result { + // NOTE: You must provide a valid vocab file path for WordPiece + let model = WordPiece::from_file("path/to/vocab.txt").build()?; + let tokenizer = Tokenizer::new(model); + let tokens = tokenizer.encode(text, true)?; + Ok(tokens.get_ids().len()) +} \ No newline at end of file From 9042d01450bdfd240cad62d0d172779d50ca44cb Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 06:49:11 +0000 Subject: [PATCH 07/22] --max-tokens CLI parameter is now implemented for the Test 
subcommand. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Output is truncated to fit the specified token limit using Hugging Face's pretrained bert-base-cased tokenizer. 🦨 Skunky: Truncation is by character, not true token boundary; see code comment for future improvement. All changes build cleanly and are isolated to the explicit requirements. --- Cargo.lock | 283 ++++++++++++++++++++++++++++++++++++++++++- Cargo.toml | 2 +- src/bin/cratedocs.rs | 23 ++++ src/tools/mod.rs | 15 ++- 4 files changed, 313 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 52bd701..b61328c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -237,6 +237,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -459,6 +465,15 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + [[package]] name = "crossbeam-channel" version = "0.5.14" @@ -577,6 +592,27 @@ dependencies = [ "syn", ] +[[package]] +name = "dirs" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" +dependencies = [ + "dirs-sys", +] + +[[package]] +name = "dirs-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users", + 
"windows-sys 0.59.0", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -646,6 +682,16 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "flate2" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -854,6 +900,25 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs", + "http 1.2.0", + "indicatif", + "libc", + "log", + "rand 0.9.0", + "serde", + "serde_json", + "thiserror 2.0.12", + "ureq", + "windows-sys 0.60.2", +] + [[package]] name = "html2md" version = "0.2.15" @@ -1283,6 +1348,16 @@ version = "0.2.170" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828" +[[package]] +name = "libredox" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" +dependencies = [ + "bitflags 2.9.0", + "libc", +] + [[package]] name = "linux-raw-sys" version = "0.9.2" @@ -1657,6 +1732,12 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "overload" version = "0.1.1" @@ -1919,6 +2000,17 @@ dependencies = [ "bitflags 
2.9.0", ] +[[package]] +name = "redox_users" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd6f9d3d47bdd2ad6945c5015a226ec6155d0bcdfd8f7cd29f86b71f8de99d2b" +dependencies = [ + "getrandom 0.2.15", + "libredox", + "thiserror 2.0.12", +] + [[package]] name = "regex" version = "1.11.1" @@ -2003,6 +2095,20 @@ dependencies = [ "winreg", ] +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.15", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustc-demangle" version = "0.1.24" @@ -2022,6 +2128,21 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "rustls" +version = "0.23.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -2031,6 +2152,26 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pki-types" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4a72fe2bcf7a6ac6fd7d0b9e5cb68aeb7d4c0a0271730218b3e92d43b4eb435" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.20" @@ -2240,6 +2381,17 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + [[package]] name = "spm_precompiled" version = "0.1.4" @@ -2295,6 +2447,12 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.99" @@ -2479,6 +2637,7 @@ dependencies = [ "derive_builder", "esaxx-rs", "getrandom 0.3.1", + "hf-hub", "indicatif", "itertools", "log", @@ -2709,6 +2868,31 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + [[package]] name = "url" version = "2.5.4" @@ -2887,6 +3071,24 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.1", +] + +[[package]] +name = "webpki-roots" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "winapi" version = "0.3.9" @@ -2960,6 +3162,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2984,13 +3195,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -3003,6 +3230,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" @@ -3015,6 +3248,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -3027,12 +3266,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -3045,6 +3296,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -3057,6 +3314,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -3069,6 +3332,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -3081,6 +3350,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winreg" version = "0.50.0" @@ -3209,6 +3484,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" + [[package]] name = "zerovec" version = "0.10.4" diff --git a/Cargo.toml b/Cargo.toml index d627e6a..c153ee9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,7 +20,7 @@ mcp-core = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0 mcp-macros = { git = "https://github.com/modelcontextprotocol/rust-sdk", rev = "c0bd94dd85a3535cb1580424465140d51bab2a17", package = "mcp-macros" } # Tokenizer dependency for token count functionality -tokenizers = { version = "0.21.2" } +tokenizers = { version = "0.21.2", features = ["http"] } # HTTP and networking tokio = { version = "1", features = ["full"] } diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 2cfba55..fa0faf1 100644 --- 
a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -75,6 +75,10 @@ enum Commands { /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) #[arg(long)] tldr: bool, + + /// Maximum number of tokens for output (token-aware truncation) + #[arg(long)] + max_tokens: Option, /// Enable debug logging #[arg(short, long)] @@ -99,6 +103,7 @@ async fn main() -> Result<()> { format, output, tldr, + max_tokens, debug } => run_test_tool(TestToolConfig { tool, @@ -110,6 +115,7 @@ async fn main() -> Result<()> { format, output, tldr, + max_tokens, debug }).await, } @@ -216,6 +222,7 @@ struct TestToolConfig { format: Option, output: Option, tldr: bool, + max_tokens: Option, debug: bool, } @@ -231,6 +238,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { format, output, tldr, + max_tokens, debug, } = config; // Print help information if the tool is "help" @@ -336,6 +344,21 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { if let Content::Text(text) = content { let mut content_str = text.text; + // If max_tokens is set, truncate output to fit within the limit + if let Some(max_tokens) = max_tokens { + match cratedocs_mcp::tools::count_tokens(&content_str) { + Ok(token_count) if token_count > max_tokens => { + // 🦨 skunky: This truncates by character, not token boundary. For true token-aware truncation, split and re-encode. 
+ let mut truncated = content_str.clone(); + while cratedocs_mcp::tools::count_tokens(&truncated).map_or(0, |c| c) > max_tokens && !truncated.is_empty() { + truncated.pop(); + } + content_str = truncated; + } + _ => {} + } + } + // TL;DR processing: strip LICENSE and VERSION(S) sections if --tldr is set if tldr { content_str = apply_tldr(&content_str); diff --git a/src/tools/mod.rs b/src/tools/mod.rs index c44179c..4130f36 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -3,14 +3,13 @@ pub mod docs; pub use docs::DocRouter; pub use docs::docs::DocCache; -use tokenizers::Tokenizer; -use tokenizers::models::wordpiece::WordPiece; -// Function to count tokens in a given text +// Function to count tokens in a given text using a pretrained model from Hugging Face Hub +use tokenizers::tokenizer::Tokenizer; + pub fn count_tokens(text: &str) -> Result { - // NOTE: You must provide a valid vocab file path for WordPiece - let model = WordPiece::from_file("path/to/vocab.txt").build()?; - let tokenizer = Tokenizer::new(model); - let tokens = tokenizer.encode(text, true)?; - Ok(tokens.get_ids().len()) + // 🦨 skunky: This loads the tokenizer from Hugging Face Hub every call; cache for production. 
+ let tokenizer = Tokenizer::from_pretrained("bert-base-cased", None)?; + let encoding = tokenizer.encode(text, true)?; + Ok(encoding.get_ids().len()) } \ No newline at end of file From f891b415d9827069e5e7bcec302ce0ee74434874 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 06:54:37 +0000 Subject: [PATCH 08/22] =?UTF-8?q?added=20=E5=86=85=E5=AE=B9=E8=A2=AB?= =?UTF-8?q?=E6=88=AA=E6=96=AD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/bin/cratedocs.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index fa0faf1..a185f0f 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -348,11 +348,15 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { if let Some(max_tokens) = max_tokens { match cratedocs_mcp::tools::count_tokens(&content_str) { Ok(token_count) if token_count > max_tokens => { - // 🦨 skunky: This truncates by character, not token boundary. For true token-aware truncation, split and re-encode. + // Truncate by character, then to previous word boundary, and append Mandarin to indicate truncation. 
let mut truncated = content_str.clone(); while cratedocs_mcp::tools::count_tokens(&truncated).map_or(0, |c| c) > max_tokens && !truncated.is_empty() { truncated.pop(); } + if let Some(last_space) = truncated.rfind(' ') { + truncated.truncate(last_space); + } + truncated.push_str(" 内容被截断"); content_str = truncated; } _ => {} From 04aeafd585a022f1113fc430fa5255e3bbcc7af8 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 07:23:12 +0000 Subject: [PATCH 09/22] improved readme --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 35bd8d6..b6df7e0 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,9 @@ cargo run --bin cratedocs test --tool lookup_item --crate-name tokio --item-path # Look up documentation for a specific version cargo run --bin cratedocs test --tool lookup_item --crate-name serde --item-path Serialize --version 1.0.147 +# Look up a trait in a crate (e.g., the Serialize trait in serde) & a specific version +cargo run --bin cratedocs test --tool lookup_item --crate-name serde --item-path serde::Serialize --version 1.0.160 + # Search for crates cargo run --bin cratedocs test --tool search_crates --query logger --limit 5 @@ -64,8 +67,9 @@ cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --format t # Save output to a file cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --output tokio-docs.md -# Summarize output by stripping LICENSE and VERSION sections -cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --tldr + +# Summarize output by stripping LICENSE and VERSION sections, limits to xxxxx tokens (uses huggingface tokenizer) +cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --tldr --max-tokens 48000 ``` By default, the HTTP server will listen on `http://127.0.0.1:8080/sse`.
From e98ed75b67ca80d6f9fa3c99c01def31e91440c5 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 07:50:28 +0000 Subject: [PATCH 10/22] list_crate_items --- src/bin/cratedocs.rs | 54 +++++++++++++++++++++++++++++++++----------- src/tools/mod.rs | 1 + 2 files changed, 42 insertions(+), 13 deletions(-) diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index a185f0f..8b50a6e 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -84,6 +84,24 @@ enum Commands { #[arg(short, long)] debug: bool, }, + /// List all items in a crate (using rust-analyzer) + ListCrateItems { + /// Crate name (e.g., serde) + #[arg(long)] + crate_name: String, + /// Crate version (e.g., 1.0.0) + #[arg(long)] + version: String, + /// Filter by item type (struct, enum, trait, fn, macro, mod) + #[arg(long)] + item_type: Option, + /// Filter by visibility (pub, private) + #[arg(long)] + visibility: Option, + /// Filter by module path (e.g., serde::de) + #[arg(long)] + module: Option, + }, } #[tokio::main] @@ -118,6 +136,23 @@ async fn main() -> Result<()> { max_tokens, debug }).await, + Commands::ListCrateItems { + crate_name, + version, + item_type, + visibility, + module, + } => { + use cratedocs_mcp::tools::item_list::{list_crate_items, ItemListFilters}; + let filters = ItemListFilters { + item_type, + visibility, + module, + }; + let result = list_crate_items(&crate_name, &version, Some(filters)).await?; + println!("{}", result); + Ok(()) + } } } @@ -182,32 +217,25 @@ fn apply_tldr(input: &str) -> String { let mut output = Vec::new(); let mut skip = false; - let license_re = Regex::new(r"(?i)^\s*#+\s*license\b").unwrap(); - let version_re = Regex::new(r"(?i)^\s*#+\s*version(s)?\b").unwrap(); - let heading_re = Regex::new(r"^\s*#+\s*\S+").unwrap(); + // Match any heading (with or without space) for LICENSE or VERSION(S) + let tldr_section_re = Regex::new(r"(?i)^\s*#+\s*license\b|^\s*#+\s*version(s)?\b|^\s*#+license\b|^\s*#+version(s)?\b").unwrap(); + // Match 
any heading (for ending the skip) + let heading_re = Regex::new(r"^\s*#+").unwrap(); - let mut just_skipped_section = false; for line in input.lines() { // Start skipping if we hit a LICENSE or VERSION(S) heading - if !skip && (license_re.is_match(line) || version_re.is_match(line)) { + if !skip && tldr_section_re.is_match(line) { skip = true; - just_skipped_section = true; continue; // skip the heading line itself } - // If we just skipped a section heading, also skip blank lines and lines containing only "license" or "versions" - if just_skipped_section && (line.trim().is_empty() || line.trim().eq_ignore_ascii_case("license") || line.trim().eq_ignore_ascii_case("versions") || line.trim().eq_ignore_ascii_case("version")) { - continue; - } // Stop skipping at the next heading (but do not skip the heading itself) - if skip && heading_re.is_match(line) { + if skip && heading_re.is_match(line) && !tldr_section_re.is_match(line) { skip = false; - just_skipped_section = false; } if !skip { output.push(line); } } - // If the section to skip is at the end, skip will remain true and those lines will be omitted. 
output.join("\n") } diff --git a/src/tools/mod.rs b/src/tools/mod.rs index 4130f36..cd6b80a 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,4 +1,5 @@ pub mod docs; +pub mod item_list; pub use docs::DocRouter; pub use docs::docs::DocCache; From 96442c185171c44846e55388a22b11032d8c4e4f Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 07:52:12 +0000 Subject: [PATCH 11/22] list_crate_items --- README.md | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/README.md b/README.md index b6df7e0..2e7cc9d 100644 --- a/README.md +++ b/README.md @@ -150,3 +150,44 @@ This server implements the Model Context Protocol (MCP) which allows it to be ea ## License MIT License + +## MCP Tool: `list_crate_items` + +The `list_crate_items` tool enumerates all items in a specified Rust crate and version, optionally filtering by item type, visibility, or module path. This is useful for quickly exploring the structure of a crate, generating concise listings for LLMs, or programmatically analyzing crate APIs. + +### Usage + +```sh +cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 +``` + +#### With filters: + +- Filter by item type (e.g., struct, enum, trait, fn, macro, mod): + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --item-type struct + ``` + +- Filter by visibility (e.g., pub, private): + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --visibility pub + ``` + +- Filter by module path: + + ```sh + cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --module serde::de + ``` + +### Output + +The output is a concise, categorized list (JSON or markdown) showing each item's name, type, visibility, and module path. 
+ +**Example (stub output):** +``` +Stub: list_crate_items for crate: serde, version: 1.0.0, filters: Some(ItemListFilters { item_type: Some("struct"), visibility: None, module: None }) +``` + +When implemented, the output will be a structured list of items matching the filters. From f37981ec071d40faeefec2c262b64fcb6545a22e Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 08:15:09 +0000 Subject: [PATCH 12/22] checkpoint 1, broken --- README.md | 57 +++++++++++++++-------------------- src/bin/cratedocs.rs | 35 ---------------------- src/tools/docs/docs.rs | 68 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 68 deletions(-) diff --git a/README.md b/README.md index 14f9a2d..c89b998 100644 --- a/README.md +++ b/README.md @@ -189,47 +189,38 @@ in `mcp_settings.json` ``` -## License - -MIT License -## MCP Tool: `list_crate_items` +### 4. `list_crate_items` -The `list_crate_items` tool enumerates all items in a specified Rust crate and version, optionally filtering by item type, visibility, or module path. This is useful for quickly exploring the structure of a crate, generating concise listings for LLMs, or programmatically analyzing crate APIs. +Enumerates all items in a specified Rust crate and version, optionally filtering by item type, visibility, or module path. Useful for exploring crate structure, generating concise listings for LLMs, or programmatically analyzing crate APIs. 
-### Usage +**Parameters:** +- `crate_name` (required): The name of the crate +- `version` (required): The version of the crate +- `item_type` (optional): Filter by item type (struct, enum, trait, fn, macro, mod) +- `visibility` (optional): Filter by visibility (pub, private) +- `module` (optional): Filter by module path (e.g., serde::de) -```sh -cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 +**Example:** +```json +{ + "name": "list_crate_items", + "arguments": { + "crate_name": "serde", + "version": "1.0.0", + "item_type": "struct" + } +} ``` -#### With filters: - -- Filter by item type (e.g., struct, enum, trait, fn, macro, mod): - - ```sh - cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --item-type struct - ``` - -- Filter by visibility (e.g., pub, private): - - ```sh - cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --visibility pub - ``` - -- Filter by module path: - - ```sh - cargo run --bin cratedocs -- list-crate-items --crate-name serde --version 1.0.0 --module serde::de - ``` - -### Output - -The output is a concise, categorized list (JSON or markdown) showing each item's name, type, visibility, and module path. - -**Example (stub output):** +**Example Output (stub):** ``` Stub: list_crate_items for crate: serde, version: 1.0.0, filters: Some(ItemListFilters { item_type: Some("struct"), visibility: None, module: None }) ``` When implemented, the output will be a structured list of items matching the filters. 
+ + +## License + +MIT License diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 8b50a6e..02f1d7d 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -84,24 +84,6 @@ enum Commands { #[arg(short, long)] debug: bool, }, - /// List all items in a crate (using rust-analyzer) - ListCrateItems { - /// Crate name (e.g., serde) - #[arg(long)] - crate_name: String, - /// Crate version (e.g., 1.0.0) - #[arg(long)] - version: String, - /// Filter by item type (struct, enum, trait, fn, macro, mod) - #[arg(long)] - item_type: Option, - /// Filter by visibility (pub, private) - #[arg(long)] - visibility: Option, - /// Filter by module path (e.g., serde::de) - #[arg(long)] - module: Option, - }, } #[tokio::main] @@ -136,23 +118,6 @@ async fn main() -> Result<()> { max_tokens, debug }).await, - Commands::ListCrateItems { - crate_name, - version, - item_type, - visibility, - module, - } => { - use cratedocs_mcp::tools::item_list::{list_crate_items, ItemListFilters}; - let filters = ItemListFilters { - item_type, - visibility, - module, - }; - let result = list_crate_items(&crate_name, &version, Some(filters)).await?; - println!("{}", result); - Ok(()) - } } } diff --git a/src/tools/docs/docs.rs b/src/tools/docs/docs.rs index 486a9f5..710bfb6 100644 --- a/src/tools/docs/docs.rs +++ b/src/tools/docs/docs.rs @@ -1,3 +1,4 @@ +use crate::tools::item_list; use std::{future::Future, pin::Pin, sync::Arc}; use mcp_core::{ @@ -321,6 +322,36 @@ impl mcp_server::Router for DocRouter { "required": ["crate_name", "item_path"] }), ), + Tool::new( + "list_crate_items".to_string(), + "Enumerate all items in a Rust crate (optionally filtered by type, visibility, or module). 
Returns a concise, categorized list.".to_string(), + json!({ + "type": "object", + "properties": { + "crate_name": { + "type": "string", + "description": "The name of the crate" + }, + "version": { + "type": "string", + "description": "The version of the crate" + }, + "item_type": { + "type": "string", + "description": "Filter by item type (struct, enum, trait, fn, macro, mod)" + }, + "visibility": { + "type": "string", + "description": "Filter by visibility (pub, private)" + }, + "module": { + "type": "string", + "description": "Filter by module path (e.g., serde::de)" + } + }, + "required": ["crate_name", "version"] + }), + ), ] } @@ -386,6 +417,43 @@ impl mcp_server::Router for DocRouter { let doc = this.lookup_item(crate_name, item_path, version).await?; Ok(vec![Content::text(doc)]) } + "list_crate_items" => { + let crate_name = arguments + .get("crate_name") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidParameters("crate_name is required".to_string()))? + .to_string(); + let version = arguments + .get("version") + .and_then(|v| v.as_str()) + .ok_or_else(|| ToolError::InvalidParameters("version is required".to_string()))? 
+ .to_string(); + let item_type = arguments + .get("item_type") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let visibility = arguments + .get("visibility") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let module = arguments + .get("module") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let filters = cratedocs_mcp::tools::item_list::ItemListFilters { + item_type, + visibility, + module, + }; + let result = cratedocs_mcp::tools::item_list::list_crate_items( + &crate_name, + &version, + Some(filters), + ) + .await + .map_err(|e| ToolError::ExecutionError(format!("list_crate_items failed: {}", e)))?; + Ok(vec![Content::text(result)]) + } _ => Err(ToolError::NotFound(format!("Tool {} not found", tool_name))), } }) From ae1fc4297a160d1b56e8fbed8f118c7be5b15264 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 08:16:47 +0000 Subject: [PATCH 13/22] checkpoint 2, working --- src/tools/docs/docs.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tools/docs/docs.rs b/src/tools/docs/docs.rs index 710bfb6..e2f8842 100644 --- a/src/tools/docs/docs.rs +++ b/src/tools/docs/docs.rs @@ -440,12 +440,12 @@ impl mcp_server::Router for DocRouter { .get("module") .and_then(|v| v.as_str()) .map(|s| s.to_string()); - let filters = cratedocs_mcp::tools::item_list::ItemListFilters { + let filters = item_list::ItemListFilters { item_type, visibility, module, }; - let result = cratedocs_mcp::tools::item_list::list_crate_items( + let result = item_list::list_crate_items( &crate_name, &version, Some(filters), From fd18fe7cf57bb6b2d56bc9fba7a90221f04fa4b5 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 09:39:04 +0000 Subject: [PATCH 14/22] checkpoint, moved list_crate_items moved to tools --- src/bin/cratedocs.rs | 47 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 02f1d7d..aef8cf0 
100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -40,11 +40,11 @@ enum Commands { }, /// Test tools directly from the CLI Test { - /// The tool to test (lookup_crate, search_crates, lookup_item) + /// The tool to test (lookup_crate, search_crates, lookup_item, list_crate_items) #[arg(long, default_value = "lookup_crate")] tool: String, - /// Crate name for lookup_crate and lookup_item + /// Crate name for lookup_crate, lookup_item, and list_crate_items #[arg(long)] crate_name: Option, @@ -64,6 +64,18 @@ enum Commands { #[arg(long)] limit: Option, + /// Filter by item type for list_crate_items (e.g., struct, enum, trait) + #[arg(long)] + item_type: Option, + + /// Filter by visibility for list_crate_items (e.g., pub, private) + #[arg(long)] + visibility: Option, + + /// Filter by module path for list_crate_items (e.g., serde::de) + #[arg(long)] + module: Option, + /// Output format (markdown, text, json) #[arg(long, default_value = "markdown")] format: Option, @@ -71,11 +83,11 @@ enum Commands { /// Output file path (if not specified, results will be printed to stdout) #[arg(long)] output: Option, - + /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) #[arg(long)] tldr: bool, - + /// Maximum number of tokens for output (token-aware truncation) #[arg(long)] max_tokens: Option, @@ -100,6 +112,9 @@ async fn main() -> Result<()> { query, version, limit, + item_type, + visibility, + module, format, output, tldr, @@ -112,6 +127,9 @@ async fn main() -> Result<()> { query, version, limit, + item_type, + visibility, + module, format, output, tldr, @@ -212,6 +230,9 @@ struct TestToolConfig { query: Option, version: Option, limit: Option, + item_type: Option, + visibility: Option, + module: Option, format: Option, output: Option, tldr: bool, @@ -233,6 +254,9 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { tldr, max_tokens, debug, + item_type, + visibility, + module, } = config; // Print help information if the tool is 
"help" if tool == "help" { @@ -308,6 +332,21 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { "limit": limit, }) }, + "list_crate_items" => { + let crate_name = crate_name.ok_or_else(|| + anyhow::anyhow!("--crate-name is required for list_crate_items tool"))?; + let version = version.ok_or_else(|| + anyhow::anyhow!("--version is required for list_crate_items tool"))?; + + let arguments = json!({ + "crate_name": crate_name, + "version": version, + "item_type": item_type, + "visibility": visibility, + "module": module, + }); + arguments + }, _ => return Err(anyhow::anyhow!("Unknown tool: {}", tool)), }; From fc3f2f955a261e9dd814bf53d87f93d1eb189559 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 09:41:39 +0000 Subject: [PATCH 15/22] checkpoint, 1 test fails --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index c89b998..d8a9a80 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,9 @@ cargo run --bin cratedocs http --debug ### Directly Testing Documentation Tools +# Enumerate crate items +cargo run --bin cratedocs test --tool list_crate_items --crate-name serde --version 1.0.0 --item-type struct +cargo run --bin cratedocs test --tool list_crate_items --crate-name tokio --version 1.28.0 --visibility pub --module tokio::sync You can directly test the documentation tools from the command line without starting a server: ```bash From b2df17188adee35ef60b5a6758fbeb9596d625b7 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 09:51:06 +0000 Subject: [PATCH 16/22] added version tool --- .gitignore | 1 + README.md | 3 +- src/bin/cratedocs.rs | 6 +++ src/tools/docs/tests.rs | 4 +- src/tools/item_list.rs | 77 ++++++++++++++++++++++++++++++++++++++ tests/integration_tests.rs | 6 +-- 6 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 src/tools/item_list.rs diff --git a/.gitignore b/.gitignore index b197cae..7fc7d40 100644 --- a/.gitignore +++ b/.gitignore @@ 
-24,3 +24,4 @@ Thumbs.db *.swo output_tests target/* +context_portal/* diff --git a/README.md b/README.md index d8a9a80..40f667d 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,10 @@ This is an MCP (Model Context Protocol) server that provides tools for Rust crat ## Installation ```bash -git clone https://github.com/d6e/cratedocs-mcp.git +git clone https://github.com/promptexecution/cratedocs-mcp.git cd cratedocs-mcp cargo build --release +cargo install --path . ``` ## Running the Server diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index aef8cf0..c782baa 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -22,6 +22,8 @@ struct Cli { #[derive(Subcommand)] enum Commands { + /// Output the version and exit + Version, /// Run the server in stdin/stdout mode Stdio { /// Enable debug logging @@ -103,6 +105,10 @@ async fn main() -> Result<()> { let cli = Cli::parse(); match cli.command { + Commands::Version => { + println!("{}", env!("CARGO_PKG_VERSION")); + Ok(()) + }, Commands::Stdio { debug } => run_stdio_server(debug).await, Commands::Http { address, debug } => run_http_server(address, debug).await, Commands::Test { diff --git a/src/tools/docs/tests.rs b/src/tools/docs/tests.rs index 71acf32..0b6fa3b 100644 --- a/src/tools/docs/tests.rs +++ b/src/tools/docs/tests.rs @@ -75,8 +75,8 @@ async fn test_list_tools() { let router = DocRouter::new(); let tools = router.list_tools(); - // Should have exactly 3 tools - assert_eq!(tools.len(), 3); + // Should have exactly 4 tools (lookup_crate, search_crates, lookup_item, list_crate_items) + assert_eq!(tools.len(), 4); // Check tool names let tool_names: Vec = tools.iter().map(|t| t.name.clone()).collect(); diff --git a/src/tools/item_list.rs b/src/tools/item_list.rs new file mode 100644 index 0000000..68316ad --- /dev/null +++ b/src/tools/item_list.rs @@ -0,0 +1,77 @@ +use anyhow::Result; + +/// Represents filters for item listing. 
+#[derive(Debug)] +pub struct ItemListFilters { + pub item_type: Option, + pub visibility: Option, + pub module: Option, +} + +/// Stub for the crate item enumeration tool. +/// This will use rust-analyzer to enumerate items in a crate. +pub async fn list_crate_items( + crate_name: &str, + version: &str, + filters: Option, +) -> Result { + // 🦨 skunky: Implementation pending. Will use rust-analyzer APIs. + Ok(format!( + "Stub: list_crate_items for crate: {}, version: {}, filters: {:?}", + crate_name, version, filters + )) +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio; + + #[tokio::test] + async fn test_basic_call_returns_stub() { + let result = list_crate_items("serde", "1.0.0", None).await.unwrap(); + assert!(result.contains("Stub: list_crate_items for crate: serde, version: 1.0.0"), "Stub output missing expected text"); + } + + #[tokio::test] + async fn test_with_item_type_filter() { + let filters = ItemListFilters { + item_type: Some("struct".to_string()), + visibility: None, + module: None, + }; + let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); + assert!(result.contains("filters: Some"), "Stub output missing filters"); + assert!(result.contains("struct"), "Stub output missing item_type"); + } + + #[tokio::test] + async fn test_with_visibility_filter() { + let filters = ItemListFilters { + item_type: None, + visibility: Some("pub".to_string()), + module: None, + }; + let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); + assert!(result.contains("filters: Some"), "Stub output missing filters"); + assert!(result.contains("pub"), "Stub output missing visibility"); + } + + #[tokio::test] + async fn test_with_module_filter() { + let filters = ItemListFilters { + item_type: None, + visibility: None, + module: Some("serde::de".to_string()), + }; + let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); + assert!(result.contains("filters: Some"), "Stub output missing filters"); 
+ assert!(result.contains("serde::de"), "Stub output missing module filter"); + } + + #[tokio::test] + async fn test_invalid_crate_name() { + let result = list_crate_items("not_a_real_crate", "0.0.1", None).await.unwrap(); + assert!(result.contains("not_a_real_crate"), "Stub output missing invalid crate name"); + } +} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index c1631df..fdea00b 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -13,7 +13,7 @@ async fn test_doc_router_initialization() { // Tools should be available and correctly configured let tools = router.list_tools(); - assert_eq!(tools.len(), 3); + assert_eq!(tools.len(), 4); // Check specific tool schemas let lookup_crate_tool = tools.iter().find(|t| t.name == "lookup_crate").unwrap(); @@ -68,8 +68,8 @@ async fn test_end_to_end_crate_lookup() { // The response should be HTML from docs.rs match &content[0] { mcp_core::Content::Text(text) => { - assert!(text.text.contains("")); - assert!(text.text.contains("serde")); + // Output is now markdown, not HTML + assert!(text.text.to_lowercase().contains("serde")); }, _ => panic!("Expected text content"), } From 579f0c1ed169a4907bd52f14e00a1fa4e919ed17 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 10:43:36 +0000 Subject: [PATCH 17/22] --tldr added tag stripping --- src/bin/cratedocs.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index c782baa..0d05b1e 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -210,6 +210,8 @@ fn apply_tldr(input: &str) -> String { let tldr_section_re = Regex::new(r"(?i)^\s*#+\s*license\b|^\s*#+\s*version(s)?\b|^\s*#+license\b|^\s*#+version(s)?\b").unwrap(); // Match any heading (for ending the skip) let heading_re = Regex::new(r"^\s*#+").unwrap(); + // Match tags including start, end, and inline attributes + let detail_tag_re = Regex::new(r"<[/]?detail.*?>").unwrap(); for line 
in input.lines() { // Start skipping if we hit a LICENSE or VERSION(S) heading @@ -222,10 +224,12 @@ fn apply_tldr(input: &str) -> String { skip = false; } if !skip { - output.push(line); + // Remove tags from the line + let cleaned_line = detail_tag_re.replace_all(line, "").to_string(); + output.push(cleaned_line.to_string()); } } - output.join("\n") + output.iter().map(|s| s.as_str()).collect::>().join("\n") } /// Configuration for the test tool @@ -594,3 +598,4 @@ Another version section. assert!(output.contains("Some real documentation here.")); } } + From 58b7680c8cd45a92940b02c435dec1cfa66e2f54 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 11:05:38 +0000 Subject: [PATCH 18/22] checkpoint, syn in - but missing --- Cargo.lock | 8 ++-- Cargo.toml | 2 + src/tools/item_list.rs | 102 +++++++++++++++++++---------------------- 3 files changed, 54 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b61328c..b1a5047 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -443,6 +443,7 @@ dependencies = [ "anyhow", "axum", "clap", + "flate2", "futures", "html2md", "hyper 0.14.32", @@ -455,6 +456,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "syn", "tokenizers", "tokio", "tokio-util", @@ -610,7 +612,7 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -2455,9 +2457,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.99" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e02e925281e18ffd9d640e234264753c43edc62d64b2d4cf898f1bc5e75f3fc2" +checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index c153ee9..efc576f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,8 @@ rand = "0.8" clap = { version = "4.4", features = ["derive"] } 
html2md = "0.2.14" regex = "1" +syn = { version = "2.0.104", features = ["full"] } +flate2 = "1.1.2" [dev-dependencies] # Testing utilities diff --git a/src/tools/item_list.rs b/src/tools/item_list.rs index 68316ad..7d2e10f 100644 --- a/src/tools/item_list.rs +++ b/src/tools/item_list.rs @@ -1,4 +1,11 @@ use anyhow::Result; +use reqwest; +use std::fs; +use std::path::Path; +use tar::Archive; +use flate2::read::GzDecoder; +use syn::{File, Item}; +use tokio::fs as tokio_fs; /// Represents filters for item listing. #[derive(Debug)] @@ -8,6 +15,27 @@ pub struct ItemListFilters { pub module: Option, } +/// Utility function to download and cache crate source. +async fn download_and_cache_crate(crate_name: &str, version: &str) -> Result { + let cache_dir = Path::new("./cache"); + let crate_dir = cache_dir.join(format!("{}-{}", crate_name, version)); + + if crate_dir.exists() { + return Ok(crate_dir.to_string_lossy().to_string()); + } + + let url = format!("https://crates.io/api/v1/crates/{}/{}/download", crate_name, version); + let response = reqwest::get(&url).await?; + let tarball = response.bytes().await?; + + fs::create_dir_all(&cache_dir)?; + let tar_gz = GzDecoder::new(&*tarball); + let mut archive = Archive::new(tar_gz); + archive.unpack(&cache_dir)?; + + Ok(crate_dir.to_string_lossy().to_string()) +} + /// Stub for the crate item enumeration tool. /// This will use rust-analyzer to enumerate items in a crate. pub async fn list_crate_items( @@ -15,63 +43,27 @@ pub async fn list_crate_items( version: &str, filters: Option, ) -> Result { - // 🦨 skunky: Implementation pending. Will use rust-analyzer APIs. - Ok(format!( - "Stub: list_crate_items for crate: {}, version: {}, filters: {:?}", - crate_name, version, filters - )) -} + let crate_path = download_and_cache_crate(crate_name, version).await?; + let mut items = Vec::new(); -#[cfg(test)] -mod tests { - use super::*; - use tokio; + for entry in fs::read_dir(crate_path)? 
{ + let entry = entry?; + let path = entry.path(); + if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + let content = fs::read_to_string(&path)?; + let parsed_file: File = syn::parse_file(&content)?; - #[tokio::test] - async fn test_basic_call_returns_stub() { - let result = list_crate_items("serde", "1.0.0", None).await.unwrap(); - assert!(result.contains("Stub: list_crate_items for crate: serde, version: 1.0.0"), "Stub output missing expected text"); + for item in parsed_file.items { + match item { + Item::Struct(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("struct")) => items.push(format!("{:?}", item)), + Item::Enum(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("enum")) => items.push(format!("{:?}", item)), + Item::Trait(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("trait")) => items.push(format!("{:?}", item)), + Item::Fn(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("fn")) => items.push(format!("{:?}", item)), + _ => {} + } + } + } } - #[tokio::test] - async fn test_with_item_type_filter() { - let filters = ItemListFilters { - item_type: Some("struct".to_string()), - visibility: None, - module: None, - }; - let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); - assert!(result.contains("filters: Some"), "Stub output missing filters"); - assert!(result.contains("struct"), "Stub output missing item_type"); - } - - #[tokio::test] - async fn test_with_visibility_filter() { - let filters = ItemListFilters { - item_type: None, - visibility: Some("pub".to_string()), - module: None, - }; - let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); - assert!(result.contains("filters: Some"), "Stub output missing filters"); - assert!(result.contains("pub"), "Stub output missing visibility"); - } - - #[tokio::test] - async fn test_with_module_filter() { - let filters = ItemListFilters { - item_type: None, - 
visibility: None, - module: Some("serde::de".to_string()), - }; - let result = list_crate_items("serde", "1.0.0", Some(filters)).await.unwrap(); - assert!(result.contains("filters: Some"), "Stub output missing filters"); - assert!(result.contains("serde::de"), "Stub output missing module filter"); - } - - #[tokio::test] - async fn test_invalid_crate_name() { - let result = list_crate_items("not_a_real_crate", "0.0.1", None).await.unwrap(); - assert!(result.contains("not_a_real_crate"), "Stub output missing invalid crate name"); - } + Ok(items.join("\n")) } From a13d2bea1b78e321a20f957ed4e22c3b1e3ebccd Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sat, 5 Jul 2025 11:12:34 +0000 Subject: [PATCH 19/22] list_crate_items appears to work! --- Cargo.lock | 35 +++++++++++++++++++++ Cargo.toml | 1 + src/tools/item_list.rs | 70 +++++++++++++++++++++++++++++++++--------- 3 files changed, 91 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b1a5047..6d79a86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -457,6 +457,7 @@ dependencies = [ "serde", "serde_json", "syn", + "tar", "tokenizers", "tokio", "tokio-util", @@ -684,6 +685,18 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "filetime" +version = "0.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" +dependencies = [ + "cfg-if", + "libc", + "libredox", + "windows-sys 0.59.0", +] + [[package]] name = "flate2" version = "1.1.2" @@ -1358,6 +1371,7 @@ checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638" dependencies = [ "bitflags 2.9.0", "libc", + "redox_syscall", ] [[package]] @@ -2510,6 +2524,17 @@ dependencies = [ "libc", ] +[[package]] +name = "tar" +version = "0.4.44" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "tempfile" version = "3.18.0" @@ -3389,6 +3414,16 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xattr" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af3a19837351dc82ba89f8a125e22a3c475f05aba604acc023d62b2739ae2909" +dependencies = [ + "libc", + "rustix", +] + [[package]] name = "xml5ever" version = "0.18.1" diff --git a/Cargo.toml b/Cargo.toml index efc576f..4c4f286 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ html2md = "0.2.14" regex = "1" syn = { version = "2.0.104", features = ["full"] } flate2 = "1.1.2" +tar = "0.4.44" [dev-dependencies] # Testing utilities diff --git a/src/tools/item_list.rs b/src/tools/item_list.rs index 7d2e10f..6e0c4dc 100644 --- a/src/tools/item_list.rs +++ b/src/tools/item_list.rs @@ -5,7 +5,6 @@ use std::path::Path; use tar::Archive; use flate2::read::GzDecoder; use syn::{File, Item}; -use tokio::fs as tokio_fs; /// Represents filters for item listing. #[derive(Debug)] @@ -46,24 +45,65 @@ pub async fn list_crate_items( let crate_path = download_and_cache_crate(crate_name, version).await?; let mut items = Vec::new(); - for entry in fs::read_dir(crate_path)? 
{ - let entry = entry?; - let path = entry.path(); - if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { - let content = fs::read_to_string(&path)?; - let parsed_file: File = syn::parse_file(&content)?; + // Most crates have their source in a "src" subdirectory + let src_path = Path::new(&crate_path).join("src"); - for item in parsed_file.items { - match item { - Item::Struct(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("struct")) => items.push(format!("{:?}", item)), - Item::Enum(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("enum")) => items.push(format!("{:?}", item)), - Item::Trait(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("trait")) => items.push(format!("{:?}", item)), - Item::Fn(_) if filters.as_ref().map_or(true, |f| f.item_type.as_deref() == Some("fn")) => items.push(format!("{:?}", item)), - _ => {} + fn visit_rs_files(dir: &Path, cb: &mut F) { + if let Ok(entries) = fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + visit_rs_files(&path, cb); + } else if path.extension().and_then(|ext| ext.to_str()) == Some("rs") { + cb(&path); } } } } - Ok(items.join("\n")) + visit_rs_files(&src_path, &mut |path: &Path| { + if let Ok(content) = fs::read_to_string(path) { + if let Ok(parsed_file) = syn::parse_file(&content) { + for item in parsed_file.items { + if let Item::Struct(s) = &item { + if filters.as_ref().map_or(true, |f| f.item_type.as_deref().map_or(true, |ty| ty == "struct")) { + items.push(("Structs", format!("{}", s.ident))); + } + } + if let Item::Enum(e) = &item { + if filters.as_ref().map_or(true, |f| f.item_type.as_deref().map_or(true, |ty| ty == "enum")) { + items.push(("Enums", format!("{}", e.ident))); + } + } + if let Item::Trait(t) = &item { + if filters.as_ref().map_or(true, |f| f.item_type.as_deref().map_or(true, |ty| ty == "trait")) { + items.push(("Traits", format!("{}", t.ident))); + } + } + if let 
Item::Fn(f) = &item { + if filters.as_ref().map_or(true, |f| f.item_type.as_deref().map_or(true, |ty| ty == "fn")) { + items.push(("Functions", format!("{}", f.sig.ident))); + } + } + } + } + } + }); + + use std::collections::BTreeMap; + let mut grouped: BTreeMap<&str, Vec> = BTreeMap::new(); + for (kind, name) in items { + grouped.entry(kind).or_default().push(name); + } + + let mut output = String::new(); + for (kind, names) in grouped { + output.push_str(&format!("## {}\n", kind)); + for name in names { + output.push_str(&format!("- {}\n", name)); + } + output.push('\n'); + } + + Ok(output) } From ac46dde35c2a96fd5093a89218b014d979f0d569 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sun, 6 Jul 2025 04:40:52 +0000 Subject: [PATCH 20/22] tdlr didn't work with stdio --- README.md | 10 ++++++--- src/bin/cratedocs.rs | 48 +++++++++++------------------------------- src/tools/docs/docs.rs | 22 +++++++++++++++++-- src/tools/item_list.rs | 2 +- src/tools/mod.rs | 1 + 5 files changed, 41 insertions(+), 42 deletions(-) diff --git a/README.md b/README.md index 40f667d..8ad5129 100644 --- a/README.md +++ b/README.md @@ -41,15 +41,16 @@ cargo run --bin cratedocs http --debug ### Directly Testing Documentation Tools -# Enumerate crate items -cargo run --bin cratedocs test --tool list_crate_items --crate-name serde --version 1.0.0 --item-type struct -cargo run --bin cratedocs test --tool list_crate_items --crate-name tokio --version 1.28.0 --visibility pub --module tokio::sync You can directly test the documentation tools from the command line without starting a server: ```bash # Get help for the test command cargo run --bin cratedocs test --tool help +# Enumerate crate items (step by step) +cargo run --bin cratedocs test --tool list_crate_items --crate-name serde --version 1.0.0 --item-type struct +cargo run --bin cratedocs test --tool list_crate_items --crate-name tokio --version 1.28.0 --visibility pub --module tokio::sync + # Look up crate documentation cargo run 
--bin cratedocs test --tool lookup_crate --crate-name tokio @@ -74,6 +75,9 @@ cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --output t # Summarize output by stripping LICENSE and VERSION sections, limits to xxxxx tokens (uses huggingface tokenizer) cargo run --bin cratedocs test --tool lookup_crate --crate-name tokio --tldr --max_tokens 48000 + + + ``` By default, the HTTP server will listen on `http://127.0.0.1:8080/sse`. diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index 0d05b1e..fe1e100 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -9,7 +9,7 @@ use std::net::SocketAddr; use tokio::io::{stdin, stdout}; use tracing_appender::rolling::{RollingFileAppender, Rotation}; use tracing_subscriber::{self, EnvFilter, layer::SubscriberExt, util::SubscriberInitExt}; -use regex::Regex; +use cratedocs_mcp::tools::tldr; #[derive(Parser)] #[command(author, version = "0.2.0", about, long_about = None)] @@ -29,6 +29,9 @@ enum Commands { /// Enable debug logging #[arg(short, long)] debug: bool, + /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) + #[arg(long)] + tldr: bool, }, /// Run the server with HTTP/SSE interface Http { @@ -109,7 +112,7 @@ async fn main() -> Result<()> { println!("{}", env!("CARGO_PKG_VERSION")); Ok(()) }, - Commands::Stdio { debug } => run_stdio_server(debug).await, + Commands::Stdio { debug, tldr } => run_stdio_server(debug, tldr).await, Commands::Http { address, debug } => run_http_server(address, debug).await, Commands::Test { tool, @@ -145,7 +148,7 @@ async fn main() -> Result<()> { } } -async fn run_stdio_server(debug: bool) -> Result<()> { +async fn run_stdio_server(debug: bool, tldr: bool) -> Result<()> { // Set up file appender for logging let file_appender = RollingFileAppender::new(Rotation::DAILY, "logs", "stdio-server.log"); @@ -164,13 +167,16 @@ async fn run_stdio_server(debug: bool) -> Result<()> { tracing::info!("Starting MCP documentation server in STDIN/STDOUT 
mode"); // Create an instance of our documentation router - let router = RouterService(DocRouter::new()); + // Pass the tldr flag to DocRouter so the TL;DR filter is applied to tool output + let router = RouterService(DocRouter::new_with_tldr(tldr)); // Create and run the server let server = Server::new(router); let transport = ByteTransport::new(stdin(), stdout()); tracing::info!("Documentation server initialized and ready to handle requests"); + // Note: tldr was forwarded to DocRouter above; DocRouter applies the TLDR filter to each tool's text output. + // No additional TLDR handling is needed in the Server or transport setup here. Ok(server.run(transport).await?) } @@ -201,36 +207,6 @@ async fn run_http_server(address: String, debug: bool) -> Result<()> { } // --- TLDR Helper Function --- -fn apply_tldr(input: &str) -> String { - // Remove LICENSE and VERSION(S) sections by skipping lines between those headings and the next heading or EOF. - let mut output = Vec::new(); - let mut skip = false; - - // Match any heading (with or without space) for LICENSE or VERSION(S) - let tldr_section_re = Regex::new(r"(?i)^\s*#+\s*license\b|^\s*#+\s*version(s)?\b|^\s*#+license\b|^\s*#+version(s)?\b").unwrap(); - // Match any heading (for ending the skip) - let heading_re = Regex::new(r"^\s*#+").unwrap(); - // Match tags including start, end, and inline attributes - let detail_tag_re = Regex::new(r"<[/]?detail.*?>").unwrap(); - - for line in input.lines() { - // Start skipping if we hit a LICENSE or VERSION(S) heading - if !skip && tldr_section_re.is_match(line) { - skip = true; - continue; // skip the heading line itself - } - // Stop skipping at the next heading (but do not skip the heading itself) - if skip && heading_re.is_match(line) && !tldr_section_re.is_match(line) { - skip = false; - } - if !skip { - // Remove tags from the line - let cleaned_line = detail_tag_re.replace_all(line, "").to_string(); - output.push(cleaned_line.to_string()); - } - } - output.iter().map(|s| 
s.as_str()).collect::>().join("\n") -} /// Configuration for the test tool struct TestToolConfig { @@ -407,7 +383,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { // TL;DR processing: strip LICENSE and VERSION(S) sections if --tldr is set if tldr { - content_str = apply_tldr(&content_str); + content_str = tldr::apply_tldr(&content_str); } let formatted_output = match format.as_str() { @@ -507,7 +483,7 @@ async fn run_test_tool(config: TestToolConfig) -> Result<()> { } #[cfg(test)] mod tldr_tests { - use super::apply_tldr; + use cratedocs_mcp::tools::tldr::apply_tldr; #[test] fn test_apply_tldr_removes_license_and_versions() { diff --git a/src/tools/docs/docs.rs b/src/tools/docs/docs.rs index e2f8842..0048fc1 100644 --- a/src/tools/docs/docs.rs +++ b/src/tools/docs/docs.rs @@ -1,4 +1,5 @@ use crate::tools::item_list; +use crate::tools::tldr; use std::{future::Future, pin::Pin, sync::Arc}; use mcp_core::{ @@ -47,6 +48,7 @@ impl DocCache { pub struct DocRouter { pub client: Client, pub cache: DocCache, + pub tldr: bool, } impl Default for DocRouter { @@ -56,12 +58,16 @@ impl Default for DocRouter { } impl DocRouter { - pub fn new() -> Self { + pub fn new_with_tldr(tldr: bool) -> Self { Self { client: Client::new(), cache: DocCache::new(), + tldr, } } + pub fn new() -> Self { + Self::new_with_tldr(false) + } // Fetch crate documentation from docs.rs async fn lookup_crate(&self, crate_name: String, version: Option) -> Result { @@ -363,9 +369,10 @@ impl mcp_server::Router for DocRouter { let this = self.clone(); let tool_name = tool_name.to_string(); let arguments = arguments.clone(); + let tldr = self.tldr; Box::pin(async move { - match tool_name.as_str() { + let mut result = match tool_name.as_str() { "lookup_crate" => { let crate_name = arguments .get("crate_name") @@ -455,7 +462,18 @@ impl mcp_server::Router for DocRouter { Ok(vec![Content::text(result)]) } _ => Err(ToolError::NotFound(format!("Tool {} not found", tool_name))), + }?; + + // Apply 
TLDR filter if enabled + if tldr { + for content in &mut result { + if let Content::Text(text) = content { + text.text = tldr::apply_tldr(&text.text); + } + } } + + Ok(result) }) } diff --git a/src/tools/item_list.rs b/src/tools/item_list.rs index 6e0c4dc..7e7fa42 100644 --- a/src/tools/item_list.rs +++ b/src/tools/item_list.rs @@ -4,7 +4,7 @@ use std::fs; use std::path::Path; use tar::Archive; use flate2::read::GzDecoder; -use syn::{File, Item}; +use syn::{Item}; /// Represents filters for item listing. #[derive(Debug)] diff --git a/src/tools/mod.rs b/src/tools/mod.rs index cd6b80a..5b46a6f 100644 --- a/src/tools/mod.rs +++ b/src/tools/mod.rs @@ -1,5 +1,6 @@ pub mod docs; pub mod item_list; +pub mod tldr; pub use docs::DocRouter; pub use docs::docs::DocCache; From 5e5ba5a30e1dfdac0de6a676a1f2807c6c4237e9 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sun, 6 Jul 2025 04:41:22 +0000 Subject: [PATCH 21/22] added tldr.rs --- src/tools/tldr.rs | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/tools/tldr.rs diff --git a/src/tools/tldr.rs b/src/tools/tldr.rs new file mode 100644 index 0000000..452c778 --- /dev/null +++ b/src/tools/tldr.rs @@ -0,0 +1,33 @@ +use regex::Regex; + +/// Remove LICENSE and VERSION(S) sections by skipping lines between those headings and the next heading or EOF. +/// Also removes tags. 
+pub fn apply_tldr(input: &str) -> String { + let mut output = Vec::new(); + let mut skip = false; + + // Match any heading (with or without space) for LICENSE or VERSION(S) + let tldr_section_re = Regex::new(r"(?i)^\s*#+\s*(license|version(s)?)\b").unwrap(); + // Match any heading (for ending the skip) + let heading_re = Regex::new(r"^\s*#+").unwrap(); + // Match tags including start, end, and inline attributes + let detail_tag_re = Regex::new(r"<[/]?detail.*?>").unwrap(); + + for line in input.lines() { + // Start skipping if we hit a LICENSE or VERSION(S) heading + if !skip && tldr_section_re.is_match(line) { + skip = true; + continue; // skip the heading line itself + } + // Stop skipping at the next heading (but do not skip the heading itself) + if skip && heading_re.is_match(line) { + skip = false; + } + if !skip { + // Remove tags from the line + let cleaned_line = detail_tag_re.replace_all(line, "").to_string(); + output.push(cleaned_line); + } + } + output.join("\n") +} \ No newline at end of file From c48014e435e4d0304475c936505cb74f0e52e636 Mon Sep 17 00:00:00 2001 From: elasticdotventures Date: Sun, 6 Jul 2025 04:58:46 +0000 Subject: [PATCH 22/22] added --max_tokens ### to stdio mode --- src/bin/cratedocs.rs | 9 ++++++--- src/tools/docs/docs.rs | 31 +++++++++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 5 deletions(-) diff --git a/src/bin/cratedocs.rs b/src/bin/cratedocs.rs index fe1e100..6223d7f 100644 --- a/src/bin/cratedocs.rs +++ b/src/bin/cratedocs.rs @@ -32,6 +32,9 @@ enum Commands { /// Summarize output by stripping LICENSE and VERSION sections (TL;DR mode) #[arg(long)] tldr: bool, + /// Maximum number of tokens for output (token-aware truncation) + #[arg(long)] + max_tokens: Option, }, /// Run the server with HTTP/SSE interface Http { @@ -112,7 +115,7 @@ async fn main() -> Result<()> { println!("{}", env!("CARGO_PKG_VERSION")); Ok(()) }, - Commands::Stdio { debug, tldr } => run_stdio_server(debug, tldr).await, + Commands::Stdio { 
debug, tldr, max_tokens } => run_stdio_server(debug, tldr, max_tokens).await, Commands::Http { address, debug } => run_http_server(address, debug).await, Commands::Test { tool, @@ -148,7 +151,7 @@ async fn main() -> Result<()> { } } -async fn run_stdio_server(debug: bool, tldr: bool) -> Result<()> { +async fn run_stdio_server(debug: bool, tldr: bool, max_tokens: Option) -> Result<()> { // Set up file appender for logging let file_appender = RollingFileAppender::new(Rotation::DAILY, "logs", "stdio-server.log"); @@ -168,7 +171,7 @@ async fn run_stdio_server(debug: bool, tldr: bool) -> Result<()> { // Create an instance of our documentation router // If tldr is needed globally, you may want to pass it to DocRouter or handle it in tool output - let router = RouterService(DocRouter::new_with_tldr(tldr)); + let router = RouterService(DocRouter::new_with_tldr_and_max_tokens(tldr, max_tokens)); // Create and run the server let server = Server::new(router); diff --git a/src/tools/docs/docs.rs b/src/tools/docs/docs.rs index 0048fc1..c43c2dd 100644 --- a/src/tools/docs/docs.rs +++ b/src/tools/docs/docs.rs @@ -49,6 +49,7 @@ pub struct DocRouter { pub client: Client, pub cache: DocCache, pub tldr: bool, + pub max_tokens: Option, } impl Default for DocRouter { @@ -58,15 +59,19 @@ impl Default for DocRouter { } impl DocRouter { - pub fn new_with_tldr(tldr: bool) -> Self { + pub fn new_with_tldr_and_max_tokens(tldr: bool, max_tokens: Option) -> Self { Self { client: Client::new(), cache: DocCache::new(), tldr, + max_tokens, } } + pub fn new_with_tldr(tldr: bool) -> Self { + Self::new_with_tldr_and_max_tokens(tldr, None) + } pub fn new() -> Self { - Self::new_with_tldr(false) + Self::new_with_tldr_and_max_tokens(false, None) } // Fetch crate documentation from docs.rs @@ -370,6 +375,7 @@ impl mcp_server::Router for DocRouter { let tool_name = tool_name.to_string(); let arguments = arguments.clone(); let tldr = self.tldr; + let max_tokens = self.max_tokens; Box::pin(async move { let 
mut result = match tool_name.as_str() { @@ -473,6 +479,27 @@ impl mcp_server::Router for DocRouter { } } + // Apply max_tokens truncation if enabled + if let Some(max_tokens) = max_tokens { + for content in &mut result { + if let Content::Text(text) = content { + if let Ok(token_count) = crate::tools::count_tokens(&text.text) { + if token_count > max_tokens { + let mut truncated = text.text.clone(); + while crate::tools::count_tokens(&truncated).map_or(0, |c| c) > max_tokens && !truncated.is_empty() { + truncated.pop(); + } + if let Some(last_space) = truncated.rfind(' ') { + truncated.truncate(last_space); + } + truncated.push_str(" 内容被截断"); + text.text = truncated; + } + } + } + } + } + Ok(result) }) }