ajitpratap0 · ajitpratap0 · Feb 28, 2026 · Feb 28, 2026
@@ -1,4 +1,3 @@
-
                                  Apache License
                            Version 2.0, January 2004
                         http://www.apache.org/licenses/

@@ -12,16 +12,34 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Package advisor provides SQL query optimization suggestions by analyzing parsed ASTs.
+// Package advisor provides SQL query optimization analysis by walking parsed ASTs and
+// applying configurable rules that detect common performance anti-patterns.
 //
-// The optimizer walks the Abstract Syntax Tree produced by the GoSQLX parser and
-// applies configurable rules to detect common performance anti-patterns. Each rule
-// produces zero or more Suggestions with severity levels, human-readable messages,
-// and (where possible) suggested SQL rewrites.
+// The central type is Optimizer, created with New() (all built-in rules) or
+// NewWithRules(...Rule) for a custom rule set. Optimizer.AnalyzeSQL is a convenience
+// method that parses a SQL string and returns an OptimizationResult containing a slice
+// of Suggestion values, a query complexity classification (simple / moderate / complex),
+// and an optimization score from 0 (worst) to 100 (no issues). Each Suggestion carries
+// a rule ID, severity (info / warning / error), a human-readable message and detail, the
+// source location, and where possible a suggested SQL rewrite.
+//
+// Eight built-in rules are registered by DefaultRules:
+//
+//	OPT-001  SELECT * Detection         — recommend listing columns explicitly
+//	OPT-002  Missing WHERE Clause       — UPDATE/DELETE without WHERE affects all rows
+//	OPT-003  Cartesian Product          — implicit cross join from multiple FROM tables
+//	OPT-004  SELECT DISTINCT Overuse    — DISTINCT may mask incorrect join conditions
+//	OPT-005  Subquery in WHERE          — suggest converting correlated subqueries to JOINs
+//	OPT-006  OR in WHERE Clause         — OR on different columns may prevent index usage
+//	OPT-007  Leading Wildcard in LIKE   — LIKE '%...' forces a full table scan
+//	OPT-008  Function on Indexed Column — wrapping a column in a function defeats B-tree indexes
+//
+// Custom rules implement the Rule interface (ID, Name, Description, Analyze) and are
+// passed to NewWithRules. All built-in rules are stateless and safe for concurrent use.
 //
 // Quick Start:
 //
-//	opt := optimizer.New()
+//	opt := advisor.New()
 //	result, err := opt.AnalyzeSQL("SELECT * FROM users")
 //	if err != nil {
 //	    log.Fatal(err)

@@ -12,6 +12,31 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+// Package main is compiled as a C-shared library (libgosqlx) that exposes the GoSQLX
+// SQL parsing engine to non-Go languages via a plain C FFI.
+//
+// The library is built with "go build -buildmode=c-shared" (see build.sh) and produces
+// a platform-native shared object — libgosqlx.so on Linux, libgosqlx.dylib on macOS,
+// and libgosqlx.dll on Windows — together with a generated C header (libgosqlx.h).
+// The primary consumer is the Python package pygosqlx, which loads the library at
+// runtime via ctypes, but any language with a C FFI (Ruby, Node.js via N-API, Rust FFI,
+// etc.) can call the exported symbols directly.
+//
+// Every exported function except gosqlx_free returns a newly allocated C string. The SQL
+// entry points take a NUL-terminated UTF-8 C string and return a JSON-encoded result.
+// Callers must free every returned string with gosqlx_free to avoid memory leaks.
+//
+// Exported symbols:
+//
+//	gosqlx_parse(sql)             — parse SQL, return ParseResult JSON
+//	gosqlx_validate(sql)          — validate SQL syntax, return ValidationResult JSON
+//	gosqlx_format(sql)            — format SQL, return FormatResult JSON
+//	gosqlx_extract_tables(sql)    — extract referenced table names as JSON array
+//	gosqlx_extract_columns(sql)   — extract referenced column names as JSON array
+//	gosqlx_extract_functions(sql) — extract referenced function names as JSON array
+//	gosqlx_extract_metadata(sql)  — extract tables, columns, functions with schema qualification
+//	gosqlx_version()              — return the library version string (e.g. "1.9.0")
+//	gosqlx_free(ptr)              — free a string previously returned by any gosqlx_* function
 package main
 
 // #include <stdlib.h>

@@ -109,19 +109,31 @@ func (c *keywordSuggestionCache) size() int {
 	return len(c.cache)
 }
 
-// ClearSuggestionCache clears the keyword suggestion cache.
-// Useful for testing or when keyword list changes.
+// ClearSuggestionCache removes all entries from the keyword suggestion cache.
+// Call this in tests to ensure a clean state between test cases, or after
+// modifying the keyword list so that stale suggestions are not served.
 func ClearSuggestionCache() {
 	suggestionCache.clear()
 }
 
-// SuggestionCacheSize returns the current size of the suggestion cache.
-// Useful for monitoring and debugging.
+// SuggestionCacheSize returns the number of entries currently held in the keyword
+// suggestion cache. Use this for monitoring cache growth and deciding whether to
+// adjust the maximum size.
 func SuggestionCacheSize() int {
 	return suggestionCache.size()
 }
 
-// SuggestionCacheStats returns cache statistics
+// SuggestionCacheStats holds observability metrics for the keyword suggestion cache.
+// Retrieve an instance via GetSuggestionCacheStats and reset counters via
+// ResetSuggestionCacheStats.
+//
+// Fields:
+//   - Size: Current number of entries in the cache
+//   - MaxSize: Configured maximum capacity (default 1000)
+//   - Hits: Number of cache lookups that returned a cached value
+//   - Misses: Number of cache lookups that required computing a new suggestion
+//   - Evictions: Total number of entries removed during partial eviction sweeps
+//   - HitRate: Ratio of Hits to (Hits + Misses); 0.0 when no lookups have occurred
 type SuggestionCacheStats struct {
 	Size      int
 	MaxSize   int
@@ -131,7 +143,14 @@ type SuggestionCacheStats struct {
 	HitRate   float64
 }
 
-// GetSuggestionCacheStats returns current cache statistics
+// GetSuggestionCacheStats returns a snapshot of the current keyword suggestion cache
+// metrics. The returned struct is safe to read without any additional locking.
+// Use this in observability dashboards or benchmarks to track cache efficiency.
+//
+// Example:
+//
+//	stats := errors.GetSuggestionCacheStats()
+//	fmt.Printf("Cache hit rate: %.1f%%\n", stats.HitRate*100)
 func GetSuggestionCacheStats() SuggestionCacheStats {
 	hits := atomic.LoadUint64(&suggestionCache.hits)
 	misses := atomic.LoadUint64(&suggestionCache.misses)
@@ -153,8 +172,9 @@ func GetSuggestionCacheStats() SuggestionCacheStats {
 	}
 }
 
-// ResetSuggestionCacheStats resets the cache statistics counters.
-// Useful for testing and monitoring.
+// ResetSuggestionCacheStats zeroes all hit, miss, and eviction counters in the
+// keyword suggestion cache without clearing cached entries. Call this at the start
+// of a benchmark or monitoring interval to obtain a clean measurement window.
 func ResetSuggestionCacheStats() {
 	atomic.StoreUint64(&suggestionCache.hits, 0)
 	atomic.StoreUint64(&suggestionCache.misses, 0)

@@ -21,8 +21,16 @@ import (
 	"github.com/ajitpratap0/GoSQLX/pkg/models"
 )
 
-// FormatErrorWithContext formats an error with SQL context and visual indicators
-// This is a convenience function that wraps the Error.Error() method
+// FormatErrorWithContext formats an error with SQL context and visual indicators.
+// For *Error values it delegates to the structured Error.Error() method, which
+// includes code, location, SQL context highlighting, hint, and help URL. For all
+// other error types it falls back to a plain "Error: <message>" string.
+//
+// Parameters:
+//   - err: The error to format (may be *Error or a generic error)
+//   - sql: The SQL source; currently unused for *Error (context is already attached)
+//
+// Returns the formatted error string ready for display to end users.
 func FormatErrorWithContext(err error, sql string) string {
 	// If it's already a structured error, just return its formatted string
 	if structErr, ok := err.(*Error); ok {
@@ -33,7 +41,19 @@ func FormatErrorWithContext(err error, sql string) string {
 	return fmt.Sprintf("Error: %v", err)
 }
 
-// FormatErrorWithContextAt formats an error at a specific location with SQL context
+// FormatErrorWithContextAt creates a structured error for the given code and location,
+// attaches the SQL context window, and auto-generates a hint. It then returns the
+// fully-formatted error string. This is useful for one-shot error formatting without
+// retaining the *Error value.
+//
+// Parameters:
+//   - code: ErrorCode classifying the error category (e.g., ErrCodeExpectedToken)
+//   - message: Human-readable description of the error
+//   - location: Precise line/column where the error occurred
+//   - sql: Full SQL source used to generate the context window
+//   - highlightLen: Number of characters to highlight at the error column
+//
+// Returns the complete formatted error string including context highlighting.
 func FormatErrorWithContextAt(code ErrorCode, message string, location models.Location, sql string, highlightLen int) string {
 	err := NewError(code, message, location)
 	err = err.WithContext(sql, highlightLen)
@@ -46,8 +66,17 @@ func FormatErrorWithContextAt(code ErrorCode, message string, location models.Lo
 	return err.Error()
 }
 
-// FormatMultiLineContext formats error context for multi-line SQL with extended context
-// Shows up to 3 lines (1 before, error line, 1 after) with proper indentation
+// FormatMultiLineContext formats an SQL context window around a specific error location.
+// It shows up to three lines: one before the error, the error line itself, and one
+// after. A caret indicator (^) is rendered below the error column, with optional
+// multi-character highlighting when highlightLen > 1.
+//
+// Parameters:
+//   - sql: The full SQL source string (may contain newlines)
+//   - location: The line/column of the error (1-based)
+//   - highlightLen: Number of characters to highlight; 1 renders a single caret
+//
+// Returns the formatted context block, or an empty string if location is invalid.
 func FormatMultiLineContext(sql string, location models.Location, highlightLen int) string {
 	if sql == "" || location.Line <= 0 {
 		return ""
@@ -107,8 +136,18 @@ func FormatMultiLineContext(sql string, location models.Location, highlightLen i
 	return sb.String()
 }
 
-// FormatErrorSummary provides a brief summary of an error without full context
-// Useful for logging or when SQL context is not needed
+// FormatErrorSummary provides a concise one-line error summary suitable for
+// structured logging and monitoring systems where a full context window would
+// be too verbose. For *Error values the output format is:
+//
+//	[E2001] unexpected token: COMMA at line 5, column 20
+//
+// For other error types the output is "Error: <message>".
+//
+// Parameters:
+//   - err: The error to summarise
+//
+// Returns the one-line summary string.
 func FormatErrorSummary(err error) string {
 	if structErr, ok := err.(*Error); ok {
 		return fmt.Sprintf("[%s] %s at line %d, column %d",
@@ -120,7 +159,20 @@ func FormatErrorSummary(err error) string {
 	return fmt.Sprintf("Error: %v", err)
 }
 
-// FormatErrorWithSuggestion formats an error with an intelligent suggestion
+// FormatErrorWithSuggestion creates and formats a structured error that includes a
+// manually provided hint. When suggestion is empty, the function falls back to
+// auto-generating a hint via GenerateHint. This is the preferred formatter when
+// the caller already knows the correct fix.
+//
+// Parameters:
+//   - code: ErrorCode classifying the error category
+//   - message: Human-readable description of the error
+//   - location: Precise line/column where the error occurred
+//   - sql: Full SQL source used to generate the context window
+//   - highlightLen: Number of characters to highlight at the error column
+//   - suggestion: Custom hint text; empty string triggers auto-generation
+//
+// Returns the complete formatted error string.
 func FormatErrorWithSuggestion(code ErrorCode, message string, location models.Location, sql string, highlightLen int, suggestion string) string {
 	err := NewError(code, message, location)
 	err = err.WithContext(sql, highlightLen)
@@ -137,7 +189,16 @@ func FormatErrorWithSuggestion(code ErrorCode, message string, location models.L
 	return err.Error()
 }
 
-// FormatErrorList formats multiple errors in a readable list
+// FormatErrorList formats a slice of structured errors into a numbered list with
+// full context for each entry. The output begins with a count line and separates
+// each error with a blank line.
+//
+// Returns "No errors" when the slice is empty.
+//
+// Parameters:
+//   - errors: Slice of *Error values to format
+//
+// Returns the multi-error report string.
 func FormatErrorList(errors []*Error) string {
 	if len(errors) == 0 {
 		return "No errors"
@@ -155,7 +216,21 @@ func FormatErrorList(errors []*Error) string {
 	return sb.String()
 }
 
-// FormatErrorWithExample formats an error with a corrected example
+// FormatErrorWithExample formats an error and appends a side-by-side "Wrong / Correct"
+// example to the hint. This is particularly useful for educational error messages
+// (e.g., in linters or IDEs) where showing the correct pattern helps users learn
+// the expected SQL syntax.
+//
+// Parameters:
+//   - code: ErrorCode classifying the error category
+//   - message: Human-readable description of the error
+//   - location: Precise line/column where the error occurred
+//   - sql: Full SQL source used to generate the context window
+//   - highlightLen: Number of characters to highlight at the error column
+//   - wrongExample: The erroneous SQL fragment (e.g., "SELECT * FORM users")
+//   - correctExample: The corrected SQL fragment (e.g., "SELECT * FROM users")
+//
+// Returns the complete formatted error string including the before/after example.
 func FormatErrorWithExample(code ErrorCode, message string, location models.Location, sql string, highlightLen int, wrongExample, correctExample string) string {
 	err := NewError(code, message, location)
 	err = err.WithContext(sql, highlightLen)
@@ -167,7 +242,19 @@ func FormatErrorWithExample(code ErrorCode, message string, location models.Loca
 	return err.Error()
 }
 
-// FormatContextWindow formats a larger context window (up to N lines before and after)
+// FormatContextWindow formats a configurable SQL context window of up to linesBefore
+// lines before and linesAfter lines after the error line. Prefer this over
+// FormatMultiLineContext when more surrounding context is needed (e.g., in IDE
+// hover messages or verbose diagnostic reports).
+//
+// Parameters:
+//   - sql: The full SQL source string
+//   - location: The line/column of the error (1-based)
+//   - highlightLen: Number of characters to highlight at the error column
+//   - linesBefore: Number of source lines to display before the error line
+//   - linesAfter: Number of source lines to display after the error line
+//
+// Returns the formatted context block, or an empty string if location is invalid.
 func FormatContextWindow(sql string, location models.Location, highlightLen int, linesBefore, linesAfter int) string {
 	if sql == "" || location.Line <= 0 {
 		return ""
@@ -231,21 +318,40 @@ func FormatContextWindow(sql string, location models.Location, highlightLen int,
 	return sb.String()
 }
 
-// IsStructuredError checks if an error is a structured GoSQLX error
+// IsStructuredError reports whether err is a GoSQLX *Error value.
+// Use this to distinguish GoSQLX structured errors from generic Go errors
+// before relying on *Error-only data such as the location or error code.
+//
+// Example:
+//
+//	if errors.IsStructuredError(err) {
+//	    loc, _ := errors.ExtractLocation(err)
+//	    // use loc for IDE diagnostics
+//	}
 func IsStructuredError(err error) bool {
 	_, ok := err.(*Error)
 	return ok
 }
 
-// ExtractLocation extracts location information from an error
+// ExtractLocation extracts the line/column location from a GoSQLX *Error.
+// This is the preferred way to obtain location data for IDE integrations such as
+// LSP diagnostics, since it handles the type assertion safely.
+//
+// Returns the Location and true when err is a *Error; returns a zero Location
+// and false for all other error types.
 func ExtractLocation(err error) (models.Location, bool) {
 	if structErr, ok := err.(*Error); ok {
 		return structErr.Location, true
 	}
 	return models.Location{}, false
 }
 
-// ExtractErrorCode extracts the error code from an error
+// ExtractErrorCode extracts the ErrorCode from a GoSQLX *Error.
+// Unlike GetCode, this function returns a boolean indicating whether the extraction
+// succeeded, so callers can branch on the result using Go's comma-ok idiom.
+//
+// Returns the ErrorCode and true when err is a *Error; returns an empty string
+// and false for all other error types.
 func ExtractErrorCode(err error) (ErrorCode, bool) {
 	if structErr, ok := err.(*Error); ok {
 		return structErr.Code, true