diff --git a/LICENSE b/LICENSE index 2752dbd1..28ea770e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,3 @@ - Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ diff --git a/pkg/advisor/optimizer.go b/pkg/advisor/optimizer.go index 8ccf5aac..868037bf 100644 --- a/pkg/advisor/optimizer.go +++ b/pkg/advisor/optimizer.go @@ -12,16 +12,34 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package advisor provides SQL query optimization suggestions by analyzing parsed ASTs. +// Package advisor provides SQL query optimization analysis by walking parsed ASTs and +// applying configurable rules that detect common performance anti-patterns. // -// The optimizer walks the Abstract Syntax Tree produced by the GoSQLX parser and -// applies configurable rules to detect common performance anti-patterns. Each rule -// produces zero or more Suggestions with severity levels, human-readable messages, -// and (where possible) suggested SQL rewrites. +// The central type is Optimizer, created with New() (all built-in rules) or +// NewWithRules(...Rule) for a custom rule set. Optimizer.AnalyzeSQL is a convenience +// method that parses a SQL string and returns an OptimizationResult containing a slice +// of Suggestion values, a query complexity classification (simple / moderate / complex), +// and an optimization score from 0 (worst) to 100 (no issues). Each Suggestion carries +// a rule ID, severity (info / warning / error), a human-readable message and detail, the +// source location, and where possible a suggested SQL rewrite. 
+// +// Eight built-in rules are registered by DefaultRules: +// +// OPT-001 SELECT * Detection — recommend listing columns explicitly +// OPT-002 Missing WHERE Clause — UPDATE/DELETE without WHERE affects all rows +// OPT-003 Cartesian Product — implicit cross join from multiple FROM tables +// OPT-004 SELECT DISTINCT Overuse — DISTINCT may mask incorrect join conditions +// OPT-005 Subquery in WHERE — suggest converting correlated subqueries to JOINs +// OPT-006 OR in WHERE Clause — OR on different columns may prevent index usage +// OPT-007 Leading Wildcard in LIKE — LIKE '%...' forces a full table scan +// OPT-008 Function on Indexed Column — wrapping a column in a function defeats B-tree indexes +// +// Custom rules implement the Rule interface (ID, Name, Description, Analyze) and are +// passed to NewWithRules. All built-in rules are stateless and safe for concurrent use. // // Quick Start: // -// opt := optimizer.New() +// opt := advisor.New() // result, err := opt.AnalyzeSQL("SELECT * FROM users") // if err != nil { // log.Fatal(err) diff --git a/pkg/cbinding/cbinding.go b/pkg/cbinding/cbinding.go index 31171e8d..1257efa1 100644 --- a/pkg/cbinding/cbinding.go +++ b/pkg/cbinding/cbinding.go @@ -12,6 +12,31 @@ // See the License for the specific language governing permissions and // limitations under the License. +// Package main is compiled as a C-shared library (libgosqlx) that exposes the GoSQLX +// SQL parsing engine to non-Go languages via a plain C FFI. +// +// The library is built with "go build -buildmode=c-shared" (see build.sh) and produces +// a platform-native shared object — libgosqlx.so on Linux, libgosqlx.dylib on macOS, +// and libgosqlx.dll on Windows — together with a generated C header (libgosqlx.h). +// The primary consumer is the Python package pygosqlx, which loads the library at +// runtime via ctypes, but any language with a C FFI (Ruby, Node.js via N-API, Rust FFI, +// etc.) can call the exported symbols directly. 
+// +// All exported functions follow the same contract: they accept C strings (null-terminated +// UTF-8) and return a newly allocated C string containing a JSON-encoded result object. +// Callers must free every returned string with gosqlx_free to avoid memory leaks. +// +// Exported symbols: +// +// gosqlx_parse(sql) — parse SQL, return ParseResult JSON +// gosqlx_validate(sql) — validate SQL syntax, return ValidationResult JSON +// gosqlx_format(sql) — format SQL, return FormatResult JSON +// gosqlx_extract_tables(sql) — extract referenced table names as JSON array +// gosqlx_extract_columns(sql) — extract referenced column names as JSON array +// gosqlx_extract_functions(sql) — extract referenced function names as JSON array +// gosqlx_extract_metadata(sql) — extract tables, columns, functions with schema qualification +// gosqlx_version() — return the library version string (e.g. "1.9.0") +// gosqlx_free(ptr) — free a string previously returned by any gosqlx_* function package main // #include diff --git a/pkg/errors/builders.go b/pkg/errors/builders.go index 8412cfa6..70de257b 100644 --- a/pkg/errors/builders.go +++ b/pkg/errors/builders.go @@ -23,7 +23,13 @@ import ( // Builder functions for common error scenarios -// UnexpectedCharError creates an error for unexpected character in tokenization +// UnexpectedCharError creates an E1001 error for an unexpected character encountered +// during tokenization. The hint instructs the caller to remove or escape the character. 
+// +// Parameters: +// - char: The invalid character found in the SQL input +// - location: Line/column where the character appears +// - sql: Full SQL source used to generate visual context func UnexpectedCharError(char rune, location models.Location, sql string) *Error { return NewError( ErrCodeUnexpectedChar, @@ -32,7 +38,13 @@ func UnexpectedCharError(char rune, location models.Location, sql string) *Error ).WithContext(sql, 1).WithHint(fmt.Sprintf("Remove or escape the character '%c'", char)) } -// UnterminatedStringError creates an error for unterminated string literal +// UnterminatedStringError creates an E1002 error for an unterminated string literal. +// A string literal is considered unterminated when the tokenizer reaches end-of-input +// before finding the matching closing quote character. +// +// Parameters: +// - location: Line/column where the opening quote was found +// - sql: Full SQL source used to generate visual context func UnterminatedStringError(location models.Location, sql string) *Error { return NewError( ErrCodeUnterminatedString, @@ -41,7 +53,13 @@ func UnterminatedStringError(location models.Location, sql string) *Error { ).WithContext(sql, 1).WithHint(GenerateHint(ErrCodeUnterminatedString, "", "")) } -// UnterminatedBlockCommentError creates an error for an unclosed block comment. +// UnterminatedBlockCommentError creates an E1009 error for a block comment that +// was opened with /* but never closed with */. The hint guides the caller to add +// the missing closing delimiter. 
+// +// Parameters: +// - location: Line/column where the /* opening was found +// - sql: Full SQL source used to generate visual context func UnterminatedBlockCommentError(location models.Location, sql string) *Error { return NewError( ErrCodeUnterminatedBlockComment, @@ -50,7 +68,13 @@ func UnterminatedBlockCommentError(location models.Location, sql string) *Error ).WithContext(sql, 2).WithHint("Close the comment with */ or check for unmatched /*") } -// InvalidNumberError creates an error for invalid numeric literal +// InvalidNumberError creates an E1003 error for a malformed numeric literal, such as +// a number with multiple decimal points (1.2.3) or an invalid exponent format. +// +// Parameters: +// - value: The raw string that could not be parsed as a number +// - location: Line/column where the literal starts +// - sql: Full SQL source used to generate visual context func InvalidNumberError(value string, location models.Location, sql string) *Error { return NewError( ErrCodeInvalidNumber, @@ -59,7 +83,16 @@ func InvalidNumberError(value string, location models.Location, sql string) *Err ).WithContext(sql, len(value)).WithHint("Check the numeric format (e.g., 123, 123.45, 1.23e10)") } -// UnexpectedTokenError creates an error for unexpected token in parsing +// UnexpectedTokenError creates an E2001 error for a token that does not fit the +// expected grammar at the current parse position. An intelligent "Did you mean?" +// hint is auto-generated using Levenshtein distance when the token resembles a +// known SQL keyword. 
+// +// Parameters: +// - tokenType: The type of the unexpected token (e.g., "IDENT", "COMMA") +// - tokenValue: The raw text of the token (empty string if not applicable) +// - location: Line/column where the token was found +// - sql: Full SQL source used to generate visual context func UnexpectedTokenError(tokenType, tokenValue string, location models.Location, sql string) *Error { message := fmt.Sprintf("unexpected token: %s", tokenType) if tokenValue != "" { @@ -77,7 +110,16 @@ func UnexpectedTokenError(tokenType, tokenValue string, location models.Location return err } -// ExpectedTokenError creates an error for missing expected token +// ExpectedTokenError creates an E2002 error when a required token is absent or a +// different token appears in its place. The builder applies Levenshtein-based typo +// detection and auto-generates a "Did you mean?" hint when the found token is close +// to the expected one (e.g., FORM vs FROM). +// +// Parameters: +// - expected: The token or keyword that was required (e.g., "FROM") +// - got: The token or keyword that was actually found (e.g., "FORM") +// - location: Line/column where the mismatch occurred +// - sql: Full SQL source used to generate visual context func ExpectedTokenError(expected, got string, location models.Location, sql string) *Error { message := fmt.Sprintf("expected %s, got %s", expected, got) @@ -92,7 +134,14 @@ func ExpectedTokenError(expected, got string, location models.Location, sql stri return err } -// MissingClauseError creates an error for missing required SQL clause +// MissingClauseError creates an E2003 error when a required SQL clause is absent. +// For example, a SELECT statement without a FROM clause, or a JOIN without an ON +// condition. A pre-built hint from CommonHints is used if available. 
+// +// Parameters: +// - clause: Name of the missing clause (e.g., "FROM", "ON") +// - location: Line/column where the clause should have appeared +// - sql: Full SQL source used to generate visual context func MissingClauseError(clause string, location models.Location, sql string) *Error { err := NewError( ErrCodeMissingClause, @@ -110,7 +159,14 @@ func MissingClauseError(clause string, location models.Location, sql string) *Er return err } -// InvalidSyntaxError creates a general syntax error +// InvalidSyntaxError creates an E2004 general syntax error for violations that do +// not match any more specific error code. Use more specific builder functions (e.g., +// ExpectedTokenError, MissingClauseError) when possible for better diagnostics. +// +// Parameters: +// - description: Free-form description of the syntax problem +// - location: Line/column where the violation was detected +// - sql: Full SQL source used to generate visual context func InvalidSyntaxError(description string, location models.Location, sql string) *Error { return NewError( ErrCodeInvalidSyntax, @@ -119,7 +175,14 @@ func InvalidSyntaxError(description string, location models.Location, sql string ).WithContext(sql, 1).WithHint(GenerateHint(ErrCodeInvalidSyntax, "", "")) } -// UnsupportedFeatureError creates an error for unsupported SQL features +// UnsupportedFeatureError creates an E4001 error when the parser encounters a valid +// SQL construct that is recognised but not yet implemented. This distinguishes +// "not supported" from a syntax error so callers can handle the two cases separately. 
+// +// Parameters: +// - feature: Name or description of the unsupported feature (e.g., "LATERAL JOIN") +// - location: Line/column where the feature was encountered +// - sql: Full SQL source used to generate visual context func UnsupportedFeatureError(feature string, location models.Location, sql string) *Error { return NewError( ErrCodeUnsupportedFeature, @@ -128,7 +191,13 @@ func UnsupportedFeatureError(feature string, location models.Location, sql strin ).WithContext(sql, len(feature)).WithHint(GenerateHint(ErrCodeUnsupportedFeature, "", "")) } -// IncompleteStatementError creates an error for incomplete SQL statement +// IncompleteStatementError creates an E2005 error when the parser reaches the end +// of input before a SQL statement is complete. This typically means a clause or +// closing parenthesis is missing. +// +// Parameters: +// - location: Line/column at end-of-input where parsing stopped +// - sql: Full SQL source used to generate visual context func IncompleteStatementError(location models.Location, sql string) *Error { return NewError( ErrCodeIncompleteStatement, @@ -137,14 +206,32 @@ func IncompleteStatementError(location models.Location, sql string) *Error { ).WithContext(sql, 1).WithHint("Complete the SQL statement or check for missing clauses") } -// WrapError wraps an existing error with structured error information +// WrapError creates a structured error that wraps an existing cause error. +// Use this to add error code, location, and SQL context to low-level errors +// (e.g., I/O errors, unexpected runtime panics) so they integrate with the +// GoSQLX error handling pipeline. 
+// +// Parameters: +// - code: ErrorCode classifying the error category +// - message: Human-readable description of what went wrong +// - location: Line/column in the SQL where the problem occurred +// - sql: Full SQL source used to generate visual context +// - cause: Underlying error being wrapped (accessible via errors.Is / errors.As) func WrapError(code ErrorCode, message string, location models.Location, sql string, cause error) *Error { return NewError(code, message, location).WithContext(sql, 1).WithCause(cause) } // Tokenizer DoS Protection Errors (E1006-E1008) -// InputTooLargeError creates an error for input exceeding size limits +// InputTooLargeError creates an E1006 error when the SQL input exceeds the +// configured maximum byte size. This protects against denial-of-service attacks +// that submit extremely large SQL strings. The hint advises reducing input or +// adjusting the MaxInputSize configuration. +// +// Parameters: +// - size: Actual input size in bytes +// - maxSize: Configured maximum size in bytes +// - location: Typically the beginning of input (line 1, column 1) func InputTooLargeError(size, maxSize int64, location models.Location) *Error { return NewError( ErrCodeInputTooLarge, @@ -153,7 +240,16 @@ func InputTooLargeError(size, maxSize int64, location models.Location) *Error { ).WithHint(fmt.Sprintf("Reduce input size to under %d bytes or adjust MaxInputSize configuration", maxSize)) } -// TokenLimitReachedError creates an error for token count exceeding limit +// TokenLimitReachedError creates an E1007 error when the number of tokens produced +// by the tokenizer exceeds the configured maximum. This protects against pathological +// SQL that generates an excessive token stream. The hint suggests simplifying the +// query or raising the MaxTokens limit. 
+// +// Parameters: +// - count: Actual number of tokens produced +// - maxTokens: Configured token count limit +// - location: Position in SQL where the limit was hit +// - sql: Full SQL source used to generate visual context func TokenLimitReachedError(count, maxTokens int, location models.Location, sql string) *Error { return NewError( ErrCodeTokenLimitReached, @@ -162,7 +258,13 @@ func TokenLimitReachedError(count, maxTokens int, location models.Location, sql ).WithContext(sql, 1).WithHint(fmt.Sprintf("Simplify query or adjust MaxTokens limit (currently %d)", maxTokens)) } -// TokenizerPanicError creates an error for recovered tokenizer panic +// TokenizerPanicError creates an E1008 error for a panic that was recovered inside +// the tokenizer. This signals a tokenizer implementation bug rather than a user +// input problem. The hint asks the user to report the issue. +// +// Parameters: +// - panicValue: The value recovered from the panic (may be an error or string) +// - location: Position in SQL at the time of the panic func TokenizerPanicError(panicValue interface{}, location models.Location) *Error { return NewError( ErrCodeTokenizerPanic, @@ -173,7 +275,16 @@ func TokenizerPanicError(panicValue interface{}, location models.Location) *Erro // Parser Feature Errors (E2007-E2012) -// RecursionDepthLimitError creates an error for recursion depth exceeded +// RecursionDepthLimitError creates an E2007 error when the parser's recursion +// counter exceeds the configured maximum. This guards against deeply nested +// subqueries and expressions that could exhaust the call stack. The hint suggests +// simplifying the query structure. 
+// +// Parameters: +// - depth: Current recursion depth when the limit was reached +// - maxDepth: Configured maximum recursion depth +// - location: Position in SQL where the depth limit was triggered +// - sql: Full SQL source used to generate visual context func RecursionDepthLimitError(depth, maxDepth int, location models.Location, sql string) *Error { return NewError( ErrCodeRecursionDepthLimit, @@ -182,7 +293,14 @@ func RecursionDepthLimitError(depth, maxDepth int, location models.Location, sql ).WithContext(sql, 1).WithHint(fmt.Sprintf("Simplify nested expressions or subqueries (current limit: %d levels)", maxDepth)) } -// UnsupportedDataTypeError creates an error for unsupported data type +// UnsupportedDataTypeError creates an E2008 error when the parser encounters a +// column data type that GoSQLX does not yet support. Supported types include +// INTEGER, VARCHAR, TEXT, and TIMESTAMP. +// +// Parameters: +// - dataType: The unrecognised data type string (e.g., "GEOMETRY", "JSONB") +// - location: Line/column where the type token was found +// - sql: Full SQL source used to generate visual context func UnsupportedDataTypeError(dataType string, location models.Location, sql string) *Error { return NewError( ErrCodeUnsupportedDataType, @@ -191,7 +309,14 @@ func UnsupportedDataTypeError(dataType string, location models.Location, sql str ).WithContext(sql, len(dataType)).WithHint("Use a supported data type (e.g., INTEGER, VARCHAR, TEXT, TIMESTAMP)") } -// UnsupportedConstraintError creates an error for unsupported constraint +// UnsupportedConstraintError creates an E2009 error when a table constraint type +// is present in the SQL but not yet handled by the parser. Supported constraints +// are PRIMARY KEY, FOREIGN KEY, UNIQUE, NOT NULL, and CHECK. 
+// +// Parameters: +// - constraint: The unrecognised constraint type (e.g., "EXCLUDE") +// - location: Line/column where the constraint was found +// - sql: Full SQL source used to generate visual context func UnsupportedConstraintError(constraint string, location models.Location, sql string) *Error { return NewError( ErrCodeUnsupportedConstraint, @@ -200,7 +325,14 @@ func UnsupportedConstraintError(constraint string, location models.Location, sql ).WithContext(sql, len(constraint)).WithHint("Supported constraints: PRIMARY KEY, FOREIGN KEY, UNIQUE, NOT NULL, CHECK") } -// UnsupportedJoinError creates an error for unsupported JOIN type +// UnsupportedJoinError creates an E2010 error for a JOIN type that the parser +// recognises syntactically but does not yet fully support. Supported join types +// are INNER, LEFT, RIGHT, FULL, CROSS, and NATURAL. +// +// Parameters: +// - joinType: The unrecognised or unsupported join type string +// - location: Line/column where the join type token was found +// - sql: Full SQL source used to generate visual context func UnsupportedJoinError(joinType string, location models.Location, sql string) *Error { return NewError( ErrCodeUnsupportedJoin, @@ -209,7 +341,14 @@ func UnsupportedJoinError(joinType string, location models.Location, sql string) ).WithContext(sql, len(joinType)).WithHint("Supported JOINs: INNER JOIN, LEFT JOIN, RIGHT JOIN, FULL JOIN, CROSS JOIN, NATURAL JOIN") } -// InvalidCTEError creates an error for invalid CTE (WITH clause) syntax +// InvalidCTEError creates an E2011 error for malformed Common Table Expression +// (WITH clause) syntax. Common causes include a missing AS keyword, missing +// parentheses around the CTE body, or a missing trailing SELECT statement. 
+// +// Parameters: +// - description: Explanation of the specific CTE syntax problem +// - location: Line/column where the CTE syntax error was detected +// - sql: Full SQL source used to generate visual context func InvalidCTEError(description string, location models.Location, sql string) *Error { return NewError( ErrCodeInvalidCTE, @@ -218,7 +357,15 @@ func InvalidCTEError(description string, location models.Location, sql string) * ).WithContext(sql, 1).WithHint("Check WITH clause syntax: WITH cte_name AS (SELECT ...) SELECT * FROM cte_name") } -// InvalidSetOperationError creates an error for invalid set operation +// InvalidSetOperationError creates an E2012 error for an invalid UNION, INTERSECT, +// or EXCEPT set operation. The most common cause is a column count or type mismatch +// between the left and right queries. +// +// Parameters: +// - operation: The set operation keyword (e.g., "UNION", "INTERSECT") +// - description: Explanation of why the operation is invalid +// - location: Line/column where the set operation was found +// - sql: Full SQL source used to generate visual context func InvalidSetOperationError(operation, description string, location models.Location, sql string) *Error { return NewError( ErrCodeInvalidSetOperation, @@ -229,7 +376,14 @@ func InvalidSetOperationError(operation, description string, location models.Loc // Semantic Errors (E3001-E3004) -// UndefinedTableError creates an error for referencing an undefined table +// UndefinedTableError creates an E3001 error when a table name referenced in the +// query cannot be resolved in the available schema. The hint suggests checking for +// typos and verifying the table exists. 
+// +// Parameters: +// - tableName: The unresolved table name +// - location: Line/column where the table reference was found +// - sql: Full SQL source used to generate visual context func UndefinedTableError(tableName string, location models.Location, sql string) *Error { return NewError( ErrCodeUndefinedTable, @@ -238,7 +392,15 @@ func UndefinedTableError(tableName string, location models.Location, sql string) ).WithContext(sql, len(tableName)).WithHint(fmt.Sprintf("Check the table name '%s' for typos or ensure it exists in the schema", tableName)) } -// UndefinedColumnError creates an error for referencing an undefined column +// UndefinedColumnError creates an E3002 error when a column name cannot be found +// in the referenced table's schema. When tableName is non-empty, the error message +// includes the table context for clearer diagnosis. +// +// Parameters: +// - columnName: The unresolved column name +// - tableName: The table where the column was expected (empty if unknown) +// - location: Line/column where the column reference was found +// - sql: Full SQL source used to generate visual context func UndefinedColumnError(columnName, tableName string, location models.Location, sql string) *Error { message := fmt.Sprintf("column '%s' does not exist", columnName) hint := fmt.Sprintf("Check the column name '%s' for typos or ensure it exists in the table", columnName) @@ -253,7 +415,16 @@ func UndefinedColumnError(columnName, tableName string, location models.Location ).WithContext(sql, len(columnName)).WithHint(hint) } -// TypeMismatchError creates an error for type mismatch in expressions +// TypeMismatchError creates an E3003 error when two sides of an expression have +// incompatible types (e.g., comparing an INTEGER column to a TEXT literal without +// a CAST). When context is non-empty, it is included in the message for clarity. 
+// +// Parameters: +// - leftType: Data type of the left operand (e.g., "INTEGER") +// - rightType: Data type of the right operand (e.g., "TEXT") +// - context: Optional description of where the mismatch occurs (e.g., "WHERE clause") +// - location: Line/column where the type mismatch was detected +// - sql: Full SQL source used to generate visual context func TypeMismatchError(leftType, rightType, context string, location models.Location, sql string) *Error { message := fmt.Sprintf("type mismatch: cannot compare %s with %s", leftType, rightType) if context != "" { @@ -266,7 +437,15 @@ func TypeMismatchError(leftType, rightType, context string, location models.Loca ).WithContext(sql, 1).WithHint(fmt.Sprintf("Ensure compatible types or use explicit CAST to convert %s to %s", leftType, rightType)) } -// AmbiguousColumnError creates an error for ambiguous column reference +// AmbiguousColumnError creates an E3004 error when a column name appears in more +// than one table in scope and no qualifier disambiguates it. The hint suggests +// qualifying the column with a table name or alias. +// +// Parameters: +// - columnName: The ambiguous column name +// - tables: Slice of table names that all contain the column (may be empty if unknown) +// - location: Line/column where the ambiguous reference was found +// - sql: Full SQL source used to generate visual context func AmbiguousColumnError(columnName string, tables []string, location models.Location, sql string) *Error { tableList := "multiple tables" if len(tables) > 0 { diff --git a/pkg/errors/cache.go b/pkg/errors/cache.go index d66308fe..048789ed 100644 --- a/pkg/errors/cache.go +++ b/pkg/errors/cache.go @@ -109,19 +109,31 @@ func (c *keywordSuggestionCache) size() int { return len(c.cache) } -// ClearSuggestionCache clears the keyword suggestion cache. -// Useful for testing or when keyword list changes. +// ClearSuggestionCache removes all entries from the keyword suggestion cache. 
+// Call this in tests to ensure a clean state between test cases, or after +// modifying the keyword list so that stale suggestions are not served. func ClearSuggestionCache() { suggestionCache.clear() } -// SuggestionCacheSize returns the current size of the suggestion cache. -// Useful for monitoring and debugging. +// SuggestionCacheSize returns the number of entries currently held in the keyword +// suggestion cache. Use this for monitoring cache growth and deciding whether to +// adjust the maximum size. func SuggestionCacheSize() int { return suggestionCache.size() } -// SuggestionCacheStats returns cache statistics +// SuggestionCacheStats holds observability metrics for the keyword suggestion cache. +// Retrieve an instance via GetSuggestionCacheStats and reset counters via +// ResetSuggestionCacheStats. +// +// Fields: +// - Size: Current number of entries in the cache +// - MaxSize: Configured maximum capacity (default 1000) +// - Hits: Number of cache lookups that returned a cached value +// - Misses: Number of cache lookups that required computing a new suggestion +// - Evictions: Total number of entries removed during partial eviction sweeps +// - HitRate: Ratio of Hits to (Hits + Misses); 0.0 when no lookups have occurred type SuggestionCacheStats struct { Size int MaxSize int @@ -131,7 +143,14 @@ type SuggestionCacheStats struct { HitRate float64 } -// GetSuggestionCacheStats returns current cache statistics +// GetSuggestionCacheStats returns a snapshot of the current keyword suggestion cache +// metrics. The returned struct is safe to read without any additional locking. +// Use this in observability dashboards or benchmarks to track cache efficiency. 
+// +// Example: +// +// stats := errors.GetSuggestionCacheStats() +// fmt.Printf("Cache hit rate: %.1f%%\n", stats.HitRate*100) func GetSuggestionCacheStats() SuggestionCacheStats { hits := atomic.LoadUint64(&suggestionCache.hits) misses := atomic.LoadUint64(&suggestionCache.misses) @@ -153,8 +172,9 @@ func GetSuggestionCacheStats() SuggestionCacheStats { } } -// ResetSuggestionCacheStats resets the cache statistics counters. -// Useful for testing and monitoring. +// ResetSuggestionCacheStats zeroes all hit, miss, and eviction counters in the +// keyword suggestion cache without clearing cached entries. Call this at the start +// of a benchmark or monitoring interval to obtain a clean measurement window. func ResetSuggestionCacheStats() { atomic.StoreUint64(&suggestionCache.hits, 0) atomic.StoreUint64(&suggestionCache.misses, 0) diff --git a/pkg/errors/formatter.go b/pkg/errors/formatter.go index 9b985d1f..ce168903 100644 --- a/pkg/errors/formatter.go +++ b/pkg/errors/formatter.go @@ -21,8 +21,16 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/models" ) -// FormatErrorWithContext formats an error with SQL context and visual indicators -// This is a convenience function that wraps the Error.Error() method +// FormatErrorWithContext formats an error with SQL context and visual indicators. +// For *Error values it delegates to the structured Error.Error() method, which +// includes code, location, SQL context highlighting, hint, and help URL. For all +// other error types it falls back to a plain "Error: <err>" string. +// +// Parameters: +// - err: The error to format (may be *Error or a generic error) +// - sql: The SQL source; currently unused for *Error (context is already attached) +// +// Returns the formatted error string ready for display to end users. 
func FormatErrorWithContext(err error, sql string) string { // If it's already a structured error, just return its formatted string if structErr, ok := err.(*Error); ok { @@ -33,7 +41,19 @@ func FormatErrorWithContext(err error, sql string) string { return fmt.Sprintf("Error: %v", err) } -// FormatErrorWithContextAt formats an error at a specific location with SQL context +// FormatErrorWithContextAt creates a structured error for the given code and location, +// attaches the SQL context window, and auto-generates a hint. It then returns the +// fully-formatted error string. This is useful for one-shot error formatting without +// retaining the *Error value. +// +// Parameters: +// - code: ErrorCode classifying the error category (e.g., ErrCodeExpectedToken) +// - message: Human-readable description of the error +// - location: Precise line/column where the error occurred +// - sql: Full SQL source used to generate the context window +// - highlightLen: Number of characters to highlight at the error column +// +// Returns the complete formatted error string including context highlighting. func FormatErrorWithContextAt(code ErrorCode, message string, location models.Location, sql string, highlightLen int) string { err := NewError(code, message, location) err = err.WithContext(sql, highlightLen) @@ -46,8 +66,17 @@ func FormatErrorWithContextAt(code ErrorCode, message string, location models.Lo return err.Error() } -// FormatMultiLineContext formats error context for multi-line SQL with extended context -// Shows up to 3 lines (1 before, error line, 1 after) with proper indentation +// FormatMultiLineContext formats an SQL context window around a specific error location. +// It shows up to three lines: one before the error, the error line itself, and one +// after. A caret indicator (^) is rendered below the error column, with optional +// multi-character highlighting when highlightLen > 1. 
+// +// Parameters: +// - sql: The full SQL source string (may contain newlines) +// - location: The line/column of the error (1-based) +// - highlightLen: Number of characters to highlight; 1 renders a single caret +// +// Returns the formatted context block, or an empty string if location is invalid. func FormatMultiLineContext(sql string, location models.Location, highlightLen int) string { if sql == "" || location.Line <= 0 { return "" } @@ -107,8 +136,18 @@ func FormatMultiLineContext(sql string, location models.Location, highlightLen i return sb.String() } -// FormatErrorSummary provides a brief summary of an error without full context -// Useful for logging or when SQL context is not needed +// FormatErrorSummary provides a concise one-line error summary suitable for +// structured logging and monitoring systems where a full context window would +// be too verbose. For *Error values the output format is: +// +// [E2001] unexpected token: COMMA at line 5, column 20 +// +// For other error types the output is "Error: <message>". +// +// Parameters: +// - err: The error to summarise +// +// Returns the one-line summary string. func FormatErrorSummary(err error) string { if structErr, ok := err.(*Error); ok { return fmt.Sprintf("[%s] %s at line %d, column %d", @@ -120,7 +159,20 @@ func FormatErrorSummary(err error) string { return fmt.Sprintf("Error: %v", err) } -// FormatErrorWithSuggestion formats an error with an intelligent suggestion +// FormatErrorWithSuggestion creates and formats a structured error that includes a +// manually provided hint. When suggestion is empty, the function falls back to +// auto-generating a hint via GenerateHint. This is the preferred formatter when +// the caller already knows the correct fix. 
+// +// Parameters: +// - code: ErrorCode classifying the error category +// - message: Human-readable description of the error +// - location: Precise line/column where the error occurred +// - sql: Full SQL source used to generate the context window +// - highlightLen: Number of characters to highlight at the error column +// - suggestion: Custom hint text; empty string triggers auto-generation +// +// Returns the complete formatted error string. func FormatErrorWithSuggestion(code ErrorCode, message string, location models.Location, sql string, highlightLen int, suggestion string) string { err := NewError(code, message, location) err = err.WithContext(sql, highlightLen) @@ -137,7 +189,16 @@ func FormatErrorWithSuggestion(code ErrorCode, message string, location models.L return err.Error() } -// FormatErrorList formats multiple errors in a readable list +// FormatErrorList formats a slice of structured errors into a numbered list with +// full context for each entry. The output begins with a count line and separates +// each error with a blank line. +// +// Returns "No errors" when the slice is empty. +// +// Parameters: +// - errors: Slice of *Error values to format +// +// Returns the multi-error report string. func FormatErrorList(errors []*Error) string { if len(errors) == 0 { return "No errors" @@ -155,7 +216,21 @@ func FormatErrorList(errors []*Error) string { return sb.String() } -// FormatErrorWithExample formats an error with a corrected example +// FormatErrorWithExample formats an error and appends a side-by-side "Wrong / Correct" +// example to the hint. This is particularly useful for educational error messages +// (e.g., in linters or IDEs) where showing the correct pattern helps users learn +// the expected SQL syntax. 
+// +// Parameters: +// - code: ErrorCode classifying the error category +// - message: Human-readable description of the error +// - location: Precise line/column where the error occurred +// - sql: Full SQL source used to generate the context window +// - highlightLen: Number of characters to highlight at the error column +// - wrongExample: The erroneous SQL fragment (e.g., "SELECT * FORM users") +// - correctExample: The corrected SQL fragment (e.g., "SELECT * FROM users") +// +// Returns the complete formatted error string including the before/after example. func FormatErrorWithExample(code ErrorCode, message string, location models.Location, sql string, highlightLen int, wrongExample, correctExample string) string { err := NewError(code, message, location) err = err.WithContext(sql, highlightLen) @@ -167,7 +242,19 @@ func FormatErrorWithExample(code ErrorCode, message string, location models.Loca return err.Error() } -// FormatContextWindow formats a larger context window (up to N lines before and after) +// FormatContextWindow formats a configurable SQL context window of up to linesBefore +// lines before and linesAfter lines after the error line. Prefer this over +// FormatMultiLineContext when more surrounding context is needed (e.g., in IDE +// hover messages or verbose diagnostic reports). +// +// Parameters: +// - sql: The full SQL source string +// - location: The line/column of the error (1-based) +// - highlightLen: Number of characters to highlight at the error column +// - linesBefore: Number of source lines to display before the error line +// - linesAfter: Number of source lines to display after the error line +// +// Returns the formatted context block, or an empty string if location is invalid. 
func FormatContextWindow(sql string, location models.Location, highlightLen int, linesBefore, linesAfter int) string { if sql == "" || location.Line <= 0 { return "" @@ -231,13 +318,27 @@ func FormatContextWindow(sql string, location models.Location, highlightLen int, return sb.String() } -// IsStructuredError checks if an error is a structured GoSQLX error +// IsStructuredError reports whether err is a GoSQLX *Error value. +// Use this to distinguish GoSQLX structured errors from generic Go errors +// before calling functions that require *Error (e.g., ExtractLocation). +// +// Example: +// +// if errors.IsStructuredError(err) { +// loc, _ := errors.ExtractLocation(err) +// // use loc for IDE diagnostics +// } func IsStructuredError(err error) bool { _, ok := err.(*Error) return ok } -// ExtractLocation extracts location information from an error +// ExtractLocation extracts the line/column location from a GoSQLX *Error. +// This is the preferred way to obtain location data for IDE integrations such as +// LSP diagnostics, since it handles the type assertion safely. +// +// Returns the Location and true when err is a *Error; returns a zero Location +// and false for all other error types. func ExtractLocation(err error) (models.Location, bool) { if structErr, ok := err.(*Error); ok { return structErr.Location, true @@ -245,7 +346,12 @@ func ExtractLocation(err error) (models.Location, bool) { return models.Location{}, false } -// ExtractErrorCode extracts the error code from an error +// ExtractErrorCode extracts the ErrorCode from a GoSQLX *Error. +// Unlike GetCode, this function returns a boolean indicating whether the extraction +// succeeded, making it suitable for use in type-switch–style handling. +// +// Returns the ErrorCode and true when err is a *Error; returns an empty string +// and false for all other error types. 
func ExtractErrorCode(err error) (ErrorCode, bool) { if structErr, ok := err.(*Error); ok { return structErr.Code, true diff --git a/pkg/errors/hints.go b/pkg/errors/hints.go index 3ef110c8..f9b114ba 100644 --- a/pkg/errors/hints.go +++ b/pkg/errors/hints.go @@ -29,8 +29,17 @@ var commonKeywords = []string{ "DISTINCT", "ALL", "ANY", "SOME", "EXISTS", "ASC", "DESC", } -// SuggestKeyword uses Levenshtein distance to suggest the closest matching keyword. -// Results are cached to improve performance in repeated error scenarios. +// SuggestKeyword uses Levenshtein distance to suggest the closest SQL keyword +// matching the given input token. The suggestion is only returned when the edit +// distance is within half the length of the input (minimum threshold of 2), which +// prevents semantically unrelated tokens from being suggested. +// +// Results are stored in a bounded LRU-style cache shared across all calls to +// improve performance during repeated error-reporting scenarios where the same +// misspelled token appears many times (e.g., in batch query validation). +// +// Returns the matching keyword in uppercase, or an empty string if no sufficiently +// close match is found. func SuggestKeyword(input string) string { input = strings.ToUpper(input) if input == "" { @@ -122,7 +131,18 @@ func min(a, b, c int) int { return c } -// GenerateHint generates an intelligent hint based on the error type and context +// GenerateHint generates an actionable hint message tailored to the error code +// and the tokens involved. For token-mismatch errors it applies SuggestKeyword to +// detect typos and produces a "Did you mean?" message. For structural errors such +// as missing clauses or unsupported features it returns a generic guidance string. 
+// +// Parameters: +// - code: The ErrorCode that identifies the class of error (e.g., ErrCodeExpectedToken) +// - expected: The token or keyword that was required (used for ErrCodeExpectedToken +// and ErrCodeMissingClause messages) +// - found: The token that was actually present (used for typo detection) +// +// Returns an empty string for error codes that have no pre-defined hint. func GenerateHint(code ErrorCode, expected, found string) string { switch code { case ErrCodeExpectedToken: @@ -162,7 +182,22 @@ func GenerateHint(code ErrorCode, expected, found string) string { } } -// Common error scenarios with pre-built hints +// CommonHints is a pre-built map of human-readable hint messages keyed by a +// short scenario identifier. The map covers the most frequent SQL authoring +// mistakes and is intended for use in error formatters that want to attach a +// contextual hint without invoking the full suggestion pipeline. +// +// The available keys are: +// +// "missing_from" — SELECT without a FROM clause +// "missing_where" — reminder to add a WHERE filter +// "unclosed_paren" — unbalanced parentheses +// "missing_comma" — list items not separated by commas +// "invalid_join" — JOIN clause missing ON or USING +// "duplicate_alias" — non-unique table alias +// "ambiguous_column" — unqualified column reference in multi-table query +// +// Use GetCommonHint for safe lookup with a zero-value fallback. var CommonHints = map[string]string{ "missing_from": "SELECT statements require a FROM clause unless selecting constants", "missing_where": "Add a WHERE clause to filter the results", @@ -173,7 +208,12 @@ var CommonHints = map[string]string{ "ambiguous_column": "Qualify the column name with the table name or alias (e.g., table.column)", } -// GetCommonHint retrieves a pre-defined hint by key +// GetCommonHint retrieves a pre-defined hint message from CommonHints by its +// scenario key. 
This is the safe alternative to indexing the map directly; it +// returns an empty string instead of the zero value when the key is absent, +// making nil-check patterns unnecessary in callers. +// +// See CommonHints for the full list of valid keys. func GetCommonHint(key string) string { if hint, ok := CommonHints[key]; ok { return hint diff --git a/pkg/errors/suggestions.go b/pkg/errors/suggestions.go index b168c347..08a034f7 100644 --- a/pkg/errors/suggestions.go +++ b/pkg/errors/suggestions.go @@ -20,7 +20,14 @@ import ( "strings" ) -// ErrorPattern represents a common SQL error pattern with suggestions +// ErrorPattern represents a common SQL error pattern with an associated suggestion. +// Each pattern matches error messages (not raw SQL) and provides a human-readable +// hint for fixing the underlying mistake. +// +// Fields: +// - Pattern: Compiled regular expression matched against error message text +// - Description: Short label for the pattern (used in documentation) +// - Suggestion: Actionable fix advice to show to the user type ErrorPattern struct { Pattern *regexp.Regexp Description string @@ -71,7 +78,16 @@ var errorPatterns = []ErrorPattern{ }, } -// MistakePattern represents common SQL mistakes with explanations +// MistakePattern represents a catalogued SQL coding mistake together with a +// corrected example and a plain-language explanation. The catalogue covers 20+ +// common mistakes including aggregate misuse, window function syntax, CTE problems, +// and set operation mismatches. 
+// +// Fields: +// - Name: Machine-readable key used to look up the pattern (e.g., "missing_group_by") +// - Example: A minimal SQL fragment that demonstrates the mistake +// - Correct: The corrected version of the same fragment +// - Explanation: Human-readable explanation of why Example is wrong and Correct is right type MistakePattern struct { Name string Example string // Example of the mistake @@ -203,7 +219,12 @@ var commonMistakes = []MistakePattern{ }, } -// SuggestFromPattern tries to match error message against known patterns +// SuggestFromPattern tries to match an error message string against the built-in +// errorPatterns catalogue and returns the associated suggestion. This is useful for +// augmenting generic error messages with actionable advice without re-parsing the +// original SQL. +// +// Returns the suggestion string if a pattern matches, or empty string if none match. func SuggestFromPattern(errorMessage string) string { for _, pattern := range errorPatterns { if pattern.Pattern.MatchString(errorMessage) { @@ -213,7 +234,13 @@ func SuggestFromPattern(errorMessage string) string { return "" } -// GetMistakeExplanation returns explanation for a common mistake +// GetMistakeExplanation looks up a MistakePattern by its machine-readable name. +// Use this to retrieve full before/after examples and explanations for known SQL +// anti-patterns. The name must exactly match one of the keys in the commonMistakes +// catalogue (e.g., "missing_group_by", "window_function_without_over"). +// +// Returns the matching MistakePattern and true, or a zero value and false when +// the name is not found. 
func GetMistakeExplanation(mistakeName string) (MistakePattern, bool) { for _, mistake := range commonMistakes { if mistake.Name == mistakeName { @@ -223,7 +250,17 @@ func GetMistakeExplanation(mistakeName string) (MistakePattern, bool) { return MistakePattern{}, false } -// AnalyzeTokenError analyzes token-based errors and provides context-aware suggestions +// AnalyzeTokenError produces a context-aware suggestion string for token-level +// parse errors. It inspects the actual and expected token types to provide specific +// guidance — for example, detecting when a quoted string is used where a number is +// expected, or when an unknown identifier looks like a misspelled keyword. +// +// Parameters: +// - tokenType: Type of the unexpected token (e.g., "STRING", "NUMBER", "IDENT") +// - tokenValue: Raw text value of the unexpected token +// - expectedType: Token type the parser was expecting (e.g., "NUMBER", "RPAREN") +// +// Returns a human-readable suggestion string; never returns empty string. func AnalyzeTokenError(tokenType, tokenValue, expectedType string) string { // String literal where number expected if tokenType == "STRING" && (expectedType == "NUMBER" || expectedType == "INTEGER") { @@ -257,7 +294,14 @@ func AnalyzeTokenError(tokenType, tokenValue, expectedType string) string { return fmt.Sprintf("Expected %s but found %s. Review the SQL syntax at this position.", expectedType, tokenType) } -// SuggestForIncompleteStatement provides suggestions for incomplete SQL statements +// SuggestForIncompleteStatement returns a suggestion string explaining what tokens +// or clauses are expected to follow the given SQL keyword. This is used when the +// parser encounters an unexpected end-of-input after a keyword. +// +// Parameters: +// - lastKeyword: The last SQL keyword seen before end-of-input (e.g., "SELECT", "FROM") +// +// Returns the context-appropriate completion hint, or a generic fallback message. 
func SuggestForIncompleteStatement(lastKeyword string) string { suggestions := map[string]string{ "SELECT": "Add columns to select and FROM clause: SELECT columns FROM table", @@ -285,7 +329,15 @@ func SuggestForIncompleteStatement(lastKeyword string) string { return "Complete the SQL statement with required clauses and syntax." } -// SuggestForSyntaxError provides context-aware suggestions for syntax errors +// SuggestForSyntaxError returns a context-aware suggestion string for a syntax error. +// It inspects the surrounding SQL context (e.g., whether a SELECT or JOIN keyword +// is present) and the expected token to provide targeted guidance. +// +// Parameters: +// - context: A snippet of the SQL surrounding the error (used for keyword detection) +// - expectedToken: The token or clause that was expected (e.g., "FROM", ",", "ON") +// +// Returns a human-readable hint specific to the context, or a generic fallback. func SuggestForSyntaxError(context, expectedToken string) string { contextUpper := strings.ToUpper(context) @@ -333,7 +385,16 @@ func SuggestForSyntaxError(context, expectedToken string) string { return fmt.Sprintf("Check SQL syntax. Expected %s in this context.", expectedToken) } -// GenerateDidYouMean generates "Did you mean?" suggestions for typos +// GenerateDidYouMean generates a "Did you mean?" suggestion by finding the closest +// match(es) to actual in possibleValues using Levenshtein distance. It only returns +// a suggestion when the edit distance is within half the length of actual (minimum +// threshold of 2), preventing spurious suggestions for completely unrelated words. +// +// Parameters: +// - actual: The misspelled or unrecognised word entered by the user +// - possibleValues: Candidate correct values to compare against +// +// Returns a suggestion string, or empty string if no close match is found. 
func GenerateDidYouMean(actual string, possibleValues []string) string { if len(possibleValues) == 0 { return "" @@ -370,7 +431,17 @@ func GenerateDidYouMean(actual string, possibleValues []string) string { return "" } -// FormatMistakeExample formats a mistake pattern for display +// FormatMistakeExample formats a MistakePattern into a human-readable multi-line +// block suitable for displaying in error messages, documentation, or interactive +// tutorials. The output includes the mistake name, the wrong SQL snippet, the +// corrected SQL snippet, and an explanation. +// +// Example output: +// +// Common Mistake: missing_group_by +// Wrong: SELECT dept, COUNT(*) FROM employees +// Right: SELECT dept, COUNT(*) FROM employees GROUP BY dept +// Explanation: Non-aggregated columns in SELECT must appear in GROUP BY func FormatMistakeExample(mistake MistakePattern) string { var sb strings.Builder sb.WriteString(fmt.Sprintf("Common Mistake: %s\n", mistake.Name)) @@ -380,7 +451,15 @@ func FormatMistakeExample(mistake MistakePattern) string { return sb.String() } -// SuggestForWindowFunction provides suggestions for window function errors +// SuggestForWindowFunction provides targeted suggestions for window function syntax +// errors. It inspects the SQL context to detect common mistakes such as a missing +// OVER clause, PARTITION BY outside OVER, or a window frame without ORDER BY. +// +// Parameters: +// - context: A SQL snippet containing the window function usage +// - functionName: The name of the window function (e.g., "ROW_NUMBER", "SUM") +// +// Returns a specific remediation hint for the detected problem. func SuggestForWindowFunction(context, functionName string) string { contextUpper := strings.ToUpper(context) @@ -403,7 +482,15 @@ func SuggestForWindowFunction(context, functionName string) string { return "Check window function syntax: function_name OVER ([PARTITION BY ...] [ORDER BY ...] 
[frame_clause])" } -// SuggestForCTE provides suggestions for Common Table Expression errors +// SuggestForCTE provides targeted suggestions for Common Table Expression (WITH +// clause) syntax errors. It detects problems such as a WITH clause not followed by +// a DML statement, a RECURSIVE CTE missing UNION, or multiple CTEs not separated +// by commas. +// +// Parameters: +// - context: A SQL snippet containing the WITH clause and surrounding context +// +// Returns a specific remediation hint for the detected problem. func SuggestForCTE(context string) string { contextUpper := strings.ToUpper(context) @@ -426,7 +513,15 @@ func SuggestForCTE(context string) string { return "Check CTE syntax: WITH cte_name AS (query) SELECT ... or WITH RECURSIVE cte AS (base UNION ALL recursive) ..." } -// SuggestForSetOperation provides suggestions for UNION/INTERSECT/EXCEPT errors +// SuggestForSetOperation provides targeted suggestions for UNION, INTERSECT, and +// EXCEPT syntax errors. It detects ORDER BY inside a subquery (which should be +// after the full set operation) and column count/type mismatches. +// +// Parameters: +// - operation: The set operation keyword (e.g., "UNION", "INTERSECT", "EXCEPT") +// - context: A SQL snippet containing the set operation +// +// Returns a specific remediation hint for the detected problem. func SuggestForSetOperation(operation, context string) string { contextUpper := strings.ToUpper(context) @@ -443,7 +538,15 @@ func SuggestForSetOperation(operation, context string) string { return fmt.Sprintf("Check %s syntax: SELECT ... %s SELECT ... [ORDER BY ...]", operation, operation) } -// SuggestForJoinError provides enhanced suggestions for JOIN-related errors +// SuggestForJoinError provides targeted suggestions for JOIN syntax errors. It +// detects missing ON or USING conditions (noting that CROSS JOIN is the sole +// exception) and ambiguous column references in join conditions. 
+// +// Parameters: +// - joinType: The JOIN type keyword (e.g., "INNER", "LEFT", "CROSS") +// - context: A SQL snippet containing the JOIN clause +// +// Returns a specific remediation hint for the detected problem. func SuggestForJoinError(joinType, context string) string { contextUpper := strings.ToUpper(context) @@ -464,7 +567,13 @@ func SuggestForJoinError(joinType, context string) string { return fmt.Sprintf("Check %s JOIN syntax: FROM table1 %s JOIN table2 ON condition", joinType, joinType) } -// GetAdvancedFeatureHint returns hints for advanced SQL features +// GetAdvancedFeatureHint returns a brief description and usage hint for an advanced +// SQL feature. Supported feature keys include: "window_functions", "cte", +// "recursive_cte", "set_operations", "window_frames", "partition_by", +// "lateral_join", and "grouping_sets". +// +// The feature name is normalised to lowercase with spaces replaced by underscores +// before lookup. Returns a generic documentation link if the feature is not found. func GetAdvancedFeatureHint(feature string) string { hints := map[string]string{ "window_functions": "Window functions: ROW_NUMBER(), RANK(), DENSE_RANK(), LAG(), LEAD(), SUM() OVER (), etc.", diff --git a/pkg/formatter/compat.go b/pkg/formatter/compat.go index 5ca04d9d..773070dc 100644 --- a/pkg/formatter/compat.go +++ b/pkg/formatter/compat.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package formatter — backward-compatibility hook registration. +// compat.go — backward-compatibility hook registration. // // This init() function registers the pkg/formatter rendering functions into the // ast package's FormatStatementFunc / FormatExpressionFunc / FormatASTFunc @@ -24,6 +24,7 @@ // which is the common case for any application that uses the formatter. Callers // that only import pkg/sql/ast (without pkg/formatter) will receive SQL() // fallback output from the deprecated shims. 
+ package formatter import ( diff --git a/pkg/formatter/formatter.go b/pkg/formatter/formatter.go index 8e448b8f..d241334a 100644 --- a/pkg/formatter/formatter.go +++ b/pkg/formatter/formatter.go @@ -12,12 +12,81 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package formatter provides a public API for formatting SQL strings. +// Package formatter provides a public API for formatting and pretty-printing SQL strings. // -// Usage: +// The formatter parses SQL using the GoSQLX tokenizer and parser, then renders +// the resulting AST back to text using a visitor-based renderer. This approach +// guarantees that output is syntactically valid and consistently styled. The +// package follows the same separation of concerns as go/ast and go/printer: +// AST nodes carry no formatting logic; all rendering is handled here. // -// f := formatter.New(formatter.Options{IndentSize: 2, Uppercase: true}) -// formatted, err := f.Format("select id,name from users where id=1") +// # Key Types and Functions +// +// The primary entry point is the Formatter type, configured with Options: +// +// - Formatter — stateful formatter created with New(Options); call Format(sql) to reformat a query. +// - Options — controls IndentSize (spaces per level), Uppercase (keyword case), and Compact (single-line output). +// - FormatString — convenience function for one-shot formatting with default options (2-space indent, lowercase keywords, readable multi-line output). +// - FormatAST — low-level renderer that accepts a parsed *ast.AST and ast.FormatOptions directly. +// - FormatStatement / FormatExpression — render individual AST nodes; used by the LSP formatter and linter auto-fix. +// +// # Formatting Styles +// +// Two preset styles from the ast package drive the renderer: +// +// - ast.ReadableStyle() — multi-line output with uppercase keywords, 2-space indentation, and a trailing semicolon per statement. 
This is the default style used by Formatter when Compact is false. +// - ast.CompactStyle() — single-line output with no indentation, suitable for logging or wire transmission. +// +// Custom styles can be built by constructing an ast.FormatOptions value directly +// and calling FormatAST. +// +// # Basic Usage +// +// import "github.com/ajitpratap0/GoSQLX/pkg/formatter" +// +// // One-shot formatting with default options +// out, err := formatter.FormatString("select id,name from users where id=1") +// // out: "select id, name\nfrom users\nwhere id = 1" +// +// // Configurable formatting +// f := formatter.New(formatter.Options{IndentSize: 4, Uppercase: true}) +// out, err := f.Format("select id,name from users where id=1") +// // out: "SELECT id, name\nFROM users\nWHERE id = 1" +// +// // Compact single-line output +// f := formatter.New(formatter.Options{Compact: true, Uppercase: true}) +// out, err := f.Format("SELECT id, name FROM users WHERE id = 1") +// // out: "SELECT id, name FROM users WHERE id = 1" +// +// # Supported Statement Types +// +// The renderer handles all GoSQLX-supported statement types: +// +// - DML: SELECT (including CTEs, window functions, set operations), INSERT, UPDATE, DELETE +// - DDL: CREATE TABLE, CREATE INDEX, CREATE VIEW, CREATE MATERIALIZED VIEW, ALTER TABLE, DROP, TRUNCATE +// - Advanced: MERGE, REFRESH MATERIALIZED VIEW +// +// # Comment Preservation +// +// Comments captured by the tokenizer are attached to the AST and re-emitted +// by FormatAST. Leading (non-inline) comments appear before the query; inline +// comments are appended after the last statement. +// +// # Backward Compatibility +// +// Importing this package automatically wires the visitor-based renderer into +// the ast package's FormatStatementFunc, FormatExpressionFunc, and FormatASTFunc +// variables via an init() function in compat.go. This allows deprecated +// Format(FormatOptions) methods on AST nodes to delegate here without creating +// an import cycle. 
Callers that import only pkg/sql/ast receive a fallback +// SQL() string output from those deprecated shims. +// +// # Object Pool Usage +// +// Format internally uses the GoSQLX tokenizer and parser object pools for +// efficient memory reuse. The Formatter type is safe for reuse but not for +// concurrent use from multiple goroutines; create one Formatter per goroutine +// or protect shared access with a mutex. package formatter import ( diff --git a/pkg/formatter/render.go b/pkg/formatter/render.go index dca05367..df655990 100644 --- a/pkg/formatter/render.go +++ b/pkg/formatter/render.go @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package formatter provides visitor-based SQL rendering for AST nodes. -// +// render.go — visitor-based SQL rendering for AST nodes. // All formatting logic lives here; AST nodes are pure data (no Format methods). // This follows the go/ast + go/printer separation pattern. + package formatter import ( diff --git a/pkg/gosqlx/doc.go b/pkg/gosqlx/doc.go index f3381aa7..7e912eb0 100644 --- a/pkg/gosqlx/doc.go +++ b/pkg/gosqlx/doc.go @@ -13,10 +13,11 @@ // limitations under the License. // Package gosqlx provides high-level convenience functions for SQL parsing, validation, -// and metadata extraction. +// formatting, and metadata extraction, with automatic object pool management. // // GoSQLX is a production-ready, high-performance SQL parsing SDK for Go that supports // multiple SQL dialects with comprehensive SQL-99 and SQL:2003 feature support. +// The primary entry points are Parse, Validate, Format, and ParseWithDialect. // // # Overview // @@ -25,6 +26,14 @@ // internally, making it ideal for applications that prioritize ease of use over // fine-grained performance control. 
// +// Key functions: +// - Parse: tokenizes and parses SQL, returning an *ast.AST +// - Validate: checks whether SQL is syntactically valid without building a full AST +// - Format: formats SQL text according to configurable style options +// - ParseWithDialect: parses SQL using dialect-specific rules (PostgreSQL, MySQL, etc.) +// - ParseMultiple: efficiently parses a batch of SQL statements by reusing pooled objects +// - ParseWithContext: parses SQL with context support for cancellation and timeouts +// +// For performance-critical applications requiring fine-grained control over object +// lifecycle and pooling, use the lower-level APIs in pkg/sql/tokenizer and pkg/sql/parser +// directly. diff --git a/pkg/gosqlx/gosqlx.go b/pkg/gosqlx/gosqlx.go index b79c5b89..eb9f951c 100644 --- a/pkg/gosqlx/gosqlx.go +++ b/pkg/gosqlx/gosqlx.go @@ -12,56 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package gosqlx provides high-level convenience functions for SQL parsing, validation, -// and metadata extraction with automatic object pool management. -// -// This package is the primary entry point for most applications using GoSQLX. -// It wraps the lower-level tokenizer and parser APIs to provide a simple, ergonomic -// interface for common SQL operations. All object pool management is handled internally. 
-// -// # Performance Characteristics (v1.6.0) -// -// - Throughput: 1.38M+ operations/second sustained, 1.5M+ peak -// - Latency: <1μs for complex queries with window functions -// - Memory: 60-80% reduction through intelligent object pooling -// - Thread Safety: Race-free, validated with 20,000+ concurrent operations -// -// # Quick Start -// -// Parse SQL and get AST: -// -// sql := "SELECT u.name, o.total FROM users u JOIN orders o ON u.id = o.user_id" -// ast, err := gosqlx.Parse(sql) -// if err != nil { -// log.Fatal(err) -// } -// -// Extract metadata from SQL: -// -// metadata := gosqlx.ExtractMetadata(ast) -// fmt.Printf("Tables: %v, Columns: %v\n", metadata.Tables, metadata.Columns) -// -// # For Performance-Critical Applications -// -// For batch processing or performance-critical code that needs fine-grained control -// over object lifecycle and pooling, use the lower-level APIs in pkg/sql/tokenizer -// and pkg/sql/parser directly: -// -// // Manual object pool management -// tkz := tokenizer.GetTokenizer() -// defer tokenizer.PutTokenizer(tkz) -// -// p := parser.GetParser() -// defer parser.PutParser(p) -// -// // Reuse objects for multiple queries -// for _, sql := range queries { -// tkz.Reset() -// tokens, _ := tkz.Tokenize([]byte(sql)) -// ast, _ := p.Parse(tokens) -// } -// -// See package documentation (doc.go) for complete feature list and usage examples. package gosqlx import ( @@ -559,7 +509,21 @@ type FormatOptions struct { SingleLineLimit int } -// DefaultFormatOptions returns the default formatting options. +// DefaultFormatOptions returns a FormatOptions value with sensible defaults. 
+// +// The defaults are: +// - IndentSize: 2 spaces per indent level +// - UppercaseKeywords: false (preserve original case) +// - AddSemicolon: false (preserve original termination) +// - SingleLineLimit: 80 characters +// +// Use the returned value as a starting point and override individual fields +// to match your project's SQL style guide: +// +// opts := gosqlx.DefaultFormatOptions() +// opts.UppercaseKeywords = true // enforce UPPERCASE keywords +// opts.AddSemicolon = true // always terminate with ; +// formatted, err := gosqlx.Format(sql, opts) func DefaultFormatOptions() FormatOptions { return FormatOptions{ IndentSize: 2, diff --git a/pkg/gosqlx/testing/doc.go b/pkg/gosqlx/testing/doc.go index 3cc857f8..fc5447f7 100644 --- a/pkg/gosqlx/testing/doc.go +++ b/pkg/gosqlx/testing/doc.go @@ -13,11 +13,12 @@ // limitations under the License. /* -Package testing provides comprehensive test helpers for SQL parsing validation. +Package testing provides comprehensive SQL parsing test helpers for use in Go test suites. -This package offers convenient assertion and requirement functions for testing SQL -parsing, formatting, and metadata extraction in Go test suites. It integrates -seamlessly with Go's standard testing package and follows patterns similar to +This package offers assertion and requirement functions — including AssertValidSQL, +AssertInvalidSQL, RequireParse, AssertTables, AssertColumns, AssertParsesTo, and +AssertErrorContains — for validating SQL parsing, formatting, and metadata extraction. +It integrates seamlessly with Go's standard testing package and follows patterns similar to testify/assert and testify/require. # Overview diff --git a/pkg/gosqlx/testing/testing.go b/pkg/gosqlx/testing/testing.go index 861c9f80..dfb7250a 100644 --- a/pkg/gosqlx/testing/testing.go +++ b/pkg/gosqlx/testing/testing.go @@ -12,26 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// Package testing provides helper functions for testing SQL parsing in Go tests. -// -// This package offers convenient assertion and requirement functions for validating -// SQL parsing, formatting, and metadata extraction in your test suites. It integrates -// seamlessly with Go's standard testing package and follows similar patterns to -// testify/assert. -// -// Example usage: -// -// func TestMySQL(t *testing.T) { -// testing.AssertValidSQL(t, "SELECT * FROM users") -// testing.AssertInvalidSQL(t, "SELECT FROM") -// testing.AssertTables(t, "SELECT * FROM users u JOIN orders o", []string{"users", "orders"}) -// } -// -// Key features: -// - Clear, descriptive error messages with SQL context -// - Proper test failure reporting with t.Helper() -// - Support for both assertion (test continues) and requirement (test stops) styles -// - Metadata extraction helpers (tables, columns) +// Package testing provides SQL parsing test helpers; see doc.go for full documentation. package testing import ( diff --git a/pkg/models/comment.go b/pkg/models/comment.go index fe4c31fc..da95183b 100644 --- a/pkg/models/comment.go +++ b/pkg/models/comment.go @@ -12,24 +12,74 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package models provides the Comment type for SQL comment preservation. package models -// CommentStyle indicates the type of SQL comment. +// CommentStyle indicates the type of SQL comment syntax used. +// +// There are two styles of SQL comments: single-line comments introduced with -- +// and multi-line block comments delimited by /* and */. +// +// Example: +// +// // Single-line comment +// -- This is a line comment +// +// // Multi-line block comment +// /* This is a +// block comment */ type CommentStyle int const ( - // LineComment represents a -- single-line comment. + // LineComment represents a -- single-line comment that extends to the end of the line. 
LineComment CommentStyle = iota - // BlockComment represents a /* multi-line */ comment. + // BlockComment represents a /* multi-line */ comment that can span multiple lines. BlockComment ) // Comment represents a SQL comment captured during tokenization. +// +// Comments are preserved by the tokenizer for use by formatters, LSP servers, +// and other tools that need to maintain the original SQL structure. Both +// single-line (--) and multi-line (/* */) comment styles are supported. +// +// Fields: +// - Text: Complete comment text including delimiters (e.g., "-- foo" or "/* bar */") +// - Style: Whether this is a line or block comment +// - Start: Source position where the comment begins (inclusive, 1-based) +// - End: Source position where the comment ends (exclusive, 1-based) +// - Inline: True when the comment appears on the same line as SQL code (trailing comment) +// +// Example: +// +// // Trailing line comment +// comment := models.Comment{ +// Text: "-- filter active users", +// Style: models.LineComment, +// Start: models.Location{Line: 3, Column: 30}, +// End: models.Location{Line: 3, Column: 52}, +// Inline: true, +// } +// +// // Stand-alone block comment +// comment := models.Comment{ +// Text: "/* Returns all active users */", +// Style: models.BlockComment, +// Start: models.Location{Line: 1, Column: 1}, +// End: models.Location{Line: 1, Column: 30}, +// Inline: false, +// } type Comment struct { - Text string // The comment text including delimiters (e.g., "-- foo" or "/* bar */") - Style CommentStyle // Line or block comment - Start Location // Start position in source - End Location // End position in source - Inline bool // True if the comment is on the same line as code (trailing) + // Text is the full comment text including its delimiters. + // For line comments: includes the leading "--" (e.g., "-- my comment"). + // For block comments: includes "/*" and "*/" delimiters (e.g., "/* my comment */"). 
+ Text string + // Style indicates whether this is a LineComment (--) or BlockComment (/* */). + Style CommentStyle + // Start is the 1-based source location where the comment begins (inclusive). + Start Location + // End is the 1-based source location where the comment ends (exclusive). + End Location + // Inline is true when the comment appears on the same source line as SQL code, + // i.e., it is a trailing comment following a statement or clause. + Inline bool } diff --git a/pkg/models/doc.go b/pkg/models/doc.go index 38ba3d1f..3d355f4b 100644 --- a/pkg/models/doc.go +++ b/pkg/models/doc.go @@ -12,7 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package models provides core data structures for SQL tokenization and parsing in GoSQLX v1.6.0. +// Package models provides the core data structures for SQL tokenization and parsing in GoSQLX. +// +// The fundamental types are Token (a single lexical unit with Type and Value), TokenWithSpan +// (a Token paired with Start/End Location for precise source positions), Location (1-based +// line/column coordinates), Span (a source range from one Location to another), and +// TokenizerError (structured error with position information). TokenType is an integer +// enumeration that covers all SQL keywords, operators, literals, and punctuation, enabling +// O(1) switch-based dispatch throughout the tokenizer and parser. // // This package contains the fundamental types used throughout the GoSQLX library for representing // SQL tokens, their locations in source code, and tokenization errors. All types are designed with diff --git a/pkg/models/token.go b/pkg/models/token.go index 147c2544..6d794332 100644 --- a/pkg/models/token.go +++ b/pkg/models/token.go @@ -12,13 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// Package models provides core data structures for SQL tokenization and parsing, -// including tokens, spans, locations, and error types. -// -// This package is the foundation of GoSQLX v1.6.0, providing high-performance, -// zero-copy token types with comprehensive PostgreSQL and SQL standard support. -// -// See doc.go for complete package documentation and examples. package models // Token represents a SQL token with its value and metadata. @@ -52,11 +45,21 @@ package models // Performance: Tokens are stack-allocated value types with minimal memory overhead. // Used extensively with sync.Pool for zero-allocation parsing in hot paths. type Token struct { - Type TokenType + // Type is the TokenType classification of this token (e.g., TokenTypeSelect, + // TokenTypeNumber, TokenTypeArrow). Use Type for all category checks. + Type TokenType + // Value is the raw string representation of the token as it appeared in the + // SQL source (e.g., "SELECT", "42", "'hello'", "->"). Value string - Word *Word // For TokenTypeWord - Long bool // For TokenTypeNumber to indicate if it's a long number - Quote rune // For quoted strings and identifiers + // Word holds keyword or identifier metadata for TokenTypeWord tokens. + // It is nil for all other token types. + Word *Word + // Long is true for TokenTypeNumber tokens whose value exceeds the range of + // a 32-bit integer and must be interpreted as int64. + Long bool + // Quote is the quote character used to delimit the token, for quoted string + // literals (') and quoted identifiers (", `, [). Zero for unquoted tokens. + Quote rune } // Word represents a keyword or identifier with its properties. @@ -84,9 +87,14 @@ type Token struct { // QuoteStyle: '"', // } type Word struct { - Value string // The actual text value - QuoteStyle rune // The quote character used (if quoted) - Keyword *Keyword // If this word is a keyword + // Value is the actual text of the word in its original case (e.g., "SELECT", "users"). 
+ Value string + // QuoteStyle is the quote character used to delimit a quoted identifier (", `, [). + // Zero for unquoted words. + QuoteStyle rune + // Keyword holds SQL keyword metadata when this word is a recognized SQL keyword. + // It is nil for plain identifiers (table names, column names, aliases). + Keyword *Keyword } // Keyword represents a lexical keyword with its properties. @@ -112,8 +120,12 @@ type Word struct { // - RETURNING: Return modified rows from INSERT/UPDATE/DELETE // - FILTER: Conditional aggregation in window functions type Keyword struct { - Word string // The actual keyword text - Reserved bool // Whether this is a reserved keyword + // Word is the keyword text in its canonical uppercase form (e.g., "SELECT", "LATERAL"). + Word string + // Reserved is true for keywords that cannot be used as unquoted identifiers + // (e.g., SELECT, FROM, WHERE) and false for non-reserved keywords + // (e.g., RETURNING, LATERAL, FILTER) that are valid as identifiers in some dialects. + Reserved bool } // Whitespace represents different types of whitespace tokens. @@ -142,9 +154,14 @@ type Keyword struct { // Content: "/* Block comment */", // } type Whitespace struct { - Type WhitespaceType - Content string // For comments - Prefix string // For single line comments + // Type identifies whether this is a space, newline, tab, or comment. + Type WhitespaceType + // Content holds the text of a comment, including its delimiters. + // Empty for non-comment whitespace (spaces, newlines, tabs). + Content string + // Prefix holds the comment introducer for single-line comments ("--" or "#"). + // Empty for block comments and non-comment whitespace. + Prefix string } // WhitespaceType represents the type of whitespace. 
diff --git a/pkg/models/token_type.go b/pkg/models/token_type.go index 819a6480..c66cfd63 100644 --- a/pkg/models/token_type.go +++ b/pkg/models/token_type.go @@ -95,334 +95,689 @@ const ( TokenRangeDataTypeEnd TokenType = 450 ) -// Token type constants with explicit values to avoid collisions +// Token type constants with explicit values to avoid collisions. +// +// Constants are assigned explicit numeric values to guarantee stability across +// versions. Adding new token types must not change existing values. const ( - // Special tokens - TokenTypeEOF TokenType = 0 + // TokenTypeEOF marks the end of the SQL input stream. Every token slice + // produced by the tokenizer is terminated with an EOF token. + TokenTypeEOF TokenType = 0 + // TokenTypeUnknown is assigned to tokens that cannot be classified. This + // typically signals a tokenizer bug or an unsupported input character. TokenTypeUnknown TokenType = 1 // Basic token types (10-29) - TokenTypeWord TokenType = 10 - TokenTypeNumber TokenType = 11 - TokenTypeChar TokenType = 12 - TokenTypeWhitespace TokenType = 13 - TokenTypeIdentifier TokenType = 14 + + // TokenTypeWord represents an unquoted keyword or identifier word token. + // The Token.Word field holds keyword metadata when the word is a SQL keyword. + TokenTypeWord TokenType = 10 + // TokenTypeNumber represents a numeric literal (integer or floating-point). + // Token.Long is true when the value requires int64 representation. + TokenTypeNumber TokenType = 11 + // TokenTypeChar represents a single character token that does not fit other categories. + TokenTypeChar TokenType = 12 + // TokenTypeWhitespace represents whitespace (spaces, newlines, tabs) or comments. + TokenTypeWhitespace TokenType = 13 + // TokenTypeIdentifier represents a quoted identifier such as "column name" or `table`. + TokenTypeIdentifier TokenType = 14 + // TokenTypePlaceholder represents a query parameter placeholder such as ? or $1. 
TokenTypePlaceholder TokenType = 15 // String literals (30-49) - TokenTypeString TokenType = 30 // Generic string type - TokenTypeSingleQuotedString TokenType = 31 - TokenTypeDoubleQuotedString TokenType = 32 + + // TokenTypeString is the generic string literal type used when the specific + // quoting style is not important (e.g., for dialect-agnostic processing). + TokenTypeString TokenType = 30 + // TokenTypeSingleQuotedString represents a SQL string literal enclosed in single quotes: 'value'. + TokenTypeSingleQuotedString TokenType = 31 + // TokenTypeDoubleQuotedString represents a string enclosed in double quotes: "value". + // In standard SQL this is a quoted identifier; in MySQL it can be a string literal. + TokenTypeDoubleQuotedString TokenType = 32 + // TokenTypeTripleSingleQuotedString represents a string enclosed in triple single quotes: '''value'''. TokenTypeTripleSingleQuotedString TokenType = 33 + // TokenTypeTripleDoubleQuotedString represents a string enclosed in triple double quotes: """value""". TokenTypeTripleDoubleQuotedString TokenType = 34 - TokenTypeDollarQuotedString TokenType = 35 - TokenTypeByteStringLiteral TokenType = 36 - TokenTypeNationalStringLiteral TokenType = 37 - TokenTypeEscapedStringLiteral TokenType = 38 - TokenTypeUnicodeStringLiteral TokenType = 39 - TokenTypeHexStringLiteral TokenType = 40 + // TokenTypeDollarQuotedString represents a PostgreSQL dollar-quoted string: $$value$$ or $tag$value$tag$. + TokenTypeDollarQuotedString TokenType = 35 + // TokenTypeByteStringLiteral represents a byte string literal such as b'bytes' (BigQuery). + TokenTypeByteStringLiteral TokenType = 36 + // TokenTypeNationalStringLiteral represents an ANSI national character set string: N'value'. + TokenTypeNationalStringLiteral TokenType = 37 + // TokenTypeEscapedStringLiteral represents a PostgreSQL escaped string: E'value\n'. 
+ TokenTypeEscapedStringLiteral TokenType = 38 + // TokenTypeUnicodeStringLiteral represents an ANSI Unicode string: U&'value'. + TokenTypeUnicodeStringLiteral TokenType = 39 + // TokenTypeHexStringLiteral represents a hexadecimal string literal: X'DEADBEEF'. + TokenTypeHexStringLiteral TokenType = 40 // Operators and punctuation (50-99) - TokenTypeOperator TokenType = 50 // Generic operator - TokenTypeComma TokenType = 51 - TokenTypeEq TokenType = 52 - TokenTypeDoubleEq TokenType = 53 - TokenTypeNeq TokenType = 54 - TokenTypeLt TokenType = 55 - TokenTypeGt TokenType = 56 - TokenTypeLtEq TokenType = 57 - TokenTypeGtEq TokenType = 58 - TokenTypeSpaceship TokenType = 59 - TokenTypePlus TokenType = 60 - TokenTypeMinus TokenType = 61 - TokenTypeMul TokenType = 62 - TokenTypeDiv TokenType = 63 - TokenTypeDuckIntDiv TokenType = 64 - TokenTypeMod TokenType = 65 - TokenTypeStringConcat TokenType = 66 - TokenTypeLParen TokenType = 67 - TokenTypeLeftParen TokenType = 67 // Alias for compatibility - TokenTypeRParen TokenType = 68 - TokenTypeRightParen TokenType = 68 // Alias for compatibility - TokenTypePeriod TokenType = 69 - TokenTypeDot TokenType = 69 // Alias for compatibility - TokenTypeColon TokenType = 70 - TokenTypeDoubleColon TokenType = 71 - TokenTypeAssignment TokenType = 72 - TokenTypeSemicolon TokenType = 73 - TokenTypeBackslash TokenType = 74 - TokenTypeLBracket TokenType = 75 - TokenTypeRBracket TokenType = 76 - TokenTypeAmpersand TokenType = 77 - TokenTypePipe TokenType = 78 - TokenTypeCaret TokenType = 79 - TokenTypeLBrace TokenType = 80 - TokenTypeRBrace TokenType = 81 - TokenTypeRArrow TokenType = 82 - TokenTypeSharp TokenType = 83 - TokenTypeTilde TokenType = 84 + + // TokenTypeOperator is the generic operator type for operators not covered by a more specific constant. + TokenTypeOperator TokenType = 50 + // TokenTypeComma represents the , separator used in lists and clauses. 
+ TokenTypeComma TokenType = 51 + // TokenTypeEq represents the = equality or assignment operator. + TokenTypeEq TokenType = 52 + // TokenTypeDoubleEq represents the == equality operator (SQLite and some other dialects; equivalent to =). + TokenTypeDoubleEq TokenType = 53 + // TokenTypeNeq represents the <> or != inequality operator. + TokenTypeNeq TokenType = 54 + // TokenTypeLt represents the < less-than comparison operator. + TokenTypeLt TokenType = 55 + // TokenTypeGt represents the > greater-than comparison operator. + TokenTypeGt TokenType = 56 + // TokenTypeLtEq represents the <= less-than-or-equal comparison operator. + TokenTypeLtEq TokenType = 57 + // TokenTypeGtEq represents the >= greater-than-or-equal comparison operator. + TokenTypeGtEq TokenType = 58 + // TokenTypeSpaceship represents the <=> NULL-safe equality operator (MySQL). + TokenTypeSpaceship TokenType = 59 + // TokenTypePlus represents the + addition operator. + TokenTypePlus TokenType = 60 + // TokenTypeMinus represents the - subtraction or negation operator. + TokenTypeMinus TokenType = 61 + // TokenTypeMul represents the * multiplication operator or SELECT * wildcard. + TokenTypeMul TokenType = 62 + // TokenTypeDiv represents the / division operator. + TokenTypeDiv TokenType = 63 + // TokenTypeDuckIntDiv represents the // integer division operator (DuckDB). + TokenTypeDuckIntDiv TokenType = 64 + // TokenTypeMod represents the % modulo operator. + TokenTypeMod TokenType = 65 + // TokenTypeStringConcat represents the || string concatenation operator (SQL standard). + TokenTypeStringConcat TokenType = 66 + // TokenTypeLParen represents the ( left parenthesis. + TokenTypeLParen TokenType = 67 + TokenTypeLeftParen TokenType = 67 // TokenTypeLeftParen is an alias for TokenTypeLParen for backward compatibility. + // TokenTypeRParen represents the ) right parenthesis. + TokenTypeRParen TokenType = 68 + TokenTypeRightParen TokenType = 68 // TokenTypeRightParen is an alias for TokenTypeRParen for backward compatibility. 
+ // TokenTypePeriod represents the . dot/period used for qualified names (schema.table.column). + TokenTypePeriod TokenType = 69 + TokenTypeDot TokenType = 69 // TokenTypeDot is an alias for TokenTypePeriod for backward compatibility. + // TokenTypeColon represents the : colon used in named parameters (:param) and slices. + TokenTypeColon TokenType = 70 + // TokenTypeDoubleColon represents the :: PostgreSQL type cast operator (expr::type). + TokenTypeDoubleColon TokenType = 71 + // TokenTypeAssignment represents the := assignment operator used in PL/SQL and named arguments. + TokenTypeAssignment TokenType = 72 + // TokenTypeSemicolon represents the ; statement terminator. + TokenTypeSemicolon TokenType = 73 + // TokenTypeBackslash represents the \ backslash character. + TokenTypeBackslash TokenType = 74 + // TokenTypeLBracket represents the [ left square bracket used for array subscripts and array literals. + TokenTypeLBracket TokenType = 75 + // TokenTypeRBracket represents the ] right square bracket. + TokenTypeRBracket TokenType = 76 + // TokenTypeAmpersand represents the & bitwise AND operator. + TokenTypeAmpersand TokenType = 77 + // TokenTypePipe represents the | bitwise OR operator. + TokenTypePipe TokenType = 78 + // TokenTypeCaret represents the ^ bitwise XOR or exponentiation operator. + TokenTypeCaret TokenType = 79 + // TokenTypeLBrace represents the { left curly brace used in JSON literals and format strings. + TokenTypeLBrace TokenType = 80 + // TokenTypeRBrace represents the } right curly brace. + TokenTypeRBrace TokenType = 81 + // TokenTypeRArrow represents the => fat arrow used in named argument syntax. + TokenTypeRArrow TokenType = 82 + // TokenTypeSharp represents the # hash character used in PostgreSQL path operators. + TokenTypeSharp TokenType = 83 + // TokenTypeTilde represents the ~ regular expression match operator (PostgreSQL). + TokenTypeTilde TokenType = 84 + // TokenTypeExclamationMark represents the ! 
logical NOT or factorial operator. TokenTypeExclamationMark TokenType = 85 - TokenTypeAtSign TokenType = 86 - TokenTypeQuestion TokenType = 87 + // TokenTypeAtSign represents the @ at-sign used in PostgreSQL full-text search and JSON operators. + TokenTypeAtSign TokenType = 86 + // TokenTypeQuestion represents the ? parameter placeholder (JDBC) and JSON key existence operator (PostgreSQL). + TokenTypeQuestion TokenType = 87 // Compound operators (100-149) - TokenTypeTildeAsterisk TokenType = 100 - TokenTypeExclamationMarkTilde TokenType = 101 - TokenTypeExclamationMarkTildeAsterisk TokenType = 102 - TokenTypeDoubleTilde TokenType = 103 - TokenTypeDoubleTildeAsterisk TokenType = 104 - TokenTypeExclamationMarkDoubleTilde TokenType = 105 + // These multi-character operators are produced when the tokenizer recognizes + // a specific combination of characters as a single logical token. + + // TokenTypeTildeAsterisk represents the ~* case-insensitive regex match operator (PostgreSQL). + TokenTypeTildeAsterisk TokenType = 100 + // TokenTypeExclamationMarkTilde represents the !~ regex non-match operator (PostgreSQL). + TokenTypeExclamationMarkTilde TokenType = 101 + // TokenTypeExclamationMarkTildeAsterisk represents the !~* case-insensitive regex non-match operator (PostgreSQL). + TokenTypeExclamationMarkTildeAsterisk TokenType = 102 + // TokenTypeDoubleTilde represents the ~~ LIKE operator alias (PostgreSQL). + TokenTypeDoubleTilde TokenType = 103 + // TokenTypeDoubleTildeAsterisk represents the ~~* ILIKE operator alias (PostgreSQL). + TokenTypeDoubleTildeAsterisk TokenType = 104 + // TokenTypeExclamationMarkDoubleTilde represents the !~~ NOT LIKE operator alias (PostgreSQL). + TokenTypeExclamationMarkDoubleTilde TokenType = 105 + // TokenTypeExclamationMarkDoubleTildeAsterisk represents the !~~* NOT ILIKE operator alias (PostgreSQL). 
TokenTypeExclamationMarkDoubleTildeAsterisk TokenType = 106 - TokenTypeShiftLeft TokenType = 107 - TokenTypeShiftRight TokenType = 108 - TokenTypeOverlap TokenType = 109 - TokenTypeDoubleExclamationMark TokenType = 110 - TokenTypeCaretAt TokenType = 111 - TokenTypePGSquareRoot TokenType = 112 - TokenTypePGCubeRoot TokenType = 113 + // TokenTypeShiftLeft represents the << bitwise left-shift operator. + TokenTypeShiftLeft TokenType = 107 + // TokenTypeShiftRight represents the >> bitwise right-shift operator. + TokenTypeShiftRight TokenType = 108 + // TokenTypeOverlap represents the && range overlap operator (PostgreSQL). + TokenTypeOverlap TokenType = 109 + // TokenTypeDoubleExclamationMark represents the !! prefix factorial operator (PostgreSQL). + TokenTypeDoubleExclamationMark TokenType = 110 + // TokenTypeCaretAt represents the ^@ starts-with string operator (PostgreSQL 11+). + TokenTypeCaretAt TokenType = 111 + // TokenTypePGSquareRoot represents the |/ square root prefix operator (PostgreSQL). + TokenTypePGSquareRoot TokenType = 112 + // TokenTypePGCubeRoot represents the ||/ cube root prefix operator (PostgreSQL). + TokenTypePGCubeRoot TokenType = 113 + // JSON/JSONB operators (PostgreSQL) - TokenTypeArrow TokenType = 114 // -> JSON field access (returns JSON) - TokenTypeLongArrow TokenType = 115 // ->> JSON field access (returns text) - TokenTypeHashArrow TokenType = 116 // #> JSON path access (returns JSON) - TokenTypeHashLongArrow TokenType = 117 // #>> JSON path access (returns text) - TokenTypeAtArrow TokenType = 118 // @> JSON contains - TokenTypeArrowAt TokenType = 119 // <@ JSON is contained by - TokenTypeHashMinus TokenType = 120 // #- Delete at JSON path - TokenTypeAtQuestion TokenType = 121 // @? 
JSON path query - TokenTypeAtAt TokenType = 122 // @@ Full text search - TokenTypeQuestionAnd TokenType = 123 // ?& JSON key exists all - TokenTypeQuestionPipe TokenType = 124 // ?| JSON key exists any + + // TokenTypeArrow represents the -> operator that returns a JSON field value as a JSON object. + // Example: data->'name' returns the "name" field as JSON. + TokenTypeArrow TokenType = 114 + // TokenTypeLongArrow represents the ->> operator that returns a JSON field value as text. + // Example: data->>'name' returns the "name" field as a text string. + TokenTypeLongArrow TokenType = 115 + // TokenTypeHashArrow represents the #> operator that returns a JSON value at a path as JSON. + // Example: data#>'{address,city}' returns the nested value as JSON. + TokenTypeHashArrow TokenType = 116 + // TokenTypeHashLongArrow represents the #>> operator that returns a JSON value at a path as text. + // Example: data#>>'{address,city}' returns the nested value as text. + TokenTypeHashLongArrow TokenType = 117 + // TokenTypeAtArrow represents the @> containment operator: left JSON value contains right. + // Example: data @> '{"status":"active"}' checks if data contains the given JSON. + TokenTypeAtArrow TokenType = 118 + // TokenTypeArrowAt represents the <@ containment operator: left JSON value is contained by right. + // Example: '{"a":1}' <@ data checks if the left-hand JSON is a subset of data. + TokenTypeArrowAt TokenType = 119 + // TokenTypeHashMinus represents the #- operator that deletes a key or index at the given path. + // Example: data #- '{address,zip}' removes the "zip" key from the nested "address" object. + TokenTypeHashMinus TokenType = 120 + // TokenTypeAtQuestion represents the @? operator that tests whether a JSON path returns any values. + // Example: data @? '$.address.city' checks whether the path produces a result. + TokenTypeAtQuestion TokenType = 121 + // TokenTypeAtAt represents the @@ operator used for full-text search matching. 
+ // Example: to_tsvector(text) @@ to_tsquery('query'). + TokenTypeAtAt TokenType = 122 + // TokenTypeQuestionAnd represents the ?& operator that checks whether all given keys exist. + // Example: data ?& array['name','email'] returns true if both keys exist in the JSON object. + TokenTypeQuestionAnd TokenType = 123 + // TokenTypeQuestionPipe represents the ?| operator that checks whether any of the given keys exist. + // Example: data ?| array['name','email'] returns true if at least one key exists. + TokenTypeQuestionPipe TokenType = 124 + // TokenTypeCustomBinaryOperator represents a user-defined or dialect-specific binary operator + // not covered by any other constant (e.g., custom PostgreSQL operators). TokenTypeCustomBinaryOperator TokenType = 125 // SQL Keywords (200-399) - TokenTypeKeyword TokenType = 200 // Generic keyword - TokenTypeSelect TokenType = 201 - TokenTypeFrom TokenType = 202 - TokenTypeWhere TokenType = 203 - TokenTypeJoin TokenType = 204 - TokenTypeInner TokenType = 205 - TokenTypeLeft TokenType = 206 - TokenTypeRight TokenType = 207 - TokenTypeOuter TokenType = 208 - TokenTypeOn TokenType = 209 - TokenTypeAs TokenType = 210 - TokenTypeAnd TokenType = 211 - TokenTypeOr TokenType = 212 - TokenTypeNot TokenType = 213 - TokenTypeIn TokenType = 214 - TokenTypeLike TokenType = 215 + // These token types represent reserved and non-reserved SQL keywords. + + // TokenTypeKeyword is the generic keyword token type for words that are recognized + // as SQL keywords but do not have a more specific constant assigned. + TokenTypeKeyword TokenType = 200 + // TokenTypeSelect represents the SELECT keyword that begins a query. + TokenTypeSelect TokenType = 201 + // TokenTypeFrom represents the FROM keyword that introduces the table source. + TokenTypeFrom TokenType = 202 + // TokenTypeWhere represents the WHERE keyword that begins the filter condition. 
+ TokenTypeWhere TokenType = 203 + // TokenTypeJoin represents the JOIN keyword (typically preceded by INNER, LEFT, etc.). + TokenTypeJoin TokenType = 204 + // TokenTypeInner represents the INNER keyword used in INNER JOIN. + TokenTypeInner TokenType = 205 + // TokenTypeLeft represents the LEFT keyword used in LEFT JOIN and LEFT OUTER JOIN. + TokenTypeLeft TokenType = 206 + // TokenTypeRight represents the RIGHT keyword used in RIGHT JOIN and RIGHT OUTER JOIN. + TokenTypeRight TokenType = 207 + // TokenTypeOuter represents the OUTER keyword used in LEFT/RIGHT/FULL OUTER JOIN. + TokenTypeOuter TokenType = 208 + // TokenTypeOn represents the ON keyword that introduces a join condition. + TokenTypeOn TokenType = 209 + // TokenTypeAs represents the AS keyword used in aliases (table AS alias, column AS alias). + TokenTypeAs TokenType = 210 + // TokenTypeAnd represents the AND logical operator combining conditions. + TokenTypeAnd TokenType = 211 + // TokenTypeOr represents the OR logical operator combining conditions. + TokenTypeOr TokenType = 212 + // TokenTypeNot represents the NOT logical negation operator. + TokenTypeNot TokenType = 213 + // TokenTypeIn represents the IN operator for membership tests (expr IN (list)). + TokenTypeIn TokenType = 214 + // TokenTypeLike represents the LIKE pattern-matching operator. + TokenTypeLike TokenType = 215 + // TokenTypeBetween represents the BETWEEN range operator (expr BETWEEN low AND high). 
TokenTypeBetween TokenType = 216 - TokenTypeIs TokenType = 217 - TokenTypeNull TokenType = 218 - TokenTypeTrue TokenType = 219 - TokenTypeFalse TokenType = 220 - TokenTypeCase TokenType = 221 - TokenTypeWhen TokenType = 222 - TokenTypeThen TokenType = 223 - TokenTypeElse TokenType = 224 - TokenTypeEnd TokenType = 225 - TokenTypeGroup TokenType = 226 - TokenTypeBy TokenType = 227 - TokenTypeHaving TokenType = 228 - TokenTypeOrder TokenType = 229 - TokenTypeAsc TokenType = 230 - TokenTypeDesc TokenType = 231 - TokenTypeLimit TokenType = 232 - TokenTypeOffset TokenType = 233 + // TokenTypeIs represents the IS operator used with NULL, TRUE, FALSE. + TokenTypeIs TokenType = 217 + // TokenTypeNull represents the NULL literal value. + TokenTypeNull TokenType = 218 + // TokenTypeTrue represents the TRUE boolean literal. + TokenTypeTrue TokenType = 219 + // TokenTypeFalse represents the FALSE boolean literal. + TokenTypeFalse TokenType = 220 + // TokenTypeCase represents the CASE keyword beginning a conditional expression. + TokenTypeCase TokenType = 221 + // TokenTypeWhen represents the WHEN keyword inside a CASE expression. + TokenTypeWhen TokenType = 222 + // TokenTypeThen represents the THEN keyword inside a CASE WHEN clause. + TokenTypeThen TokenType = 223 + // TokenTypeElse represents the ELSE keyword for the default branch in a CASE expression. + TokenTypeElse TokenType = 224 + // TokenTypeEnd represents the END keyword closing a CASE expression or block. + TokenTypeEnd TokenType = 225 + // TokenTypeGroup represents the GROUP keyword as part of GROUP BY. + TokenTypeGroup TokenType = 226 + // TokenTypeBy represents the BY keyword used with GROUP BY and ORDER BY. + TokenTypeBy TokenType = 227 + // TokenTypeHaving represents the HAVING keyword for filtering grouped results. + TokenTypeHaving TokenType = 228 + // TokenTypeOrder represents the ORDER keyword as part of ORDER BY. 
+ TokenTypeOrder TokenType = 229 + // TokenTypeAsc represents the ASC sort direction keyword (ascending order). + TokenTypeAsc TokenType = 230 + // TokenTypeDesc represents the DESC sort direction keyword (descending order). + TokenTypeDesc TokenType = 231 + // TokenTypeLimit represents the LIMIT keyword for restricting result count (MySQL, PostgreSQL, SQLite). + TokenTypeLimit TokenType = 232 + // TokenTypeOffset represents the OFFSET keyword for skipping rows in a result set. + TokenTypeOffset TokenType = 233 // DML Keywords (234-239) + // Data Manipulation Language keywords for modifying table data. + + // TokenTypeInsert represents the INSERT keyword beginning an INSERT statement. TokenTypeInsert TokenType = 234 + // TokenTypeUpdate represents the UPDATE keyword beginning an UPDATE statement. TokenTypeUpdate TokenType = 235 + // TokenTypeDelete represents the DELETE keyword beginning a DELETE statement. TokenTypeDelete TokenType = 236 - TokenTypeInto TokenType = 237 + // TokenTypeInto represents the INTO keyword used in INSERT INTO and other clauses. + TokenTypeInto TokenType = 237 + // TokenTypeValues represents the VALUES keyword introducing a list of row values. TokenTypeValues TokenType = 238 - TokenTypeSet TokenType = 239 + // TokenTypeSet represents the SET keyword introducing column assignments in UPDATE. + TokenTypeSet TokenType = 239 // DDL Keywords (240-249) - TokenTypeCreate TokenType = 240 - TokenTypeAlter TokenType = 241 - TokenTypeDrop TokenType = 242 - TokenTypeTable TokenType = 243 - TokenTypeIndex TokenType = 244 - TokenTypeView TokenType = 245 - TokenTypeColumn TokenType = 246 + // Data Definition Language keywords for managing database schema objects. + + // TokenTypeCreate represents the CREATE keyword beginning a DDL creation statement. + TokenTypeCreate TokenType = 240 + // TokenTypeAlter represents the ALTER keyword beginning a DDL modification statement. 
+ TokenTypeAlter TokenType = 241 + // TokenTypeDrop represents the DROP keyword beginning a DDL deletion statement. + TokenTypeDrop TokenType = 242 + // TokenTypeTable represents the TABLE keyword used in DDL statements (CREATE TABLE, etc.). + TokenTypeTable TokenType = 243 + // TokenTypeIndex represents the INDEX keyword used in CREATE/DROP INDEX statements. + TokenTypeIndex TokenType = 244 + // TokenTypeView represents the VIEW keyword used in CREATE/DROP VIEW statements. + TokenTypeView TokenType = 245 + // TokenTypeColumn represents the COLUMN keyword used in ALTER TABLE ADD/DROP COLUMN. + TokenTypeColumn TokenType = 246 + // TokenTypeDatabase represents the DATABASE keyword used in CREATE/DROP DATABASE. TokenTypeDatabase TokenType = 247 - TokenTypeSchema TokenType = 248 - TokenTypeTrigger TokenType = 249 + // TokenTypeSchema represents the SCHEMA keyword used in CREATE/DROP SCHEMA. + TokenTypeSchema TokenType = 248 + // TokenTypeTrigger represents the TRIGGER keyword used in CREATE/DROP TRIGGER. + TokenTypeTrigger TokenType = 249 // Aggregate functions (250-269) + // Standard SQL aggregate function keywords recognised by the tokenizer. + + // TokenTypeCount represents the COUNT aggregate function. TokenTypeCount TokenType = 250 - TokenTypeSum TokenType = 251 - TokenTypeAvg TokenType = 252 - TokenTypeMin TokenType = 253 - TokenTypeMax TokenType = 254 + // TokenTypeSum represents the SUM aggregate function. + TokenTypeSum TokenType = 251 + // TokenTypeAvg represents the AVG (average) aggregate function. + TokenTypeAvg TokenType = 252 + // TokenTypeMin represents the MIN aggregate function returning the smallest value. + TokenTypeMin TokenType = 253 + // TokenTypeMax represents the MAX aggregate function returning the largest value. 
+ TokenTypeMax TokenType = 254 // Compound keywords (270-279) - TokenTypeGroupBy TokenType = 270 - TokenTypeOrderBy TokenType = 271 - TokenTypeLeftJoin TokenType = 272 + // Multi-word compound SQL keywords represented as single token types for convenience. + + // TokenTypeGroupBy represents the compound GROUP BY keyword pair. + TokenTypeGroupBy TokenType = 270 + // TokenTypeOrderBy represents the compound ORDER BY keyword pair. + TokenTypeOrderBy TokenType = 271 + // TokenTypeLeftJoin represents the compound LEFT JOIN keyword pair. + TokenTypeLeftJoin TokenType = 272 + // TokenTypeRightJoin represents the compound RIGHT JOIN keyword pair. TokenTypeRightJoin TokenType = 273 + // TokenTypeInnerJoin represents the compound INNER JOIN keyword pair. TokenTypeInnerJoin TokenType = 274 + // TokenTypeOuterJoin represents the compound OUTER JOIN keyword pair. TokenTypeOuterJoin TokenType = 275 - TokenTypeFullJoin TokenType = 276 + // TokenTypeFullJoin represents the compound FULL JOIN keyword pair. + TokenTypeFullJoin TokenType = 276 + // TokenTypeCrossJoin represents the compound CROSS JOIN keyword pair. TokenTypeCrossJoin TokenType = 277 // CTE and Set Operations (280-299) - TokenTypeWith TokenType = 280 + + // TokenTypeWith represents the WITH keyword beginning a Common Table Expression (CTE). + TokenTypeWith TokenType = 280 + // TokenTypeRecursive represents the RECURSIVE modifier in WITH RECURSIVE CTEs. TokenTypeRecursive TokenType = 281 - TokenTypeUnion TokenType = 282 - TokenTypeExcept TokenType = 283 + // TokenTypeUnion represents the UNION set operation combining two result sets. + TokenTypeUnion TokenType = 282 + // TokenTypeExcept represents the EXCEPT set operation returning rows in the left set not in the right. + TokenTypeExcept TokenType = 283 + // TokenTypeIntersect represents the INTERSECT set operation returning rows present in both sets. 
TokenTypeIntersect TokenType = 284 - TokenTypeAll TokenType = 285 + // TokenTypeAll represents the ALL modifier used with UNION/EXCEPT/INTERSECT and quantified predicates. + TokenTypeAll TokenType = 285 // Window Function Keywords (300-319) - TokenTypeOver TokenType = 300 + // Keywords used within window function OVER clauses and frame specifications. + + // TokenTypeOver represents the OVER keyword introducing a window specification. + TokenTypeOver TokenType = 300 + // TokenTypePartition represents the PARTITION keyword in PARTITION BY window clause. TokenTypePartition TokenType = 301 - TokenTypeRows TokenType = 302 - TokenTypeRange TokenType = 303 + // TokenTypeRows represents the ROWS mode in a window frame (physical row offsets). + TokenTypeRows TokenType = 302 + // TokenTypeRange represents the RANGE mode in a window frame (logical value offsets). + TokenTypeRange TokenType = 303 + // TokenTypeUnbounded represents UNBOUNDED in window frames (UNBOUNDED PRECEDING/FOLLOWING). TokenTypeUnbounded TokenType = 304 + // TokenTypePreceding represents PRECEDING in window frame bounds. TokenTypePreceding TokenType = 305 + // TokenTypeFollowing represents FOLLOWING in window frame bounds. TokenTypeFollowing TokenType = 306 - TokenTypeCurrent TokenType = 307 - TokenTypeRow TokenType = 308 - TokenTypeGroups TokenType = 309 - TokenTypeFilter TokenType = 310 - TokenTypeExclude TokenType = 311 + // TokenTypeCurrent represents CURRENT in CURRENT ROW frame bound. + TokenTypeCurrent TokenType = 307 + // TokenTypeRow represents ROW in the CURRENT ROW window frame bound. + TokenTypeRow TokenType = 308 + // TokenTypeGroups represents the GROUPS mode in a window frame (peer group offsets, SQL:2011). + TokenTypeGroups TokenType = 309 + // TokenTypeFilter represents the FILTER keyword for conditional aggregation (e.g., COUNT(*) FILTER (WHERE ...)). + TokenTypeFilter TokenType = 310 + // TokenTypeExclude represents the EXCLUDE keyword in window frame EXCLUDE clauses. 
+ TokenTypeExclude TokenType = 311 // Additional Join Keywords (320-329) - TokenTypeCross TokenType = 320 + + // TokenTypeCross represents the CROSS keyword used in CROSS JOIN. + TokenTypeCross TokenType = 320 + // TokenTypeNatural represents the NATURAL keyword used in NATURAL JOIN (joins on all matching column names). TokenTypeNatural TokenType = 321 - TokenTypeFull TokenType = 322 - TokenTypeUsing TokenType = 323 - TokenTypeLateral TokenType = 324 // LATERAL keyword for correlated subqueries in FROM clause + // TokenTypeFull represents the FULL keyword used in FULL OUTER JOIN. + TokenTypeFull TokenType = 322 + // TokenTypeUsing represents the USING keyword that specifies shared column names in a JOIN. + TokenTypeUsing TokenType = 323 + // TokenTypeLateral represents the LATERAL keyword allowing correlated subqueries in the FROM clause. + // Example: FROM users u, LATERAL (SELECT * FROM orders WHERE user_id = u.id) o + TokenTypeLateral TokenType = 324 // Constraint Keywords (330-349) - TokenTypePrimary TokenType = 330 - TokenTypeKey TokenType = 331 - TokenTypeForeign TokenType = 332 - TokenTypeReferences TokenType = 333 - TokenTypeUnique TokenType = 334 - TokenTypeCheck TokenType = 335 - TokenTypeDefault TokenType = 336 + // Keywords used in table and column constraint definitions. + + // TokenTypePrimary represents the PRIMARY keyword in PRIMARY KEY constraints. + TokenTypePrimary TokenType = 330 + // TokenTypeKey represents the KEY keyword in PRIMARY KEY and FOREIGN KEY constraints. + TokenTypeKey TokenType = 331 + // TokenTypeForeign represents the FOREIGN keyword in FOREIGN KEY constraints. + TokenTypeForeign TokenType = 332 + // TokenTypeReferences represents the REFERENCES keyword in FOREIGN KEY constraints. + TokenTypeReferences TokenType = 333 + // TokenTypeUnique represents the UNIQUE constraint keyword. + TokenTypeUnique TokenType = 334 + // TokenTypeCheck represents the CHECK constraint keyword. 
+ TokenTypeCheck TokenType = 335 + // TokenTypeDefault represents the DEFAULT constraint keyword specifying a default column value. + TokenTypeDefault TokenType = 336 + // TokenTypeAutoIncrement represents the AUTO_INCREMENT column attribute (MySQL). + // In PostgreSQL, the equivalent is SERIAL or GENERATED ALWAYS AS IDENTITY. TokenTypeAutoIncrement TokenType = 337 - TokenTypeConstraint TokenType = 338 - TokenTypeNotNull TokenType = 339 - TokenTypeNullable TokenType = 340 + // TokenTypeConstraint represents the CONSTRAINT keyword that names a table constraint. + TokenTypeConstraint TokenType = 338 + // TokenTypeNotNull represents the NOT NULL constraint keyword pair. + TokenTypeNotNull TokenType = 339 + // TokenTypeNullable represents the NULLABLE keyword (some dialects allow explicit nullable columns). + TokenTypeNullable TokenType = 340 // Additional SQL Keywords (350-399) + + // TokenTypeDistinct represents the DISTINCT keyword for removing duplicate rows. TokenTypeDistinct TokenType = 350 - TokenTypeExists TokenType = 351 - TokenTypeAny TokenType = 352 - TokenTypeSome TokenType = 353 - TokenTypeCast TokenType = 354 - TokenTypeConvert TokenType = 355 - TokenTypeCollate TokenType = 356 - TokenTypeCascade TokenType = 357 + // TokenTypeExists represents the EXISTS keyword for subquery existence tests. + TokenTypeExists TokenType = 351 + // TokenTypeAny represents the ANY quantifier used with comparison operators and subqueries. + TokenTypeAny TokenType = 352 + // TokenTypeSome represents the SOME quantifier (synonym for ANY in most dialects). + TokenTypeSome TokenType = 353 + // TokenTypeCast represents the CAST keyword for explicit type conversion (CAST(expr AS type)). + TokenTypeCast TokenType = 354 + // TokenTypeConvert represents the CONVERT keyword for type or charset conversion (MySQL, SQL Server). + TokenTypeConvert TokenType = 355 + // TokenTypeCollate represents the COLLATE keyword specifying a collation for comparisons. 
+ TokenTypeCollate TokenType = 356 + // TokenTypeCascade represents the CASCADE option in DROP and constraint definitions. + TokenTypeCascade TokenType = 357 + // TokenTypeRestrict represents the RESTRICT option preventing drops when dependent objects exist. TokenTypeRestrict TokenType = 358 - TokenTypeReplace TokenType = 359 - TokenTypeRename TokenType = 360 - TokenTypeTo TokenType = 361 - TokenTypeIf TokenType = 362 - TokenTypeOnly TokenType = 363 - TokenTypeFor TokenType = 364 - TokenTypeNulls TokenType = 365 - TokenTypeFirst TokenType = 366 - TokenTypeLast TokenType = 367 - TokenTypeFetch TokenType = 368 // FETCH keyword for FETCH FIRST/NEXT clause - TokenTypeNext TokenType = 369 // NEXT keyword for FETCH NEXT clause + // TokenTypeReplace represents the REPLACE keyword used in INSERT OR REPLACE and REPLACE INTO (MySQL). + TokenTypeReplace TokenType = 359 + // TokenTypeRename represents the RENAME keyword used in ALTER TABLE RENAME. + TokenTypeRename TokenType = 360 + // TokenTypeTo represents the TO keyword used in RENAME ... TO and GRANT ... TO. + TokenTypeTo TokenType = 361 + // TokenTypeIf represents the IF keyword used in IF EXISTS and IF NOT EXISTS clauses. + TokenTypeIf TokenType = 362 + // TokenTypeOnly represents the ONLY keyword used in inheritance-aware queries (PostgreSQL). + TokenTypeOnly TokenType = 363 + // TokenTypeFor represents the FOR keyword used in FOR UPDATE, FOR SHARE, and FETCH FOR. + TokenTypeFor TokenType = 364 + // TokenTypeNulls represents the NULLS keyword used in NULLS FIRST / NULLS LAST ordering. + TokenTypeNulls TokenType = 365 + // TokenTypeFirst represents the FIRST keyword used in NULLS FIRST and FETCH FIRST. + TokenTypeFirst TokenType = 366 + // TokenTypeLast represents the LAST keyword used in NULLS LAST. + TokenTypeLast TokenType = 367 + // TokenTypeFetch represents the FETCH keyword beginning a FETCH FIRST/NEXT clause (SQL standard LIMIT). 
+ // Example: FETCH FIRST 10 ROWS ONLY + TokenTypeFetch TokenType = 368 + // TokenTypeNext represents the NEXT keyword used in FETCH NEXT ... ROWS ONLY. + TokenTypeNext TokenType = 369 // MERGE Statement Keywords (370-379) - TokenTypeMerge TokenType = 370 + // Keywords used in SQL:2003 MERGE statements. + + // TokenTypeMerge represents the MERGE keyword beginning a MERGE statement. + TokenTypeMerge TokenType = 370 + // TokenTypeMatched represents the MATCHED keyword in WHEN MATCHED and WHEN NOT MATCHED clauses. TokenTypeMatched TokenType = 371 - TokenTypeTarget TokenType = 372 - TokenTypeSource TokenType = 373 + // TokenTypeTarget represents the TARGET keyword (used in some dialect MERGE syntax). + TokenTypeTarget TokenType = 372 + // TokenTypeSource represents the SOURCE keyword (used in some dialect MERGE syntax). + TokenTypeSource TokenType = 373 - // Materialized View Keywords (374-379) + // Materialized View and FETCH clause Keywords (374-379) + + // TokenTypeMaterialized represents the MATERIALIZED keyword in CREATE/DROP/REFRESH MATERIALIZED VIEW. TokenTypeMaterialized TokenType = 374 - TokenTypeRefresh TokenType = 375 - TokenTypeTies TokenType = 376 // TIES keyword for WITH TIES in FETCH clause - TokenTypePercent TokenType = 377 // PERCENT keyword for FETCH ... PERCENT ROWS - TokenTypeTruncate TokenType = 378 // TRUNCATE keyword for TRUNCATE TABLE statement - TokenTypeReturning TokenType = 379 // RETURNING keyword for PostgreSQL RETURNING clause + // TokenTypeRefresh represents the REFRESH keyword in REFRESH MATERIALIZED VIEW. + TokenTypeRefresh TokenType = 375 + // TokenTypeTies represents the TIES keyword in FETCH FIRST n ROWS WITH TIES. + // WITH TIES causes the last group of rows with equal ordering values to all be returned. + TokenTypeTies TokenType = 376 + // TokenTypePercent represents the PERCENT keyword in FETCH FIRST n PERCENT ROWS ONLY. 
+ TokenTypePercent TokenType = 377 + // TokenTypeTruncate represents the TRUNCATE keyword beginning a TRUNCATE TABLE statement. + TokenTypeTruncate TokenType = 378 + // TokenTypeReturning represents the RETURNING keyword in PostgreSQL INSERT/UPDATE/DELETE statements. + // RETURNING causes the modified rows to be returned as a result set. + TokenTypeReturning TokenType = 379 // Row Locking Keywords (380-389) - TokenTypeShare TokenType = 380 // SHARE keyword for FOR SHARE row locking - TokenTypeNoWait TokenType = 381 // NOWAIT keyword for FOR UPDATE/SHARE NOWAIT - TokenTypeSkip TokenType = 382 // SKIP keyword for FOR UPDATE SKIP LOCKED - TokenTypeLocked TokenType = 383 // LOCKED keyword for SKIP LOCKED - TokenTypeOf TokenType = 384 // OF keyword for FOR UPDATE OF table_name + // Keywords used in SELECT ... FOR UPDATE/SHARE row-locking clauses. + + // TokenTypeShare represents the SHARE keyword in FOR SHARE row locking. + TokenTypeShare TokenType = 380 + // TokenTypeNoWait represents the NOWAIT keyword causing an immediate error instead of waiting + // for locked rows (FOR UPDATE NOWAIT, FOR SHARE NOWAIT). + TokenTypeNoWait TokenType = 381 + // TokenTypeSkip represents the SKIP keyword in FOR UPDATE SKIP LOCKED, + // causing locked rows to be silently skipped. + TokenTypeSkip TokenType = 382 + // TokenTypeLocked represents the LOCKED keyword in SKIP LOCKED. + TokenTypeLocked TokenType = 383 + // TokenTypeOf represents the OF keyword in FOR UPDATE OF table_name, + // restricting locking to specific tables in a JOIN. + TokenTypeOf TokenType = 384 // Grouping Set Keywords (390-399) + // Keywords used for advanced grouping in GROUP BY clauses. + + // TokenTypeGroupingSets represents the GROUPING SETS keyword pair for + // specifying explicit grouping combinations in GROUP BY. 
TokenTypeGroupingSets TokenType = 390 - TokenTypeRollup TokenType = 391 - TokenTypeCube TokenType = 392 - TokenTypeGrouping TokenType = 393 - TokenTypeSets TokenType = 394 // SETS keyword for GROUPING SETS - TokenTypeArray TokenType = 395 // ARRAY keyword for PostgreSQL array constructor - TokenTypeWithin TokenType = 396 // WITHIN keyword for WITHIN GROUP clause + // TokenTypeRollup represents the ROLLUP keyword for hierarchical grouping subtotals. + // Example: GROUP BY ROLLUP (year, quarter, month) + TokenTypeRollup TokenType = 391 + // TokenTypeCube represents the CUBE keyword for all possible grouping combinations. + // Example: GROUP BY CUBE (region, product) + TokenTypeCube TokenType = 392 + // TokenTypeGrouping represents the GROUPING function keyword that indicates whether + // a column is aggregated in a GROUPING SETS/ROLLUP/CUBE expression. + TokenTypeGrouping TokenType = 393 + // TokenTypeSets represents the SETS keyword used in GROUPING SETS (...). + TokenTypeSets TokenType = 394 + // TokenTypeArray represents the ARRAY keyword for PostgreSQL array constructors. + // Example: ARRAY[1, 2, 3] or ARRAY(SELECT id FROM users) + TokenTypeArray TokenType = 395 + // TokenTypeWithin represents the WITHIN keyword in ordered-set aggregate functions. + // Example: PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY salary) + TokenTypeWithin TokenType = 396 // Role/Permission Keywords (400-419) - TokenTypeRole TokenType = 400 - TokenTypeUser TokenType = 401 - TokenTypeGrant TokenType = 402 - TokenTypeRevoke TokenType = 403 - TokenTypePrivilege TokenType = 404 - TokenTypePassword TokenType = 405 - TokenTypeLogin TokenType = 406 - TokenTypeSuperuser TokenType = 407 - TokenTypeCreateDB TokenType = 408 + // Keywords used in GRANT, REVOKE, CREATE/ALTER ROLE, and user management statements. + + // TokenTypeRole represents the ROLE keyword in CREATE/ALTER/DROP ROLE statements. 
+ TokenTypeRole TokenType = 400 + // TokenTypeUser represents the USER keyword in CREATE/ALTER/DROP USER statements. + TokenTypeUser TokenType = 401 + // TokenTypeGrant represents the GRANT keyword for granting privileges to roles/users. + TokenTypeGrant TokenType = 402 + // TokenTypeRevoke represents the REVOKE keyword for revoking previously granted privileges. + TokenTypeRevoke TokenType = 403 + // TokenTypePrivilege represents the PRIVILEGE keyword in GRANT/REVOKE statements. + TokenTypePrivilege TokenType = 404 + // TokenTypePassword represents the PASSWORD keyword in ALTER USER ... PASSWORD statements. + TokenTypePassword TokenType = 405 + // TokenTypeLogin represents the LOGIN option keyword in CREATE/ALTER ROLE ... LOGIN. + TokenTypeLogin TokenType = 406 + // TokenTypeSuperuser represents the SUPERUSER option keyword in CREATE/ALTER ROLE. + TokenTypeSuperuser TokenType = 407 + // TokenTypeCreateDB represents the CREATEDB option keyword allowing a role to create databases. + TokenTypeCreateDB TokenType = 408 + // TokenTypeCreateRole represents the CREATEROLE option keyword allowing a role to create other roles. TokenTypeCreateRole TokenType = 409 // Transaction Keywords (420-429) - TokenTypeBegin TokenType = 420 - TokenTypeCommit TokenType = 421 - TokenTypeRollback TokenType = 422 + // Keywords for transaction control statements. + + // TokenTypeBegin represents the BEGIN keyword starting an explicit transaction block. + TokenTypeBegin TokenType = 420 + // TokenTypeCommit represents the COMMIT keyword permanently saving a transaction. + TokenTypeCommit TokenType = 421 + // TokenTypeRollback represents the ROLLBACK keyword undoing a transaction. + TokenTypeRollback TokenType = 422 + // TokenTypeSavepoint represents the SAVEPOINT keyword creating a named transaction savepoint. 
TokenTypeSavepoint TokenType = 423 // Data Type Keywords (430-449) - TokenTypeInt TokenType = 430 - TokenTypeInteger TokenType = 431 - TokenTypeBigInt TokenType = 432 - TokenTypeSmallInt TokenType = 433 - TokenTypeFloat TokenType = 434 - TokenTypeDouble TokenType = 435 - TokenTypeDecimal TokenType = 436 - TokenTypeNumeric TokenType = 437 - TokenTypeVarchar TokenType = 438 - TokenTypeCharDataType TokenType = 439 // Char as data type (TokenTypeChar=12 is for single char token) - TokenTypeText TokenType = 440 - TokenTypeBoolean TokenType = 441 - TokenTypeDate TokenType = 442 - TokenTypeTime TokenType = 443 - TokenTypeTimestamp TokenType = 444 - TokenTypeInterval TokenType = 445 - TokenTypeBlob TokenType = 446 - TokenTypeClob TokenType = 447 - TokenTypeJson TokenType = 448 - TokenTypeUuid TokenType = 449 - - // Special Token Types (500-509) - TokenTypeIllegal TokenType = 500 // For parser compatibility with token.ILLEGAL - TokenTypeAsterisk TokenType = 501 // Explicit asterisk token type - TokenTypeDoublePipe TokenType = 502 // || concatenation operator - TokenTypeILike TokenType = 503 // ILIKE (case-insensitive LIKE, PostgreSQL) - TokenTypeAdd TokenType = 504 // ADD keyword for ALTER TABLE ADD - TokenTypeNosuperuser TokenType = 505 // NOSUPERUSER keyword for ALTER ROLE - TokenTypeNocreatedb TokenType = 506 // NOCREATEDB keyword for ALTER ROLE - TokenTypeNocreaterole TokenType = 507 // NOCREATEROLE keyword for ALTER ROLE - TokenTypeNologin TokenType = 508 // NOLOGIN keyword for ALTER ROLE - TokenTypeValid TokenType = 509 // VALID keyword for ALTER ROLE - TokenTypeDcproperties TokenType = 510 // DCPROPERTIES keyword for ALTER CONNECTOR - TokenTypeUrl TokenType = 511 // URL keyword for ALTER CONNECTOR - TokenTypeOwner TokenType = 512 // OWNER keyword for ALTER CONNECTOR - TokenTypeMember TokenType = 513 // MEMBER keyword for ALTER ROLE - TokenTypeConnector TokenType = 514 // CONNECTOR keyword for CREATE/ALTER CONNECTOR - TokenTypePolicy TokenType = 515 // POLICY keyword 
for CREATE/ALTER POLICY - TokenTypeUntil TokenType = 516 // UNTIL keyword for VALID UNTIL - TokenTypeReset TokenType = 517 // RESET keyword for ALTER ROLE RESET - TokenTypeShow TokenType = 518 // SHOW keyword for MySQL SHOW commands - TokenTypeDescribe TokenType = 519 // DESCRIBE keyword for MySQL DESCRIBE command - TokenTypeExplain TokenType = 520 // EXPLAIN keyword + // SQL built-in data type keywords recognised by the tokenizer. + + // TokenTypeInt represents the INT data type keyword (32-bit signed integer). + TokenTypeInt TokenType = 430 + // TokenTypeInteger represents the INTEGER data type keyword (synonym for INT in most dialects). + TokenTypeInteger TokenType = 431 + // TokenTypeBigInt represents the BIGINT data type keyword (64-bit signed integer). + TokenTypeBigInt TokenType = 432 + // TokenTypeSmallInt represents the SMALLINT data type keyword (16-bit signed integer). + TokenTypeSmallInt TokenType = 433 + // TokenTypeFloat represents the FLOAT data type keyword (single or double precision floating-point). + TokenTypeFloat TokenType = 434 + // TokenTypeDouble represents the DOUBLE or DOUBLE PRECISION data type keyword. + TokenTypeDouble TokenType = 435 + // TokenTypeDecimal represents the DECIMAL(p,s) fixed-precision data type keyword. + TokenTypeDecimal TokenType = 436 + // TokenTypeNumeric represents the NUMERIC(p,s) fixed-precision data type keyword (synonym for DECIMAL). + TokenTypeNumeric TokenType = 437 + // TokenTypeVarchar represents the VARCHAR(n) variable-length character data type keyword. + TokenTypeVarchar TokenType = 438 + // TokenTypeCharDataType represents the CHAR(n) fixed-length character data type keyword. + // Note: this is distinct from TokenTypeChar (value 12) which represents a single character token. + TokenTypeCharDataType TokenType = 439 + // TokenTypeText represents the TEXT data type keyword for variable-length text. + TokenTypeText TokenType = 440 + // TokenTypeBoolean represents the BOOLEAN data type keyword. 
+ TokenTypeBoolean TokenType = 441 + // TokenTypeDate represents the DATE data type keyword (calendar date without time). + TokenTypeDate TokenType = 442 + // TokenTypeTime represents the TIME data type keyword (time of day without date). + TokenTypeTime TokenType = 443 + // TokenTypeTimestamp represents the TIMESTAMP data type keyword (date and time). + TokenTypeTimestamp TokenType = 444 + // TokenTypeInterval represents the INTERVAL data type keyword for time durations. + TokenTypeInterval TokenType = 445 + // TokenTypeBlob represents the BLOB data type keyword for binary large objects. + TokenTypeBlob TokenType = 446 + // TokenTypeClob represents the CLOB data type keyword for character large objects. + TokenTypeClob TokenType = 447 + // TokenTypeJson represents the JSON data type keyword (PostgreSQL, MySQL 5.7+). + TokenTypeJson TokenType = 448 + // TokenTypeUuid represents the UUID data type keyword (PostgreSQL, SQL Server). + TokenTypeUuid TokenType = 449 + + // Special Token Types (500+) + // Miscellaneous tokens that do not fit cleanly into a single numeric range. + + // TokenTypeIllegal is used for parser compatibility with internal ILLEGAL token values. + TokenTypeIllegal TokenType = 500 + // TokenTypeAsterisk represents an explicit * token used as a wildcard or multiply operator. + // Distinct from TokenTypeMul (62) to allow unambiguous identification of the asterisk character. + TokenTypeAsterisk TokenType = 501 + // TokenTypeDoublePipe represents the || string concatenation operator (SQL standard). + // Distinct from TokenTypeStringConcat (66) for cases where dialect disambiguation is needed. + TokenTypeDoublePipe TokenType = 502 + // TokenTypeILike represents the ILIKE case-insensitive pattern-matching operator (PostgreSQL). + TokenTypeILike TokenType = 503 + // TokenTypeAdd represents the ADD keyword used in ALTER TABLE ADD COLUMN. 
+ TokenTypeAdd TokenType = 504 + // TokenTypeNosuperuser represents the NOSUPERUSER option in ALTER ROLE, removing superuser privilege. + TokenTypeNosuperuser TokenType = 505 + // TokenTypeNocreatedb represents the NOCREATEDB option in ALTER ROLE, removing database creation privilege. + TokenTypeNocreatedb TokenType = 506 + // TokenTypeNocreaterole represents the NOCREATEROLE option in ALTER ROLE, removing role creation privilege. + TokenTypeNocreaterole TokenType = 507 + // TokenTypeNologin represents the NOLOGIN option in ALTER ROLE, preventing login. + TokenTypeNologin TokenType = 508 + // TokenTypeValid represents the VALID keyword used in VALID UNTIL role attribute. + TokenTypeValid TokenType = 509 + // TokenTypeDcproperties represents the DCPROPERTIES keyword used in ALTER CONNECTOR. + TokenTypeDcproperties TokenType = 510 + // TokenTypeUrl represents the URL keyword used in CREATE/ALTER CONNECTOR statements. + TokenTypeUrl TokenType = 511 + // TokenTypeOwner represents the OWNER keyword used in ALTER CONNECTOR ... OWNER TO. + TokenTypeOwner TokenType = 512 + // TokenTypeMember represents the MEMBER keyword used in ALTER ROLE ... MEMBER. + TokenTypeMember TokenType = 513 + // TokenTypeConnector represents the CONNECTOR keyword used in CREATE/ALTER CONNECTOR statements. + TokenTypeConnector TokenType = 514 + // TokenTypePolicy represents the POLICY keyword used in CREATE/ALTER POLICY statements. + TokenTypePolicy TokenType = 515 + // TokenTypeUntil represents the UNTIL keyword used in VALID UNTIL date expressions. + TokenTypeUntil TokenType = 516 + // TokenTypeReset represents the RESET keyword used in ALTER ROLE ... RESET parameter. + TokenTypeReset TokenType = 517 + // TokenTypeShow represents the SHOW keyword used in MySQL SHOW TABLES, SHOW COLUMNS, etc. + TokenTypeShow TokenType = 518 + // TokenTypeDescribe represents the DESCRIBE keyword used in MySQL DESCRIBE table_name. 
+ TokenTypeDescribe TokenType = 519 + // TokenTypeExplain represents the EXPLAIN keyword used to display query execution plans. + TokenTypeExplain TokenType = 520 ) // String returns a string representation of the token type. @@ -1089,7 +1444,15 @@ func (t TokenType) IsLiteral() bool { return false } -// IsDMLKeyword returns true if the token type is a DML keyword +// IsDMLKeyword returns true if the token type is a Data Manipulation Language keyword. +// +// Covered DML keywords: SELECT, INSERT, UPDATE, DELETE, INTO, VALUES, SET, FROM, WHERE. +// +// Example: +// +// if token.Type.IsDMLKeyword() { +// // Handle DML keyword (SELECT, INSERT, UPDATE, DELETE, etc.) +// } func (t TokenType) IsDMLKeyword() bool { switch t { case TokenTypeSelect, TokenTypeInsert, TokenTypeUpdate, TokenTypeDelete, @@ -1099,7 +1462,16 @@ func (t TokenType) IsDMLKeyword() bool { return false } -// IsDDLKeyword returns true if the token type is a DDL keyword +// IsDDLKeyword returns true if the token type is a Data Definition Language keyword. +// +// Covered DDL keywords: CREATE, ALTER, DROP, TRUNCATE, TABLE, INDEX, VIEW, COLUMN, +// DATABASE, SCHEMA, TRIGGER. +// +// Example: +// +// if token.Type.IsDDLKeyword() { +// // Handle DDL keyword (CREATE, ALTER, DROP, TABLE, etc.) +// } func (t TokenType) IsDDLKeyword() bool { switch t { case TokenTypeCreate, TokenTypeAlter, TokenTypeDrop, TokenTypeTruncate, TokenTypeTable, @@ -1110,7 +1482,16 @@ func (t TokenType) IsDDLKeyword() bool { return false } -// IsJoinKeyword returns true if the token type is a JOIN-related keyword +// IsJoinKeyword returns true if the token type is a JOIN-related keyword. +// +// Covered JOIN keywords: JOIN, INNER, LEFT, RIGHT, OUTER, CROSS, NATURAL, FULL, +// INNER JOIN, LEFT JOIN, RIGHT JOIN, OUTER JOIN, FULL JOIN, CROSS JOIN, ON, USING. +// +// Example: +// +// if token.Type.IsJoinKeyword() { +// // Handle JOIN keyword (JOIN, INNER, LEFT, RIGHT, ON, USING, etc.) 
+// } func (t TokenType) IsJoinKeyword() bool { switch t { case TokenTypeJoin, TokenTypeInner, TokenTypeLeft, TokenTypeRight, @@ -1123,7 +1504,20 @@ func (t TokenType) IsJoinKeyword() bool { return false } -// IsWindowKeyword returns true if the token type is a window function keyword +// IsWindowKeyword returns true if the token type is a window function keyword. +// +// Covered window keywords: OVER, PARTITION, ROWS, RANGE, UNBOUNDED, PRECEDING, +// FOLLOWING, CURRENT, ROW, GROUPS, FILTER, EXCLUDE. +// +// These keywords appear in window function specifications: +// +// RANK() OVER (PARTITION BY dept ORDER BY salary ROWS UNBOUNDED PRECEDING) +// +// Example: +// +// if token.Type.IsWindowKeyword() { +// // Handle window keyword (OVER, PARTITION BY, ROWS, RANGE, etc.) +// } func (t TokenType) IsWindowKeyword() bool { switch t { case TokenTypeOver, TokenTypePartition, TokenTypeRows, TokenTypeRange, @@ -1135,7 +1529,19 @@ func (t TokenType) IsWindowKeyword() bool { return false } -// IsAggregateFunction returns true if the token type is an aggregate function +// IsAggregateFunction returns true if the token type is a standard SQL aggregate function. +// +// Covered aggregate functions: COUNT, SUM, AVG, MIN, MAX. +// +// Note: This method covers only the five standard SQL aggregate functions. Other +// aggregate functions (e.g., ARRAY_AGG, STRING_AGG, JSON_AGG) are represented as +// TokenTypeWord or TokenTypeIdentifier tokens. 
+// +// Example: +// +// if token.Type.IsAggregateFunction() { +// // Handle aggregate function (COUNT, SUM, AVG, MIN, MAX) +// } func (t TokenType) IsAggregateFunction() bool { switch t { case TokenTypeCount, TokenTypeSum, TokenTypeAvg, TokenTypeMin, TokenTypeMax: @@ -1157,7 +1563,16 @@ func (t TokenType) IsDataType() bool { return t >= TokenRangeDataTypeStart && t < TokenRangeDataTypeEnd } -// IsConstraint returns true if the token type is a constraint keyword +// IsConstraint returns true if the token type is a table or column constraint keyword. +// +// Covered constraint keywords: PRIMARY, KEY, FOREIGN, REFERENCES, UNIQUE, CHECK, +// DEFAULT, AUTO_INCREMENT, CONSTRAINT, NOT NULL, NULLABLE. +// +// Example: +// +// if token.Type.IsConstraint() { +// // Handle constraint keyword (PRIMARY KEY, FOREIGN KEY, UNIQUE, CHECK, etc.) +// } func (t TokenType) IsConstraint() bool { switch t { case TokenTypePrimary, TokenTypeKey, TokenTypeForeign, TokenTypeReferences, @@ -1168,7 +1583,21 @@ func (t TokenType) IsConstraint() bool { return false } -// IsSetOperation returns true if the token type is a set operation +// IsSetOperation returns true if the token type is a set operation keyword. +// +// Covered set operations: UNION, EXCEPT, INTERSECT, ALL. +// +// These keywords combine multiple query result sets: +// +// SELECT id FROM users UNION ALL SELECT id FROM admins +// SELECT id FROM a EXCEPT SELECT id FROM b +// SELECT id FROM a INTERSECT SELECT id FROM b +// +// Example: +// +// if token.Type.IsSetOperation() { +// // Handle set operation keyword (UNION, EXCEPT, INTERSECT, ALL) +// } func (t TokenType) IsSetOperation() bool { switch t { case TokenTypeUnion, TokenTypeExcept, TokenTypeIntersect, TokenTypeAll: diff --git a/pkg/schema/schema.go b/pkg/schema/schema.go index 3fa9eed0..007751db 100644 --- a/pkg/schema/schema.go +++ b/pkg/schema/schema.go @@ -14,13 +14,15 @@ // Package schema provides schema-aware SQL validation for GoSQLX. 
// -// This package allows users to define database schemas (tables, columns, -// constraints) and validate SQL queries against them. It can detect -// references to non-existent tables or columns, ambiguous column references, -// and INSERT column count mismatches. -// -// Schemas can be built programmatically or loaded from DDL (CREATE TABLE) -// statements using GoSQLX's own parser. +// The package defines a hierarchy of Schema, Table, Column, and Catalog types that +// represent a relational database schema. A Validator created from a Schema (or Catalog) +// walks the AST produced by the GoSQLX parser and reports semantic errors such as +// references to non-existent tables or columns, ambiguous cross-schema column references, +// and INSERT column-count mismatches. Schemas can be built programmatically or loaded +// from DDL (CREATE TABLE) statements using GoSQLX's own parser via LoadFromDDL. +// All table and column lookups are case-insensitive to match SQL standard behaviour. +// For multi-schema environments the Catalog type resolves table references across schemas +// and returns a clear error when the same table name exists in more than one schema. // // Example - Programmatic schema building: // diff --git a/pkg/sql/ast/alter.go b/pkg/sql/ast/alter.go index 52b47704..6378429a 100644 --- a/pkg/sql/ast/alter.go +++ b/pkg/sql/ast/alter.go @@ -26,6 +26,7 @@ const ( Hash ) +// String returns the SQL keyword for this index type: "BTREE" or "HASH". func (t IndexType) String() string { switch t { case BTree: @@ -44,13 +45,18 @@ type IndexOption struct { Comment string // Used for Comment } +// IndexOptionType identifies which kind of index option is represented. type IndexOptionType int const ( + // UsingIndex specifies the index access method (e.g. USING BTREE). UsingIndex IndexOptionType = iota + // CommentIndex attaches a comment string to the index. CommentIndex ) +// String returns the SQL representation of this index option (e.g. 
"USING BTREE" +// or "COMMENT 'text'"). func (opt *IndexOption) String() string { switch opt.Type { case UsingIndex: @@ -71,6 +77,8 @@ const ( NullsNotDistinct ) +// String returns the SQL keyword phrase for this nulls-distinct option: +// "NULLS DISTINCT", "NULLS NOT DISTINCT", or an empty string for the default. func (opt NullsDistinctOption) String() string { switch opt { case NullsDistinct: @@ -89,8 +97,13 @@ type AlterStatement struct { Operation AlterOperation } -func (a *AlterStatement) statementNode() {} +func (a *AlterStatement) statementNode() {} + +// TokenLiteral implements Node and returns "ALTER". func (a AlterStatement) TokenLiteral() string { return "ALTER" } + +// Children implements Node and returns the alter operation as a single child, +// or nil if no operation is set. func (a AlterStatement) Children() []Node { if a.Operation != nil { return []Node{a.Operation} @@ -143,7 +156,13 @@ type AlterTableOperation struct { } func (a *AlterTableOperation) alterOperationNode() {} + +// TokenLiteral implements Node and returns "ALTER TABLE". func (a AlterTableOperation) TokenLiteral() string { return "ALTER TABLE" } + +// Children implements Node and returns the child nodes involved in this +// ALTER TABLE operation: the column definition, constraint, projection select, +// and/or alter column operation, depending on the operation type. func (a AlterTableOperation) Children() []Node { var children []Node if a.ColumnDef != nil { @@ -190,21 +209,34 @@ type RoleOption struct { Value interface{} // Can be bool or Expression depending on Type } +// RoleOptionType identifies which role attribute is being set. type RoleOptionType int const ( + // BypassRLS controls whether the role bypasses row-level security policies. BypassRLS RoleOptionType = iota + // ConnectionLimit sets the maximum number of concurrent connections for the role. ConnectionLimit + // CreateDB allows or prevents the role from creating new databases. 
CreateDB + // CreateRole allows or prevents the role from creating new roles. CreateRole + // Inherit controls whether the role inherits privileges of roles it is a member of. Inherit + // Login allows or prevents the role from logging in. Login + // Password sets the password for the role. Password + // Replication controls whether the role can initiate streaming replication. Replication + // SuperUser grants or revokes superuser privileges. SuperUser + // ValidUntil sets the date and time after which the role's password is no longer valid. ValidUntil ) +// String returns the SQL keyword phrase for this role option (e.g. "SUPERUSER", +// "NOSUPERUSER", "LOGIN", "CONNECTION LIMIT 10", "PASSWORD NULL", etc.). func (opt *RoleOption) String() string { switch opt.Type { case BypassRLS: @@ -268,7 +300,12 @@ type AlterRoleOperation struct { } func (a *AlterRoleOperation) alterOperationNode() {} + +// TokenLiteral implements Node and returns "ALTER ROLE". func (a AlterRoleOperation) TokenLiteral() string { return "ALTER ROLE" } + +// Children implements Node and returns the ConfigValue expression as a child, +// or nil if no config value is set. func (a AlterRoleOperation) Children() []Node { var children []Node if a.ConfigValue != nil { @@ -299,7 +336,12 @@ type AlterPolicyOperation struct { } func (a *AlterPolicyOperation) alterOperationNode() {} + +// TokenLiteral implements Node and returns "ALTER POLICY". func (a AlterPolicyOperation) TokenLiteral() string { return "ALTER POLICY" } + +// Children implements Node and returns the USING and WITH CHECK expressions +// as child nodes (any nil expressions are omitted). func (a AlterPolicyOperation) Children() []Node { var children []Node if a.Using != nil { @@ -327,8 +369,12 @@ type AlterConnectorOperation struct { } func (a *AlterConnectorOperation) alterOperationNode() {} + +// TokenLiteral implements Node and returns "ALTER CONNECTOR". 
func (a AlterConnectorOperation) TokenLiteral() string { return "ALTER CONNECTOR" } -func (a AlterConnectorOperation) Children() []Node { return nil } + +// Children implements Node and returns nil — AlterConnectorOperation has no child nodes. +func (a AlterConnectorOperation) Children() []Node { return nil } // AlterConnectorOwner represents the new owner of a connector type AlterConnectorOwner struct { diff --git a/pkg/sql/ast/ast.go b/pkg/sql/ast/ast.go index a47ed225..53ee4fc8 100644 --- a/pkg/sql/ast/ast.go +++ b/pkg/sql/ast/ast.go @@ -12,38 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast provides Abstract Syntax Tree (AST) node definitions for SQL statements. -// -// This package implements a comprehensive AST representation for SQL with support for -// multiple SQL dialects (PostgreSQL, MySQL, SQL Server, Oracle, SQLite). It includes -// extensive object pooling for memory efficiency and high-performance SQL parsing. -// -// For complete documentation including architecture overview, usage examples, visitor -// pattern, and feature support matrix, see the package-level documentation in doc.go. -// -// Key features: -// - Complete SQL-99/SQL:2003 statement support (DDL, DML, CTEs, window functions) -// - PostgreSQL extensions (LATERAL, DISTINCT ON, FILTER, RETURNING, JSON operators) -// - Advanced grouping (GROUPING SETS, ROLLUP, CUBE) -// - MERGE statements (SQL:2003 F312) -// - Object pooling for 60-80% memory reduction -// - Thread-safe with zero race conditions -// - Visitor pattern for AST traversal -// -// Quick Start Example: -// -// // Get AST from pool -// astObj := ast.NewAST() -// defer ast.ReleaseAST(astObj) // Always use defer -// -// // Get SELECT statement from pool -// stmt := ast.GetSelectStatement() -// defer ast.PutSelectStatement(stmt) -// -// // Build and use AST nodes... 
-// -// Version 1.6.0 adds PostgreSQL extensions including LATERAL JOIN, DISTINCT ON, -// FILTER clause, RETURNING clause, JSON/JSONB operators, and FETCH FIRST/NEXT. package ast import ( @@ -1759,14 +1727,32 @@ func (p PartitionDefinition) Children() []Node { return children } -// AST represents the root of the Abstract Syntax Tree +// AST represents the root of the Abstract Syntax Tree produced by parsing one or +// more SQL statements. +// +// AST is obtained from the pool via NewAST and must be returned via ReleaseAST +// when the caller no longer needs it: +// +// tree, err := p.ParseFromModelTokens(tokens) +// if err != nil { return err } +// defer ast.ReleaseAST(tree) +// +// The Statements slice contains one entry per SQL statement separated by +// semicolons. Comments captured during tokenization are preserved in Comments +// for formatters that wish to round-trip them. +// +// SQL() returns the canonical SQL string for all statements joined by ";\n". +// Span() returns the union of all statement spans for source-location tracking. type AST struct { Statements []Statement Comments []models.Comment // Comments captured during tokenization, preserved during formatting } +// TokenLiteral implements Node. Returns an empty string — the AST root has no +// representative keyword. func (a AST) TokenLiteral() string { return "" } +// Children implements Node and returns all top-level statements as a slice of Node. func (a AST) Children() []Node { children := make([]Node, len(a.Statements)) for i, stmt := range a.Statements { diff --git a/pkg/sql/ast/data_type.go b/pkg/sql/ast/data_type.go index ffd1318d..e12c966e 100644 --- a/pkg/sql/ast/data_type.go +++ b/pkg/sql/ast/data_type.go @@ -40,6 +40,8 @@ type EnumMember struct { Value Expression } +// String returns the SQL representation of this enum member, e.g. "'label'" +// or "'label' = expr". 
func (e *EnumMember) String() string { if e.Value != nil { return fmt.Sprintf("'%s' = %s", escapeString(e.Name), e.Value.TokenLiteral()) @@ -64,6 +66,8 @@ type CharacterLength struct { Unit *CharLengthUnits } +// String returns the SQL representation of this character length specification, +// e.g. "10" or "10 CHARACTERS". func (c *CharacterLength) String() string { if c.Unit != nil { return fmt.Sprintf("%d %s", c.Length, c.Unit) @@ -79,6 +83,7 @@ const ( Octets ) +// String returns the SQL keyword for this character length unit: "CHARACTERS" or "OCTETS". func (u CharLengthUnits) String() string { switch u { case Characters: @@ -96,6 +101,8 @@ type BinaryLength struct { IsMax bool } +// String returns the SQL representation of this binary length: "MAX" when +// IsMax is true, otherwise the numeric length as a string. func (b *BinaryLength) String() string { if b.IsMax { return "MAX" @@ -113,6 +120,8 @@ const ( Tz ) +// String returns the SQL timezone clause for this timezone info, e.g. +// " WITH TIME ZONE", " WITHOUT TIME ZONE", or "" for NoTimezone. func (t TimezoneInfo) String() string { switch t { case NoTimezone: @@ -134,6 +143,8 @@ type ExactNumberInfo struct { Scale *uint64 } +// String returns the SQL precision/scale clause, e.g. "(10)", "(10,2)", or "" +// when no precision is specified. func (e *ExactNumberInfo) String() string { if e.Precision == nil { return "" @@ -151,15 +162,22 @@ type ArrayElemTypeDef struct { Brackets ArrayBracketType } +// ArrayBracketType identifies the bracket style used to express an array type. type ArrayBracketType int const ( + // NoBrackets represents a bare ARRAY keyword with no type argument. NoBrackets ArrayBracketType = iota + // AngleBrackets represents the ARRAY<T> angle-bracket syntax (e.g. BigQuery). AngleBrackets + // SquareBrackets represents the T[] or T[N] syntax (e.g. PostgreSQL). SquareBrackets + // Parentheses represents the Array(T) syntax. 
Parentheses ) +// String returns the SQL representation of this array element type definition, +// e.g. "ARRAY", "INT[]", "INT[10]", or "Array(INT)". func (a *ArrayElemTypeDef) String() string { if a.Type == nil { return "ARRAY" @@ -277,7 +295,8 @@ func (*JsonType) isDataType() {} func (*BinaryType) isDataType() {} func (*CustomType) isDataType() {} -// String implementations for data types +// String returns the SQL representation of this TABLE type, listing all column +// definitions inside parentheses, e.g. "TABLE(id INT, name VARCHAR(255))". func (t *TableType) String() string { var cols []string for _, col := range t.Columns { @@ -286,6 +305,8 @@ func (t *TableType) String() string { return fmt.Sprintf("TABLE(%s)", strings.Join(cols, ", ")) } +// String returns the SQL representation of this CHARACTER type, e.g. +// "CHARACTER" or "CHARACTER(10)". func (t *CharacterType) String() string { if t.Length == nil { return "CHARACTER" @@ -293,6 +314,8 @@ func (t *CharacterType) String() string { return fmt.Sprintf("CHARACTER(%s)", t.Length) } +// String returns the SQL representation of this VARCHAR type, e.g. +// "VARCHAR" or "VARCHAR(255)". func (t *VarcharType) String() string { if t.Length == nil { return "VARCHAR" @@ -300,6 +323,8 @@ func (t *VarcharType) String() string { return fmt.Sprintf("VARCHAR(%s)", t.Length) } +// String returns the SQL representation of this NUMERIC type, e.g. +// "NUMERIC", "NUMERIC(10)", or "NUMERIC(10,2)". func (t *NumericType) String() string { if t.Info == nil { return "NUMERIC" @@ -307,6 +332,8 @@ func (t *NumericType) String() string { return fmt.Sprintf("NUMERIC%s", t.Info) } +// String returns the SQL representation of this INTEGER type, e.g. +// "INTEGER", "INTEGER(11)", or "INTEGER UNSIGNED". func (t *IntegerType) String() string { var result string if t.Length == nil { @@ -320,6 +347,8 @@ func (t *IntegerType) String() string { return result } +// String returns the SQL representation of this FLOAT type, e.g. 
+// "FLOAT" or "FLOAT(24)". func (t *FloatType) String() string { if t.Length == nil { return "FLOAT" @@ -327,9 +356,14 @@ func (t *FloatType) String() string { return fmt.Sprintf("FLOAT(%d)", *t.Length) } +// String returns "BOOLEAN". func (*BooleanType) String() string { return "BOOLEAN" } -func (*DateType) String() string { return "DATE" } +// String returns "DATE". +func (*DateType) String() string { return "DATE" } + +// String returns the SQL representation of this TIME type, e.g. +// "TIME", "TIME(3)", "TIME WITH TIME ZONE", or "TIME(6) WITHOUT TIME ZONE". func (t *TimeType) String() string { var result string if t.Precision == nil { @@ -343,6 +377,8 @@ func (t *TimeType) String() string { return result } +// String returns the SQL representation of this TIMESTAMP type, e.g. +// "TIMESTAMP", "TIMESTAMP(3)", or "TIMESTAMP WITH TIME ZONE". func (t *TimestampType) String() string { var result string if t.Precision == nil { @@ -356,6 +392,8 @@ func (t *TimestampType) String() string { return result } +// String returns the SQL representation of this ARRAY type by delegating to the +// element type definition, or "ARRAY" when no element type is specified. func (t *ArrayType) String() string { if t.ElementType == nil { return "ARRAY" @@ -363,6 +401,8 @@ func (t *ArrayType) String() string { return t.ElementType.String() } +// String returns the SQL representation of this ENUM type listing all values, +// e.g. "ENUM('red', 'green', 'blue')" or "ENUM8('a', 'b')". func (t *EnumType) String() string { var values []string for _, v := range t.Values { @@ -377,6 +417,8 @@ func (t *EnumType) String() string { return fmt.Sprintf("%s(%s)", result, strings.Join(values, ", ")) } +// String returns the SQL representation of this SET type listing all values, +// e.g. "SET('a', 'b', 'c')". 
func (t *SetType) String() string { var values []string for _, v := range t.Values { @@ -385,8 +427,11 @@ func (t *SetType) String() string { return fmt.Sprintf("SET(%s)", strings.Join(values, ", ")) } +// String returns "JSON". func (*JsonType) String() string { return "JSON" } +// String returns the SQL representation of this BINARY type, e.g. +// "BINARY", "BINARY(16)", or "BINARY(MAX)". func (t *BinaryType) String() string { if t.Length == nil { return "BINARY" @@ -394,6 +439,8 @@ func (t *BinaryType) String() string { return fmt.Sprintf("BINARY(%s)", t.Length) } +// String returns the SQL representation of this custom/user-defined type, +// e.g. "my_type" or "my_type(arg1, arg2)". func (t *CustomType) String() string { if len(t.Modifiers) == 0 { return t.Name.String() diff --git a/pkg/sql/ast/dml.go b/pkg/sql/ast/dml.go index 04ce2209..498d63b5 100644 --- a/pkg/sql/ast/dml.go +++ b/pkg/sql/ast/dml.go @@ -27,8 +27,12 @@ type Select struct { Offset *int64 } -func (s *Select) statementNode() {} +func (s *Select) statementNode() {} + +// TokenLiteral implements Node and returns "SELECT". func (s Select) TokenLiteral() string { return "SELECT" } + +// Children implements Node and returns all child nodes of this Select statement. func (s Select) Children() []Node { children := make([]Node, 0) children = append(children, nodifyExpressions(s.Columns)...) @@ -58,8 +62,12 @@ type Insert struct { ReturningClause []Expression } -func (i *Insert) statementNode() {} +func (i *Insert) statementNode() {} + +// TokenLiteral implements Node and returns "INSERT". func (i Insert) TokenLiteral() string { return "INSERT" } + +// Children implements Node and returns all child nodes of this Insert statement. 
func (i Insert) Children() []Node { children := make([]Node, 0) children = append(children, &i.Table) @@ -78,8 +86,12 @@ type Delete struct { ReturningClause []Expression } -func (d *Delete) statementNode() {} +func (d *Delete) statementNode() {} + +// TokenLiteral implements Node and returns "DELETE". func (d Delete) TokenLiteral() string { return "DELETE" } + +// Children implements Node and returns all child nodes of this Delete statement. func (d Delete) Children() []Node { children := make([]Node, 0) children = append(children, &d.Table) @@ -98,8 +110,12 @@ type Update struct { ReturningClause []Expression } -func (u *Update) statementNode() {} +func (u *Update) statementNode() {} + +// TokenLiteral implements Node and returns "UPDATE". func (u Update) TokenLiteral() string { return "UPDATE" } + +// Children implements Node and returns all child nodes of this Update statement. func (u Update) Children() []Node { children := make([]Node, 0) children = append(children, &u.Table) diff --git a/pkg/sql/ast/doc.go b/pkg/sql/ast/doc.go index 86616fae..43cb6379 100644 --- a/pkg/sql/ast/doc.go +++ b/pkg/sql/ast/doc.go @@ -12,7 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast provides Abstract Syntax Tree (AST) node definitions for SQL statements. +// Package ast provides Abstract Syntax Tree (AST) node definitions, visitor-based +// traversal, and SQL() serialization for SQL statements parsed by GoSQLX. +// +// Key types include AST (the root container), Statement (SelectStatement, +// InsertStatement, UpdateStatement, DeleteStatement, MergeStatement, etc.), +// Expression (Identifier, BinaryExpression, FunctionCall, CaseExpression, etc.), +// and WindowSpec for window-function specifications. Use NewAST/ReleaseAST, +// GetSelectStatement/PutSelectStatement, and analogous pool helpers to minimize +// allocations. 
Traverse any AST with Walk (Visitor interface) or Inspect +// (function-based). Call SQL() on any node to serialize it back to a SQL string. // // This package implements a comprehensive AST representation for SQL with support for // multiple SQL dialects (PostgreSQL, MySQL, SQL Server, Oracle, SQLite). It includes diff --git a/pkg/sql/ast/format.go b/pkg/sql/ast/format.go index 26754400..174ae8cf 100644 --- a/pkg/sql/ast/format.go +++ b/pkg/sql/ast/format.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast provides SQL formatting option types shared between the AST and +// This file provides SQL formatting option types shared between the AST and // the formatter package. // // Rendering logic lives in pkg/formatter — AST nodes are pure data structures diff --git a/pkg/sql/ast/format_compat.go b/pkg/sql/ast/format_compat.go index c07ed9de..e840a166 100644 --- a/pkg/sql/ast/format_compat.go +++ b/pkg/sql/ast/format_compat.go @@ -12,18 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast — backward-compatibility shims. -// -// This file re-adds Format(FormatOptions) string methods that were removed in -// the refactor/move-formatting PR (#342). They now delegate to pkg/formatter -// via the FormatStatementFunc / FormatExpressionFunc / FormatASTFunc hooks -// that pkg/formatter sets in its init() function, avoiding the import cycle +// This file contains backward-compatibility shims for the Format(FormatOptions) +// string methods that were removed in the refactor/move-formatting PR (#342). +// They now delegate to pkg/formatter via hooks (FormatStatementFunc, etc.) 
that +// pkg/formatter sets in its init() function, avoiding the import cycle: // // pkg/sql/ast → pkg/formatter → pkg/sql/ast // -// If pkg/formatter has not been imported the methods fall back to SQL() output -// so that existing code continues to compile and run (with unformatted output). -// +// If pkg/formatter has not been imported the methods fall back to SQL() output. // All methods are marked deprecated; callers should migrate to // pkg/formatter.FormatStatement(), FormatExpression() or FormatAST() directly. package ast diff --git a/pkg/sql/ast/function.go b/pkg/sql/ast/function.go index 8d6f5c04..749504b3 100644 --- a/pkg/sql/ast/function.go +++ b/pkg/sql/ast/function.go @@ -38,6 +38,8 @@ type FunctionDesc struct { Arguments []string } +// String returns the SQL representation of this function descriptor, including +// the optional schema qualifier and argument list (e.g. "schema.func(arg1, arg2)"). func (f FunctionDesc) String() string { if len(f.Arguments) == 0 { if f.Schema != "" { @@ -52,6 +54,9 @@ func (f FunctionDesc) String() string { return fmt.Sprintf("%s(%s)", f.Name, f.Arguments) } -// Implement Node interface -func (f FunctionDesc) Children() []Node { return nil } +// Children implements Node and returns nil — FunctionDesc has no child nodes. +func (f FunctionDesc) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL representation of this +// function descriptor (delegates to String). func (f FunctionDesc) TokenLiteral() string { return f.String() } diff --git a/pkg/sql/ast/object.go b/pkg/sql/ast/object.go index 60500c49..7d274963 100644 --- a/pkg/sql/ast/object.go +++ b/pkg/sql/ast/object.go @@ -19,6 +19,11 @@ type ObjectName struct { Name string } +// TokenLiteral implements Node and returns the object name string. 
func (o ObjectName) TokenLiteral() string { return o.Name } -func (o ObjectName) Children() []Node { return nil } -func (o ObjectName) String() string { return o.Name } + +// Children implements Node and returns nil — ObjectName has no child nodes. +func (o ObjectName) Children() []Node { return nil } + +// String returns the object name as a plain string. +func (o ObjectName) String() string { return o.Name } diff --git a/pkg/sql/ast/operator.go b/pkg/sql/ast/operator.go index 8f3c5645..f6f5bb98 100644 --- a/pkg/sql/ast/operator.go +++ b/pkg/sql/ast/operator.go @@ -12,9 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast provides operator definitions for SQL expressions. -// -// This file defines unary and binary operators supported in SQL expressions, +// This file defines unary and binary operator types for SQL expressions, // including standard SQL operators and PostgreSQL-specific extensions. package ast diff --git a/pkg/sql/ast/pool.go b/pkg/sql/ast/pool.go index 0f215c25..c8a984b6 100644 --- a/pkg/sql/ast/pool.go +++ b/pkg/sql/ast/pool.go @@ -12,18 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package ast provides object pooling for AST nodes to minimize allocations. -// // This file implements comprehensive object pooling for all major AST node types -// using sync.Pool. The pooling system provides: -// - 60-80% memory reduction in production workloads -// - 95%+ pool hit rates with proper usage patterns -// - Thread-safe operations (zero race conditions) -// - Iterative cleanup to prevent stack overflow +// using sync.Pool. The pooling system provides 60-80% memory reduction in production +// workloads and 95%+ pool hit rates with proper usage patterns. // // IMPORTANT: Always use defer when returning pooled objects to prevent leaks. 
-// -// See also: doc.go for complete pooling documentation and usage examples +// See doc.go for complete pooling documentation and usage examples. package ast import ( diff --git a/pkg/sql/ast/span.go b/pkg/sql/ast/span.go index ecad4110..fe50be60 100644 --- a/pkg/sql/ast/span.go +++ b/pkg/sql/ast/span.go @@ -162,6 +162,9 @@ func (e *BinaryExpression) Span() models.Span { return UnionSpans(spans) } +// Span returns the source location span for this unary expression. +// It delegates to the inner expression's span, or returns an empty span +// if the inner expression does not implement Spanned. func (e *UnaryExpression) Span() models.Span { if e.Expr != nil { if spanned, ok := e.Expr.(Spanned); ok { @@ -171,6 +174,9 @@ func (e *UnaryExpression) Span() models.Span { return models.EmptySpan() } +// Span returns the source location span for this CAST expression. +// It delegates to the inner expression's span, or returns an empty span +// if the inner expression does not implement Spanned. func (e *CastExpression) Span() models.Span { if e.Expr != nil { if spanned, ok := e.Expr.(Spanned); ok { @@ -180,11 +186,15 @@ func (e *CastExpression) Span() models.Span { return models.EmptySpan() } +// Span returns an empty source location span for this interval expression. +// IntervalExpression stores no child expressions, so no span can be derived. func (i *IntervalExpression) Span() models.Span { // IntervalExpression has no child expressions, return empty span return models.EmptySpan() } +// Span returns the union of source location spans for all arguments of this +// function call. Returns an empty span if no arguments implement Spanned. func (e *FunctionCall) Span() models.Span { spans := make([]models.Span, 0) diff --git a/pkg/sql/ast/sql.go b/pkg/sql/ast/sql.go index cf6d481e..0a75461c 100644 --- a/pkg/sql/ast/sql.go +++ b/pkg/sql/ast/sql.go @@ -12,10 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-// Package ast provides SQL serialization for AST nodes. -// // This file implements SQL() string methods on all AST node types, -// enabling AST→SQL roundtrip support (Issue #221). +// enabling AST→SQL roundtrip serialization. package ast import ( @@ -125,6 +123,9 @@ func escapeStringLiteral(s string) string { return b.String() } +// SQL returns the SQL literal representation of this value. +// Strings are single-quoted with proper escaping; NULLs are returned as "NULL"; +// booleans are returned in uppercase (TRUE/FALSE); numbers are returned as-is. func (l *LiteralValue) SQL() string { if l == nil { return "" @@ -142,6 +143,7 @@ func (l *LiteralValue) SQL() string { } } +// SQL returns the unquoted identifier name. Used for round-trip serialization. func (i *Ident) SQL() string { if i == nil { return "" @@ -149,6 +151,9 @@ func (i *Ident) SQL() string { return i.Name } +// SQL returns the SQL representation of this binary expression. +// The operator, left, and right sub-expressions are serialized in infix notation. +// NOT-qualified operators (LIKE, ILIKE, SIMILAR TO) are rendered as "x NOT OP y". func (b *BinaryExpression) SQL() string { if b == nil { return "" @@ -180,6 +185,9 @@ func (b *BinaryExpression) SQL() string { return fmt.Sprintf("%s %s %s", left, op, right) } +// SQL returns the SQL representation of this unary expression. +// Prefix operators (NOT, +, -, etc.) are prepended; the PostgreSQL +// postfix factorial operator (!) is appended. func (u *UnaryExpression) SQL() string { if u == nil { return "" @@ -199,6 +207,8 @@ func (u *UnaryExpression) SQL() string { } } +// SQL returns the SQL representation of this aliased expression in the form +// "expr AS alias". func (a *AliasedExpression) SQL() string { if a == nil { return "" @@ -206,6 +216,8 @@ func (a *AliasedExpression) SQL() string { return exprSQL(a.Expr) + " AS " + a.Alias } +// SQL returns the SQL representation of this CAST expression in the form +// "CAST(expr AS type)". 
func (c *CastExpression) SQL() string { if c == nil { return "" @@ -213,6 +225,8 @@ func (c *CastExpression) SQL() string { return fmt.Sprintf("CAST(%s AS %s)", exprSQL(c.Expr), c.Type) } +// SQL returns the SQL representation of this CASE expression including all +// WHEN/THEN clauses and an optional ELSE clause. func (c *CaseExpression) SQL() string { if c == nil { return "" @@ -238,6 +252,8 @@ func (c *CaseExpression) SQL() string { return sb.String() } +// SQL returns the SQL representation of this WHEN clause in the form +// "WHEN condition THEN result". func (w *WhenClause) SQL() string { if w == nil { return "" @@ -245,6 +261,8 @@ func (w *WhenClause) SQL() string { return fmt.Sprintf("WHEN %s THEN %s", exprSQL(w.Condition), exprSQL(w.Result)) } +// SQL returns the SQL representation of this BETWEEN expression. +// The NOT modifier is included when BetweenExpression.Not is true. func (b *BetweenExpression) SQL() string { if b == nil { return "" @@ -256,6 +274,9 @@ func (b *BetweenExpression) SQL() string { return fmt.Sprintf("%s %sBETWEEN %s AND %s", exprSQL(b.Expr), not, exprSQL(b.Lower), exprSQL(b.Upper)) } +// SQL returns the SQL representation of this IN expression. +// The NOT modifier is included when InExpression.Not is true. +// Supports both value lists "x IN (1, 2, 3)" and subqueries "x IN (SELECT ...)". func (i *InExpression) SQL() string { if i == nil { return "" @@ -274,6 +295,7 @@ func (i *InExpression) SQL() string { return fmt.Sprintf("%s %sIN (%s)", exprSQL(i.Expr), not, strings.Join(vals, ", ")) } +// SQL returns the SQL representation of this EXISTS expression as "EXISTS (subquery)". func (e *ExistsExpression) SQL() string { if e == nil { return "" @@ -281,6 +303,7 @@ func (e *ExistsExpression) SQL() string { return fmt.Sprintf("EXISTS (%s)", stmtSQL(e.Subquery)) } +// SQL returns the SQL representation of this scalar subquery as "(SELECT ...)". 
func (s *SubqueryExpression) SQL() string { if s == nil { return "" @@ -288,6 +311,7 @@ func (s *SubqueryExpression) SQL() string { return fmt.Sprintf("(%s)", stmtSQL(s.Subquery)) } +// SQL returns the SQL representation of this ANY expression as "expr op ANY (subquery)". func (a *AnyExpression) SQL() string { if a == nil { return "" @@ -295,6 +319,7 @@ func (a *AnyExpression) SQL() string { return fmt.Sprintf("%s %s ANY (%s)", exprSQL(a.Expr), a.Operator, stmtSQL(a.Subquery)) } +// SQL returns the SQL representation of this ALL expression as "expr op ALL (subquery)". func (a *AllExpression) SQL() string { if a == nil { return "" @@ -302,6 +327,10 @@ func (a *AllExpression) SQL() string { return fmt.Sprintf("%s %s ALL (%s)", exprSQL(a.Expr), a.Operator, stmtSQL(a.Subquery)) } +// SQL returns the SQL representation of this function call including arguments, +// optional DISTINCT modifier, ORDER BY clause (for aggregates like STRING_AGG), +// WITHIN GROUP (for ordered-set aggregates), FILTER (WHERE ...) clause, and +// OVER (...) window specification. func (f *FunctionCall) SQL() string { if f == nil { return "" @@ -341,6 +370,7 @@ func (f *FunctionCall) SQL() string { return sb.String() } +// SQL returns "EXTRACT(field FROM source)" as a SQL string. func (e *ExtractExpression) SQL() string { if e == nil { return "" @@ -348,6 +378,7 @@ func (e *ExtractExpression) SQL() string { return fmt.Sprintf("EXTRACT(%s FROM %s)", e.Field, exprSQL(e.Source)) } +// SQL returns "POSITION(substr IN str)" as a SQL string. func (p *PositionExpression) SQL() string { if p == nil { return "" @@ -355,6 +386,9 @@ func (p *PositionExpression) SQL() string { return fmt.Sprintf("POSITION(%s IN %s)", exprSQL(p.Substr), exprSQL(p.Str)) } +// SQL returns the SQL representation of this SUBSTRING expression. +// With a length: "SUBSTRING(str FROM start FOR length)". +// Without a length: "SUBSTRING(str FROM start)". 
func (s *SubstringExpression) SQL() string { if s == nil { return "" @@ -365,6 +399,7 @@ func (s *SubstringExpression) SQL() string { return fmt.Sprintf("SUBSTRING(%s FROM %s)", exprSQL(s.Str), exprSQL(s.Start)) } +// SQL returns the SQL representation of this interval as "INTERVAL 'value'". func (i *IntervalExpression) SQL() string { if i == nil { return "" @@ -372,6 +407,8 @@ func (i *IntervalExpression) SQL() string { return fmt.Sprintf("INTERVAL '%s'", i.Value) } +// SQL returns the SQL representation of this list expression as a +// comma-separated list: "v1, v2, v3" (without surrounding parentheses). func (l *ListExpression) SQL() string { if l == nil { return "" @@ -383,6 +420,8 @@ func (l *ListExpression) SQL() string { return strings.Join(vals, ", ") } +// SQL returns the SQL representation of this tuple expression as a +// parenthesized comma-separated list: "(v1, v2, v3)". func (t *TupleExpression) SQL() string { if t == nil { return "" @@ -394,6 +433,9 @@ func (t *TupleExpression) SQL() string { return "(" + strings.Join(vals, ", ") + ")" } +// SQL returns the SQL representation of this ARRAY constructor. +// From a subquery: "ARRAY(SELECT ...)". +// From an element list: "ARRAY[e1, e2, e3]". func (a *ArrayConstructorExpression) SQL() string { if a == nil { return "" @@ -408,6 +450,8 @@ func (a *ArrayConstructorExpression) SQL() string { return "ARRAY[" + strings.Join(vals, ", ") + "]" } +// SQL returns the SQL representation of this array subscript expression, +// e.g. "arr[1]" or "matrix[i][j]" for multi-dimensional subscripts. func (a *ArraySubscriptExpression) SQL() string { if a == nil { return "" @@ -419,6 +463,8 @@ func (a *ArraySubscriptExpression) SQL() string { return s } +// SQL returns the SQL representation of this array slice expression, +// e.g. "arr[1:3]", "arr[2:]", "arr[:5]", or "arr[:]".
func (a *ArraySliceExpression) SQL() string { if a == nil { return "" @@ -436,6 +482,8 @@ func (a *ArraySliceExpression) SQL() string { // GROUP BY advanced expressions +// SQL returns the SQL representation of this ROLLUP expression as +// "ROLLUP(col1, col2, ...)". func (r *RollupExpression) SQL() string { if r == nil { return "" @@ -443,6 +491,8 @@ func (r *RollupExpression) SQL() string { return "ROLLUP(" + exprListSQL(r.Expressions) + ")" } +// SQL returns the SQL representation of this CUBE expression as +// "CUBE(col1, col2, ...)". func (c *CubeExpression) SQL() string { if c == nil { return "" @@ -450,6 +500,8 @@ func (c *CubeExpression) SQL() string { return "CUBE(" + exprListSQL(c.Expressions) + ")" } +// SQL returns the SQL representation of this GROUPING SETS expression as +// "GROUPING SETS((a, b), (a), ())". func (g *GroupingSetsExpression) SQL() string { if g == nil { return "" @@ -465,6 +517,10 @@ func (g *GroupingSetsExpression) SQL() string { // Statements // ============================================================ +// SQL returns the full SQL string for this SELECT statement, including all +// clauses: WITH, DISTINCT ON/DISTINCT, SELECT list, FROM, JOIN, WHERE, +// GROUP BY, HAVING, WINDOW, ORDER BY, LIMIT, OFFSET, FETCH, and FOR. +// This enables round-trip serialization of parsed queries. func (s *SelectStatement) SQL() string { if s == nil { return "" @@ -552,6 +608,9 @@ func (s *SelectStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this INSERT statement, including the +// optional WITH clause, column list, VALUES rows or SELECT subquery, ON CONFLICT +// clause, and RETURNING clause. func (i *InsertStatement) SQL() string { if i == nil { return "" @@ -601,6 +660,9 @@ func (i *InsertStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this UPDATE statement, including the +// optional WITH clause, SET assignments, FROM clause, WHERE condition, and +// RETURNING clause. 
func (u *UpdateStatement) SQL() string { if u == nil { return "" @@ -650,6 +712,8 @@ func (u *UpdateStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this DELETE statement, including the +// optional WITH clause, USING clause, WHERE condition, and RETURNING clause. func (d *DeleteStatement) SQL() string { if d == nil { return "" @@ -691,6 +755,8 @@ func (d *DeleteStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this CREATE TABLE statement including +// column definitions, table constraints, INHERITS, PARTITION BY, and table options. func (c *CreateTableStatement) SQL() string { if c == nil { return "" @@ -737,6 +803,8 @@ func (c *CreateTableStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this CREATE INDEX statement including +// the UNIQUE modifier, IF NOT EXISTS, USING method, column list, and WHERE predicate. func (c *CreateIndexStatement) SQL() string { if c == nil { return "" @@ -780,6 +848,9 @@ func (c *CreateIndexStatement) SQL() string { return sb.String() } +// SQL returns the SQL representation of this ALTER TABLE statement. +// Note: the parser returns AlterStatement (in alter.go); this method +// is for manually-constructed AlterTableStatement values. func (a *AlterTableStatement) SQL() string { if a == nil { return "" @@ -796,6 +867,8 @@ func (a *AlterTableStatement) SQL() string { return sb.String() } +// SQL returns the SQL representation of this DROP statement, including the +// object type, optional IF EXISTS, object names, and CASCADE/RESTRICT behavior. func (d *DropStatement) SQL() string { if d == nil { return "" @@ -816,6 +889,8 @@ func (d *DropStatement) SQL() string { return sb.String() } +// SQL returns the SQL representation of this TRUNCATE statement, including +// table names, RESTART/CONTINUE IDENTITY options, and CASCADE/RESTRICT behavior. 
func (t *TruncateStatement) SQL() string { if t == nil { return "" @@ -836,6 +911,8 @@ func (t *TruncateStatement) SQL() string { return sb.String() } +// SQL returns the SQL representation of this WITH clause including any RECURSIVE +// modifier and all CTE definitions. func (w *WithClause) SQL() string { if w == nil { return "" @@ -854,6 +931,8 @@ func (w *WithClause) SQL() string { return sb.String() } +// SQL returns the SQL representation of this set operation as +// "left UNION|EXCEPT|INTERSECT [ALL] right". func (s *SetOperation) SQL() string { if s == nil { return "" @@ -867,6 +946,8 @@ func (s *SetOperation) SQL() string { return fmt.Sprintf("%s %s %s", left, op, right) } +// SQL returns the SQL representation of this VALUES clause as +// "VALUES (v1, v2), (v3, v4), ...". func (v *Values) SQL() string { if v == nil { return "" @@ -882,6 +963,9 @@ func (v *Values) SQL() string { return "VALUES " + strings.Join(rows, ", ") } +// SQL returns the full SQL string for this CREATE VIEW statement including the +// optional OR REPLACE, TEMPORARY, IF NOT EXISTS, column list, SELECT query, +// and WITH CHECK OPTION clause. func (c *CreateViewStatement) SQL() string { if c == nil { return "" @@ -914,6 +998,7 @@ func (c *CreateViewStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this CREATE MATERIALIZED VIEW statement. func (c *CreateMaterializedViewStatement) SQL() string { if c == nil { return "" @@ -942,6 +1027,7 @@ func (c *CreateMaterializedViewStatement) SQL() string { return sb.String() } +// SQL returns the SQL string for this REFRESH MATERIALIZED VIEW statement. func (r *RefreshMaterializedViewStatement) SQL() string { if r == nil { return "" @@ -963,6 +1049,8 @@ func (r *RefreshMaterializedViewStatement) SQL() string { return sb.String() } +// SQL returns the full SQL string for this MERGE statement including the target, +// source, ON condition, and all WHEN MATCHED/NOT MATCHED clauses. 
func (m *MergeStatement) SQL() string { if m == nil { return "" @@ -1011,6 +1099,9 @@ func (m *MergeStatement) SQL() string { // DML types from dml.go +// SQL returns the SQL representation of this Select node (dml.go type), which +// is a simplified SELECT structure used in compatibility code paths. +// For full-featured SELECT parsing use SelectStatement.SQL() instead. func (s *Select) SQL() string { if s == nil { return "" @@ -1055,6 +1146,8 @@ func (s *Select) SQL() string { return sb.String() } +// SQL returns the SQL representation of this Insert node (dml.go type). +// For the full-featured INSERT use InsertStatement.SQL() instead. func (i *Insert) SQL() string { if i == nil { return "" @@ -1087,6 +1180,8 @@ func (i *Insert) SQL() string { return sb.String() } +// SQL returns the SQL representation of this Update node (dml.go type). +// For the full-featured UPDATE use UpdateStatement.SQL() instead. func (u *Update) SQL() string { if u == nil { return "" @@ -1112,6 +1207,8 @@ func (u *Update) SQL() string { return sb.String() } +// SQL returns the SQL representation of this Delete node (dml.go type). +// For the full-featured DELETE use DeleteStatement.SQL() instead. func (d *Delete) SQL() string { if d == nil { return "" diff --git a/pkg/sql/ast/trigger.go b/pkg/sql/ast/trigger.go index 1fa67f46..5a42b7f8 100644 --- a/pkg/sql/ast/trigger.go +++ b/pkg/sql/ast/trigger.go @@ -43,6 +43,7 @@ const ( TriggerObjectStatement ) +// String returns the SQL keyword for this trigger object: "ROW" or "STATEMENT". func (t TriggerObject) String() string { switch t { case TriggerObjectRow: @@ -63,6 +64,7 @@ const ( TriggerReferencingNewTable ) +// String returns the SQL phrase for this referencing type: "OLD TABLE" or "NEW TABLE". 
func (t TriggerReferencingType) String() string { switch t { case TriggerReferencingOldTable: @@ -82,6 +84,8 @@ type TriggerReferencing struct { TransitionRelationName ObjectName } +// String returns the SQL representation of this transition relation declaration, +// e.g. "OLD TABLE AS old_rows" or "NEW TABLE new_rows". func (t TriggerReferencing) String() string { var as string if t.IsAs { @@ -96,15 +100,22 @@ type TriggerEvent struct { Columns []Identifier // Only used for UPDATE events } +// TriggerEventType identifies which DML event fires the trigger. type TriggerEventType int const ( + // TriggerEventInsert fires the trigger on INSERT. TriggerEventInsert TriggerEventType = iota + // TriggerEventUpdate fires the trigger on UPDATE (optionally of specific columns). TriggerEventUpdate + // TriggerEventDelete fires the trigger on DELETE. TriggerEventDelete + // TriggerEventTruncate fires the trigger on TRUNCATE. TriggerEventTruncate ) +// String returns the SQL representation of this trigger event: "INSERT", +// "UPDATE", "UPDATE OF col1, col2", "DELETE", or "TRUNCATE". func (t TriggerEvent) String() string { switch t.Type { case TriggerEventInsert: @@ -136,6 +147,8 @@ const ( TriggerPeriodInsteadOf ) +// String returns the SQL keyword for this trigger period: "AFTER", "BEFORE", +// or "INSTEAD OF". func (t TriggerPeriod) String() string { switch t { case TriggerPeriodAfter: @@ -157,6 +170,7 @@ const ( TriggerExecBodyProcedure ) +// String returns the SQL keyword for this execution body type: "FUNCTION" or "PROCEDURE". func (t TriggerExecBodyType) String() string { switch t { case TriggerExecBodyFunction: @@ -174,18 +188,42 @@ type TriggerExecBody struct { FuncDesc FunctionDesc } +// String returns the SQL representation of this trigger execution body, +// e.g. "FUNCTION schema.my_func()" or "PROCEDURE my_proc()". 
func (t TriggerExecBody) String() string { return fmt.Sprintf("%s %s", t.ExecType, t.FuncDesc) } -// Implement Node interface for trigger types -func (t TriggerObject) Children() []Node { return nil } -func (t TriggerObject) TokenLiteral() string { return t.String() } -func (t TriggerReferencing) Children() []Node { return nil } +// Children implements Node and returns nil — TriggerObject has no child nodes. +func (t TriggerObject) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL keyword for this trigger object. +func (t TriggerObject) TokenLiteral() string { return t.String() } + +// Children implements Node and returns nil — TriggerReferencing has no child nodes. +func (t TriggerReferencing) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL representation of this +// transition relation declaration. func (t TriggerReferencing) TokenLiteral() string { return t.String() } -func (t TriggerEvent) Children() []Node { return nil } -func (t TriggerEvent) TokenLiteral() string { return t.String() } -func (t TriggerPeriod) Children() []Node { return nil } -func (t TriggerPeriod) TokenLiteral() string { return t.String() } -func (t TriggerExecBody) Children() []Node { return nil } -func (t TriggerExecBody) TokenLiteral() string { return t.String() } + +// Children implements Node and returns nil — TriggerEvent has no child nodes. +func (t TriggerEvent) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL representation of this +// trigger event (e.g. "INSERT", "UPDATE OF col", "DELETE"). +func (t TriggerEvent) TokenLiteral() string { return t.String() } + +// Children implements Node and returns nil — TriggerPeriod has no child nodes. +func (t TriggerPeriod) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL keyword for this trigger +// period ("AFTER", "BEFORE", or "INSTEAD OF"). 
+func (t TriggerPeriod) TokenLiteral() string { return t.String() } + +// Children implements Node and returns nil — TriggerExecBody has no child nodes. +func (t TriggerExecBody) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL representation of this +// trigger execution body. +func (t TriggerExecBody) TokenLiteral() string { return t.String() } diff --git a/pkg/sql/ast/types.go b/pkg/sql/ast/types.go index c412f13f..ce49b23d 100644 --- a/pkg/sql/ast/types.go +++ b/pkg/sql/ast/types.go @@ -45,8 +45,11 @@ const ( AlterColumnDropNotNull ) -// Make AlterColumnOperation implement Node interface +// Children implements Node and returns nil — AlterColumnOperation has no child nodes. func (a *AlterColumnOperation) Children() []Node { return nil } + +// TokenLiteral implements Node and returns the SQL keyword phrase for this +// ALTER COLUMN operation (e.g. "SET DEFAULT", "DROP NOT NULL"). func (a *AlterColumnOperation) TokenLiteral() string { switch *a { case AlterColumnSetDefault: @@ -102,8 +105,11 @@ type Query struct { Text string } +// TokenLiteral implements Node and returns "QUERY". func (q *Query) TokenLiteral() string { return "QUERY" } -func (q *Query) Children() []Node { return nil } + +// Children implements Node and returns nil — Query has no child nodes. +func (q *Query) Children() []Node { return nil } // Setting represents a SET clause in an UPDATE statement type Setting struct { @@ -119,9 +125,13 @@ type Ident struct { func (i *Ident) String() string { return i.Name } // Make Ident implement Expression interface -func (*Ident) expressionNode() {} +func (*Ident) expressionNode() {} + +// TokenLiteral implements Node and returns the identifier name. func (i *Ident) TokenLiteral() string { return i.Name } -func (i *Ident) Children() []Node { return nil } + +// Children implements Node and returns nil — Ident has no child nodes. 
+func (i *Ident) Children() []Node { return nil } // InputFormatClause represents the format specification for input data type InputFormatClause struct { @@ -137,8 +147,11 @@ type CommentDef struct { Text string } +// TokenLiteral implements Node and returns "COMMENT". func (c *CommentDef) TokenLiteral() string { return "COMMENT" } -func (c *CommentDef) Children() []Node { return nil } + +// Children implements Node and returns nil — CommentDef has no child nodes. +func (c *CommentDef) Children() []Node { return nil } // OnCommit represents the ON COMMIT behavior for temporary tables type OnCommit int @@ -160,7 +173,12 @@ type OneOrManyWithParens[T any] struct { Items []T } +// TokenLiteral implements Node and returns "(" to represent the opening +// parenthesis of the parenthesized list. func (o *OneOrManyWithParens[T]) TokenLiteral() string { return "(" } + +// Children implements Node and returns all items as Node values (items that +// do not implement Node are represented as nil slots). func (o *OneOrManyWithParens[T]) Children() []Node { nodes := make([]Node, len(o.Items)) for i, item := range o.Items { @@ -177,7 +195,12 @@ type WrappedCollection[T any] struct { Wrapper string } +// TokenLiteral implements Node and returns the wrapper keyword (e.g. the SQL +// keyword that introduces the collection). func (w *WrappedCollection[T]) TokenLiteral() string { return w.Wrapper } + +// Children implements Node and returns all items as Node values (items that +// do not implement Node are represented as nil slots). func (w *WrappedCollection[T]) Children() []Node { nodes := make([]Node, len(w.Items)) for i, item := range w.Items { @@ -194,8 +217,11 @@ type ClusteredBy struct { Buckets int } +// TokenLiteral implements Node and returns "CLUSTERED BY". func (c *ClusteredBy) TokenLiteral() string { return "CLUSTERED BY" } -func (c *ClusteredBy) Children() []Node { return c.Columns } + +// Children implements Node and returns the columns used in the CLUSTERED BY clause. 
+func (c *ClusteredBy) Children() []Node { return c.Columns } // RowAccessPolicy represents row-level access policy type RowAccessPolicy struct { @@ -204,7 +230,10 @@ type RowAccessPolicy struct { Enabled bool } +// TokenLiteral implements Node and returns "ROW ACCESS POLICY". func (r *RowAccessPolicy) TokenLiteral() string { return "ROW ACCESS POLICY" } + +// Children implements Node and returns the filter expression if present, or nil. func (r *RowAccessPolicy) Children() []Node { if r.Filter != nil { return []Node{r.Filter} @@ -238,9 +267,13 @@ type StatementImpl struct { Variant StatementVariant } +// TokenLiteral implements Node by delegating to the wrapped StatementVariant. func (s *StatementImpl) TokenLiteral() string { return s.Variant.TokenLiteral() } -func (s *StatementImpl) Children() []Node { return []Node{s.Variant} } -func (s *StatementImpl) statementNode() {} + +// Children implements Node and returns the wrapped StatementVariant as a single child. +func (s *StatementImpl) Children() []Node { return []Node{s.Variant} } + +func (s *StatementImpl) statementNode() {} // CreateTable represents a CREATE TABLE statement type CreateTable struct { @@ -297,6 +330,10 @@ type CreateTable struct { } func (*CreateTable) statementNode() {} + +// Children implements Node and returns all child nodes: the table name, column +// definitions, constraints, optional subquery, LIKE/CLONE targets, comment, and +// CLUSTERED BY / ROW ACCESS POLICY clauses. func (c *CreateTable) Children() []Node { nodes := []Node{c.Name} for _, col := range c.Columns { @@ -325,4 +362,6 @@ func (c *CreateTable) Children() []Node { } return nodes } + +// TokenLiteral implements Node and returns "CREATE TABLE". 
func (c *CreateTable) TokenLiteral() string { return "CREATE TABLE" } diff --git a/pkg/sql/ast/value.go b/pkg/sql/ast/value.go index 84ca60f4..26c6db06 100644 --- a/pkg/sql/ast/value.go +++ b/pkg/sql/ast/value.go @@ -44,26 +44,47 @@ type Value struct { type ValueType int const ( + // NumberValue represents a numeric literal. NumberValue ValueType = iota + // SingleQuotedStringValue represents a single-quoted string literal. SingleQuotedStringValue + // DollarQuotedStringValue represents a PostgreSQL dollar-quoted string literal. DollarQuotedStringValue + // TripleSingleQuotedStringValue represents a triple-single-quoted string literal. TripleSingleQuotedStringValue + // TripleDoubleQuotedStringValue represents a triple-double-quoted string literal. TripleDoubleQuotedStringValue + // EscapedStringLiteralValue represents a C-style escaped string (E'...'). EscapedStringLiteralValue + // UnicodeStringLiteralValue represents a Unicode string literal (U&'...'). UnicodeStringLiteralValue + // SingleQuotedByteStringLiteralValue represents a byte string with single quotes (B'...'). SingleQuotedByteStringLiteralValue + // DoubleQuotedByteStringLiteralValue represents a byte string with double quotes (B"..."). DoubleQuotedByteStringLiteralValue + // TripleSingleQuotedByteStringLiteralValue represents a byte string with triple single quotes. TripleSingleQuotedByteStringLiteralValue + // TripleDoubleQuotedByteStringLiteralValue represents a byte string with triple double quotes. TripleDoubleQuotedByteStringLiteralValue + // SingleQuotedRawStringLiteralValue represents a raw string with single quotes (R'...'). SingleQuotedRawStringLiteralValue + // DoubleQuotedRawStringLiteralValue represents a raw string with double quotes (R"..."). DoubleQuotedRawStringLiteralValue + // TripleSingleQuotedRawStringLiteralValue represents a raw string with triple single quotes. 
TripleSingleQuotedRawStringLiteralValue + // TripleDoubleQuotedRawStringLiteralValue represents a raw string with triple double quotes. TripleDoubleQuotedRawStringLiteralValue + // NationalStringLiteralValue represents a national character string literal (N'...'). NationalStringLiteralValue + // HexStringLiteralValue represents a hexadecimal string literal (X'...'). HexStringLiteralValue + // DoubleQuotedStringValue represents a double-quoted string literal. DoubleQuotedStringValue + // BooleanValue represents a boolean literal (TRUE or FALSE). BooleanValue + // NullValue represents the SQL NULL literal. NullValue + // PlaceholderValue represents a query parameter placeholder (e.g. $1, ?, :name). PlaceholderValue ) @@ -73,6 +94,9 @@ type DollarQuotedString struct { Tag string } +// String returns the SQL literal representation of this value, including +// appropriate quoting and escaping for each ValueType (e.g. single-quoted +// strings, dollar-quoted strings, hex strings, boolean literals, NULL, etc.). func (v Value) String() string { switch v.Type { case NumberValue: @@ -155,10 +179,13 @@ type Number struct { Long bool } +// Children implements Node and returns nil — Value has no child nodes. func (v Value) Children() []Node { return nil } +// TokenLiteral implements Node and returns the SQL literal representation of +// this value (delegates to String). func (v Value) TokenLiteral() string { return v.String() } @@ -287,6 +314,8 @@ const ( CustomDateTime ) +// String returns the SQL keyword for this date/time field (e.g. "YEAR", +// "MONTH", "DAY", "HOUR", "MINUTE", "SECOND", etc.). func (d DateTimeField) String() string { switch d { case Year: @@ -391,6 +420,7 @@ const ( NFKD ) +// String returns the Unicode normalization form name ("NFC", "NFD", "NFKC", or "NFKD"). func (n NormalizationForm) String() string { switch n { case NFC: @@ -415,6 +445,7 @@ const ( Trailing ) +// String returns the SQL keyword for this trim direction: "BOTH", "LEADING", or "TRAILING". 
func (t TrimWhereField) String() string { switch t { case Both: diff --git a/pkg/sql/parser/doc.go b/pkg/sql/parser/doc.go index 8a0dc215..55b4a719 100644 --- a/pkg/sql/parser/doc.go +++ b/pkg/sql/parser/doc.go @@ -15,6 +15,12 @@ // Package parser provides a high-performance, production-ready recursive descent SQL parser // that converts tokenized SQL into a comprehensive Abstract Syntax Tree (AST). // +// The primary entry points are GetParser (pool-based instantiation), ParseFromModelTokens +// (converts []models.TokenWithSpan from the tokenizer into parser tokens), and +// ParseWithPositions (produces an *ast.AST with full position information). +// For dialect-aware parsing, use ParseWithDialect. For concurrent use, always obtain a +// parser instance via GetParser and return it with PutParser (or defer parser.PutParser(p)). +// // # Overview // // The parser implements a predictive recursive descent parser with one-token lookahead, diff --git a/pkg/sql/parser/parser.go b/pkg/sql/parser/parser.go index 95784d52..183730b0 100644 --- a/pkg/sql/parser/parser.go +++ b/pkg/sql/parser/parser.go @@ -12,57 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package parser provides a high-performance recursive descent SQL parser that converts -// tokenized SQL into a comprehensive Abstract Syntax Tree (AST). -// -// The parser supports enterprise-grade SQL parsing with 1.38M+ ops/sec throughput, -// comprehensive multi-dialect support (PostgreSQL, MySQL, SQL Server, Oracle, SQLite), -// and production-ready features including DoS protection, context cancellation, and -// object pooling for optimal memory efficiency. 
-// -// # Quick Start -// -// // Get parser from pool -// parser := parser.GetParser() -// defer parser.PutParser(parser) -// -// // Parse tokens to AST -// result := parser.ParseFromModelTokens(tokens) -// astObj, err := parser.ParseWithPositions(result) -// defer ast.ReleaseAST(astObj) -// -// # v1.6.0 PostgreSQL Extensions -// -// - LATERAL JOIN: Correlated subqueries in FROM clause -// - JSON/JSONB Operators: All 10 operators (->/->>/#>/#>>/@>/<@/?/?|/?&/#-) -// - DISTINCT ON: PostgreSQL-specific row deduplication -// - FILTER Clause: Conditional aggregation (SQL:2003 T612) -// - RETURNING Clause: Return modified rows from DML statements -// - Aggregate ORDER BY: ORDER BY inside STRING_AGG, ARRAY_AGG -// -// # v1.5.0 Features (SQL-99 Compliance) -// -// - GROUPING SETS, ROLLUP, CUBE: Advanced grouping (SQL-99 T431) -// - MERGE Statements: SQL:2003 MERGE with MATCHED/NOT MATCHED -// - Materialized Views: CREATE/REFRESH/DROP with CONCURRENTLY -// - FETCH Clause: SQL-99 F861/F862 with PERCENT, ONLY, WITH TIES -// - TRUNCATE: Enhanced with RESTART/CONTINUE IDENTITY -// -// # v1.3.0 Window Functions (Phase 2.5) -// -// - Window Functions: OVER clause with PARTITION BY, ORDER BY -// - Ranking: ROW_NUMBER(), RANK(), DENSE_RANK(), NTILE() -// - Analytic: LAG(), LEAD(), FIRST_VALUE(), LAST_VALUE() -// - Frame Clauses: ROWS/RANGE with PRECEDING/FOLLOWING/CURRENT ROW -// -// # v1.2.0 CTEs and Set Operations (Phase 2) -// -// - Common Table Expressions: WITH clause with recursive support -// - Set Operations: UNION, UNION ALL, EXCEPT, INTERSECT -// - Multiple CTEs: Comma-separated CTE definitions in single query -// - CTE Column Lists: Optional column specifications -// -// For comprehensive documentation, see doc.go in this package. package parser import ( @@ -261,6 +210,18 @@ func (p *Parser) Dialect() string { return p.dialect } +// Parser is a recursive-descent SQL parser that converts a token stream into an +// Abstract Syntax Tree (AST). 
+// +// Parser instances are not thread-safe. Each goroutine must use its own instance, +// obtained from the pool via GetParser and returned with PutParser: +// +//	p := parser.GetParser() +//	defer parser.PutParser(p) +//	tree, err := p.ParseWithPositions(p.ParseFromModelTokens(tokens)) +// +// For dialect-aware parsing or strict mode, use NewParser with options, or call +// ApplyOptions on a pooled instance before parsing. type Parser struct { tokens []models.TokenWithSpan currentPos int diff --git a/pkg/sql/parser/preprocess.go b/pkg/sql/parser/preprocess.go index 2546ddee..de3366d5 100644 --- a/pkg/sql/parser/preprocess.go +++ b/pkg/sql/parser/preprocess.go @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package parser — token preprocessing (normalization) for the parser. -// -// preprocessTokens replaces the old token_conversion.go conversion layer. +// preprocessTokens implements token preprocessing (normalization) for the parser. +// It replaces the old token_conversion.go conversion layer. // Instead of converting models.TokenWithSpan → token.Token (which stripped span -// information), it now normalises a []models.TokenWithSpan in-place so the parser +// information), it normalises a []models.TokenWithSpan in-place so the parser // can consume it directly. // // The two responsibilities of the old layer are preserved here: diff --git a/pkg/sql/parser/recovery.go b/pkg/sql/parser/recovery.go index bbeb9642..a014d30f 100644 --- a/pkg/sql/parser/recovery.go +++ b/pkg/sql/parser/recovery.go @@ -34,6 +34,8 @@ type ParseError struct { Cause error // original error, accessible via Unwrap() } +// Error implements the error interface and returns a human-readable description +// of the parse error with position information when available.
func (e *ParseError) Error() string { if e.Line > 0 { return fmt.Sprintf("parse error at line %d, column %d (token %d): %s", e.Line, e.Column, e.TokenIdx, e.Msg) diff --git a/pkg/sql/parser/validate.go b/pkg/sql/parser/validate.go index be0e6ad9..2c7d9d4e 100644 --- a/pkg/sql/parser/validate.go +++ b/pkg/sql/parser/validate.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package parser — Validate() fast path for SQL validation without full AST construction. +// Validate() implements a fast path for SQL validation without full AST construction. // See issue #274. package parser diff --git a/pkg/sql/security/scanner.go b/pkg/sql/security/scanner.go index 5fb420b7..7fde9245 100644 --- a/pkg/sql/security/scanner.go +++ b/pkg/sql/security/scanner.go @@ -12,9 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package security provides SQL injection pattern detection and security scanning -// capabilities for GoSQLX. It analyzes both parsed SQL ASTs and raw SQL strings -// to identify common SQL injection patterns and security vulnerabilities. +// Package security provides SQL injection detection and security scanning for GoSQLX. +// +// The primary entry points are NewScanner (creates a scanner that reports all severity +// levels), NewScannerWithSeverity (creates a scanner filtered to a minimum severity), +// Scanner.Scan (analyzes a parsed *ast.AST via deep tree traversal), and Scanner.ScanSQL +// (analyzes a raw SQL string using pre-compiled regex patterns). Both methods return a +// *ScanResult containing all Findings with severity, pattern type, risk description, and +// remediation suggestions, plus summary counts accessible via HasCritical(), +// HasHighOrAbove(), and IsClean(). 
// // # Overview // diff --git a/pkg/sql/token/doc.go b/pkg/sql/token/doc.go index 8ed41c7f..cf01b959 100644 --- a/pkg/sql/token/doc.go +++ b/pkg/sql/token/doc.go @@ -12,16 +12,24 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package token defines the Token struct and token pooling system for SQL lexical analysis. +// Package token defines the Token struct and object pool for SQL lexical analysis. // -// As of #215, the token system uses a unified integer-based type system (models.TokenType). -// The legacy string-based token.Type has been removed. +// A Token is the fundamental unit produced by the GoSQLX tokenizer: it pairs a +// models.TokenType integer constant (e.g., models.TokenTypeSelect, models.TokenTypeIdent) +// with the raw literal string from the source SQL. The integer-based TokenType taxonomy +// covers all SQL token categories — DML keywords (SELECT, INSERT, UPDATE, DELETE), +// DDL keywords (CREATE, ALTER, DROP), punctuation, operators, literals, and identifiers. +// The legacy string-based token.Type was removed in #215; all code should use models.TokenType. +// +// The package also provides an object pool (Get / Put) for zero-allocation token reuse in +// hot paths such as batch parsing or high-throughput server workloads. Every token obtained +// with Get must be returned with a deferred Put to avoid memory leaks. 
// // # Token Structure // // type Token struct { -// Type models.TokenType // Int-based type (primary, for performance) -// Literal string // The literal value of the token +// Type models.TokenType // Integer token type constant (primary, O(1) comparison) +// Literal string // Raw literal value from the SQL source // } // // # Basic Usage @@ -46,14 +54,15 @@ // The package provides an object pool for zero-allocation token reuse: // // tok := token.Get() -// defer token.Put(tok) // MANDATORY - return to pool when done +// defer token.Put(tok) // MANDATORY — return to pool when done // // tok.Type = models.TokenTypeSelect // tok.Literal = "SELECT" // // # See Also // -// - pkg/models: Core token type definitions (models.TokenType) -// - pkg/sql/tokenizer: SQL lexical analysis producing tokens -// - pkg/sql/parser: Parser consuming tokens +// - pkg/models: Core TokenType constants and the TokenTypeUnknown sentinel +// - pkg/sql/keywords: Keyword-to-TokenType mapping for all SQL dialects +// - pkg/sql/tokenizer: SQL lexical analysis that produces Token values +// - pkg/sql/parser: Recursive descent parser that consumes Token values package token diff --git a/pkg/sql/tokenizer/doc.go b/pkg/sql/tokenizer/doc.go index efd756c0..a75f5cf9 100644 --- a/pkg/sql/tokenizer/doc.go +++ b/pkg/sql/tokenizer/doc.go @@ -13,7 +13,13 @@ // limitations under the License. // Package tokenizer provides high-performance SQL tokenization with zero-copy operations -// and comprehensive Unicode support for GoSQLX v1.6.0. +// and comprehensive Unicode support. +// +// The primary entry points are Tokenize (convert raw SQL bytes to []models.TokenWithSpan), +// GetTokenizer and PutTokenizer (pool-based instance management for optimal memory +// efficiency), and TokenizeContext (tokenization with context cancellation support). +// The tokenizer operates directly on input byte slices without allocating intermediate +// strings, achieving 8M+ tokens/sec throughput with full UTF-8 support. 
// // # Overview // diff --git a/pkg/sql/tokenizer/tokenizer.go b/pkg/sql/tokenizer/tokenizer.go index 3c1b0c40..51f93c1d 100644 --- a/pkg/sql/tokenizer/tokenizer.go +++ b/pkg/sql/tokenizer/tokenizer.go @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -// Package tokenizer provides high-performance SQL tokenization with zero-copy operations. -// See doc.go for comprehensive package documentation. package tokenizer import ( diff --git a/pkg/transform/columns.go b/pkg/transform/columns.go index 5f2b8b66..a67dd85a 100644 --- a/pkg/transform/columns.go +++ b/pkg/transform/columns.go @@ -29,7 +29,14 @@ func getSelect(stmt ast.Statement, transform string) (*ast.SelectStatement, erro return sel, nil } -// AddColumn returns a Rule that adds a column expression to a SELECT statement. +// AddColumn returns a Rule that appends a column expression to the SELECT list of +// a SELECT statement. The expression may be any valid AST expression node such as +// *ast.Identifier, *ast.AliasedExpression, or *ast.FunctionCall. +// +// Parameters: +// - expr: The column expression to append to the SELECT list +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func AddColumn(expr ast.Expression) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "AddColumn") @@ -41,7 +48,12 @@ func AddColumn(expr ast.Expression) Rule { }) } -// RemoveColumn returns a Rule that removes a column by name or alias from a SELECT statement. +// RemoveColumn returns a Rule that removes the first column in the SELECT list that +// matches name. Matching is case-insensitive and checks both identifier names and +// expression aliases. +// +// Returns an error if no column matching name is found, or ErrUnsupportedStatement +// for non-SELECT statements. 
func RemoveColumn(name string) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "RemoveColumn") @@ -65,8 +77,15 @@ func RemoveColumn(name string) Rule { }) } -// ReplaceColumn returns a Rule that replaces a column identified by oldName -// with a new column identified by newName. +// ReplaceColumn returns a Rule that replaces every column in the SELECT list that +// matches oldName with a bare *ast.Identifier for newName. Matching is +// case-insensitive against both identifier names and aliases. +// +// Parameters: +// - oldName: Name or alias of the column to replace +// - newName: Replacement identifier name +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func ReplaceColumn(oldName, newName string) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "ReplaceColumn") @@ -82,7 +101,11 @@ func ReplaceColumn(oldName, newName string) Rule { }) } -// AddSelectStar returns a Rule that adds * to the SELECT columns. +// AddSelectStar returns a Rule that appends a wildcard * column to the SELECT list +// of a SELECT statement. This is a convenience wrapper around AddColumn with an +// *ast.Identifier{Name: "*"} argument. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func AddSelectStar() Rule { return AddColumn(&ast.Identifier{Name: "*"}) } diff --git a/pkg/transform/doc.go b/pkg/transform/doc.go index 0bc96102..3efa6c5d 100644 --- a/pkg/transform/doc.go +++ b/pkg/transform/doc.go @@ -14,19 +14,32 @@ // Package transform provides composable SQL query rewriting via AST manipulation. // -// This is GoSQLX's key differentiator — enabling safe, programmatic SQL modification. -// All transforms operate on AST nodes from pkg/sql/ast and preserve AST validity, -// meaning roundtrip (parse → transform → format/SQL) always produces valid SQL. +// This is GoSQLX's key differentiator — enabling safe, programmatic SQL modification +// without string concatenation. 
All transforms operate on AST nodes from pkg/sql/ast +// and preserve structural validity, meaning a roundtrip (parse -> transform -> format) +// always produces well-formed SQL. Transforms are defined by the Rule interface and +// applied individually or composed using Apply. // -// # Design +// # Available Transforms // -// Transforms are implemented as [Rule] values that can be applied individually -// or composed via [Apply]. Each rule modifies the AST in-place (since Go uses -// pointers) and returns an error if the transform cannot be applied. +// WHERE clause: AddWhere, AddWhereFromSQL, ReplaceWhere, RemoveWhere +// Columns: AddColumn, RemoveColumn +// JOINs: AddJoin, AddJoinFromSQL +// ORDER BY: AddOrderBy +// LIMIT/OFFSET: SetLimit, SetOffset (for pagination) +// Tables: RenameTable, AddTableAlias // // # WHERE Clause Transforms // -// // Add a filter condition +// // Add a filter condition using an AST node (safe for untrusted column values) +// rule := transform.AddWhere(&ast.BinaryExpression{ +// Left: &ast.Identifier{Name: "status"}, +// Operator: "=", +// Right: &ast.LiteralValue{Value: "active"}, +// }) +// transform.Apply(stmt, rule) +// +// // Add a filter from a trusted SQL string // rule := transform.AddWhereFromSQL("status = 'active'") // transform.Apply(stmt, rule) // @@ -42,6 +55,14 @@ // rule := transform.AddJoinFromSQL("LEFT JOIN orders ON orders.user_id = users.id") // transform.Apply(stmt, rule) // +// # Pagination +// +// // Set LIMIT and OFFSET for pagination +// transform.Apply(stmt, +// transform.SetLimit(20), +// transform.SetOffset(40), +// ) +// // # Security // // WARNING: Functions that accept raw SQL strings (AddWhereFromSQL, AddJoinFromSQL) @@ -51,7 +72,7 @@ // // # Composability // -// Multiple transforms can be chained: +// Multiple transforms can be chained in a single Apply call: // // transform.Apply(stmt, // transform.AddWhereFromSQL("active = true"), diff --git a/pkg/transform/joins.go b/pkg/transform/joins.go index 
b5f984b8..56ae7fdd 100644 --- a/pkg/transform/joins.go +++ b/pkg/transform/joins.go @@ -31,8 +31,17 @@ var validJoinTypes = map[string]bool{ "NATURAL": true, } -// AddJoin returns a Rule that adds a JOIN clause to a SELECT statement. -// joinType must be one of: INNER, LEFT, RIGHT, FULL, CROSS, NATURAL. +// AddJoin returns a Rule that appends a JOIN clause to the SELECT statement. The +// join type is validated against the set of supported types; an error is returned +// for any unrecognized value. +// +// Parameters: +// - joinType: One of INNER, LEFT, RIGHT, FULL, CROSS, or NATURAL (case-insensitive) +// - table: Name of the table to join +// - condition: AST expression for the ON condition (may be nil for CROSS/NATURAL joins) +// +// Returns an error for unrecognized join types or ErrUnsupportedStatement for +// non-SELECT statements. func AddJoin(joinType string, table string, condition ast.Expression) Rule { return RuleFunc(func(stmt ast.Statement) error { upper := strings.ToUpper(joinType) @@ -52,7 +61,14 @@ func AddJoin(joinType string, table string, condition ast.Expression) Rule { }) } -// RemoveJoin returns a Rule that removes a JOIN by table name from a SELECT statement. +// RemoveJoin returns a Rule that removes all JOIN clauses whose right-hand table name +// or alias matches tableName (case-insensitive). If no matching JOIN exists the +// statement is returned unmodified without an error. +// +// Parameters: +// - tableName: Name or alias of the table to remove from the JOIN list +// +// Returns ErrUnsupportedStatement for non-SELECT statements. 
func RemoveJoin(tableName string) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "RemoveJoin") diff --git a/pkg/transform/limit.go b/pkg/transform/limit.go index ad120198..0a287258 100644 --- a/pkg/transform/limit.go +++ b/pkg/transform/limit.go @@ -20,8 +20,13 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" ) -// SetLimit returns a Rule that sets or replaces the LIMIT clause on a SELECT statement. -// n must be non-negative. +// SetLimit returns a Rule that sets (or replaces) the LIMIT clause of a SELECT +// statement. Any existing LIMIT value is overwritten. +// +// Parameters: +// - n: Number of rows to return; must be >= 0. Returns an error for negative values. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func SetLimit(n int) Rule { return RuleFunc(func(stmt ast.Statement) error { if n < 0 { @@ -36,8 +41,13 @@ func SetLimit(n int) Rule { }) } -// SetOffset returns a Rule that sets or replaces the OFFSET clause on a SELECT statement. -// n must be non-negative. +// SetOffset returns a Rule that sets (or replaces) the OFFSET clause of a SELECT +// statement. Use together with SetLimit to implement pagination. +// +// Parameters: +// - n: Number of rows to skip; must be >= 0. Returns an error for negative values. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func SetOffset(n int) Rule { return RuleFunc(func(stmt ast.Statement) error { if n < 0 { @@ -52,7 +62,10 @@ func SetOffset(n int) Rule { }) } -// RemoveLimit returns a Rule that removes the LIMIT clause from a SELECT statement. +// RemoveLimit returns a Rule that removes the LIMIT clause from a SELECT statement, +// allowing the query to return an unbounded number of rows. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. 
func RemoveLimit() Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "RemoveLimit") @@ -64,7 +77,10 @@ func RemoveLimit() Rule { }) } -// RemoveOffset returns a Rule that removes the OFFSET clause from a SELECT statement. +// RemoveOffset returns a Rule that removes the OFFSET clause from a SELECT statement, +// resetting pagination to start from the first row. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func RemoveOffset() Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "RemoveOffset") diff --git a/pkg/transform/orderby.go b/pkg/transform/orderby.go index e5c55cc6..e06b829d 100644 --- a/pkg/transform/orderby.go +++ b/pkg/transform/orderby.go @@ -18,8 +18,14 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" ) -// AddOrderBy returns a Rule that adds an ORDER BY expression to a SELECT statement. -// If desc is true, the order is descending; otherwise ascending. +// AddOrderBy returns a Rule that appends an ORDER BY expression to a SELECT +// statement. Multiple calls append additional sort keys in the order they are applied. +// +// Parameters: +// - column: Name of the column (or expression alias) to sort by +// - desc: When true the sort direction is DESC; when false it is ASC +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func AddOrderBy(column string, desc bool) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "AddOrderBy") @@ -34,7 +40,13 @@ func AddOrderBy(column string, desc bool) Rule { }) } -// RemoveOrderBy returns a Rule that removes the ORDER BY clause entirely from a SELECT statement. +// RemoveOrderBy returns a Rule that removes the ORDER BY clause entirely from a +// SELECT statement, leaving the result set in an unspecified (engine-dependent) +// order. 
This is useful when rewriting queries for intermediate stages in a +// pipeline where ordering should be deferred to the final step, or when +// performance is preferred over a stable row order. +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func RemoveOrderBy() Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "RemoveOrderBy") diff --git a/pkg/transform/tables.go b/pkg/transform/tables.go index 058f651a..b95e8ae2 100644 --- a/pkg/transform/tables.go +++ b/pkg/transform/tables.go @@ -20,8 +20,22 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/sql/ast" ) -// ReplaceTable returns a Rule that replaces a table name everywhere it appears -// (FROM, JOIN, WHERE column qualifiers) in a SELECT, UPDATE, or DELETE statement. +// ReplaceTable returns a Rule that replaces all occurrences of a table name +// throughout a SELECT, UPDATE, or DELETE statement. The replacement covers: +// - FROM clause table references +// - JOIN clause left and right table references +// - Column qualifiers (table.column identifiers in SELECT list, WHERE, ORDER BY) +// - The table name field of UPDATE and DELETE statements +// - Subquery FROM/JOIN references (recursively) +// +// Matching is case-insensitive. This is useful for renaming tables, routing +// queries to shards, or switching between environments. +// +// Parameters: +// - oldName: The table name to replace (case-insensitive) +// - newName: The replacement table name +// +// Returns ErrUnsupportedStatement for INSERT or DDL statements. func ReplaceTable(oldName, newName string) Rule { return RuleFunc(func(stmt ast.Statement) error { switch s := stmt.(type) { @@ -54,7 +68,16 @@ func ReplaceTable(oldName, newName string) Rule { }) } -// AddTableAlias returns a Rule that adds an alias to a table in the FROM clause. +// AddTableAlias returns a Rule that assigns an alias to a specific table in a +// SELECT, UPDATE, or DELETE statement. 
For SELECT statements the alias is applied +// to the matching entry in the FROM clause; for UPDATE and DELETE it sets the +// statement-level alias field. +// +// Parameters: +// - tableName: The table to alias (case-insensitive match) +// - alias: The alias to assign +// +// Returns ErrUnsupportedStatement for INSERT or DDL statements. func AddTableAlias(tableName, alias string) Rule { return RuleFunc(func(stmt ast.Statement) error { switch s := stmt.(type) { @@ -81,8 +104,18 @@ func AddTableAlias(tableName, alias string) Rule { }) } -// QualifyColumns returns a Rule that prefixes unqualified column references -// with the given table name in a SELECT statement. +// QualifyColumns returns a Rule that prefixes every unqualified column reference +// in a SELECT statement's column list and WHERE clause with tableName. Only +// identifiers that have no existing table qualifier and whose name is not the +// wildcard "*" are modified. +// +// This is useful when merging standalone column lists into multi-table queries to +// avoid ambiguous column reference errors. +// +// Parameters: +// - tableName: The qualifier to prepend to unqualified identifiers +// +// Returns ErrUnsupportedStatement for non-SELECT statements. func QualifyColumns(tableName string) Rule { return RuleFunc(func(stmt ast.Statement) error { sel, err := getSelect(stmt, "QualifyColumns") diff --git a/pkg/transform/transform.go b/pkg/transform/transform.go index 0941341e..945856a5 100644 --- a/pkg/transform/transform.go +++ b/pkg/transform/transform.go @@ -23,21 +23,63 @@ import ( "github.com/ajitpratap0/GoSQLX/pkg/sql/tokenizer" ) -// Rule represents a rewrite rule that can be applied to a statement. +// Rule represents an AST rewrite rule that can be applied to a single SQL statement. +// Rules modify the AST in-place and return an error if the transform cannot be +// applied (e.g., applying a SELECT-only rule to an INSERT statement). 
+// +// Implement this interface to create custom transform rules: +// +// type MyRule struct{} +// +// func (r MyRule) Apply(stmt ast.Statement) error { +// sel, ok := stmt.(*ast.SelectStatement) +// if !ok { +// return nil // skip non-SELECT statements +// } +// // modify sel in-place +// return nil +// } +// +// Built-in rules are created by the constructor functions in this package (AddWhere, +// AddColumn, AddJoin, SetLimit, etc.). Use Apply (the package-level function) to +// chain multiple rules together. type Rule interface { Apply(stmt ast.Statement) error } -// RuleFunc adapts a function to the Rule interface. +// RuleFunc is a function type that implements the Rule interface. It allows +// anonymous functions and closures to be used directly as transform rules without +// defining a named type. All built-in rule constructors (AddWhere, AddColumn, etc.) +// return a RuleFunc internally. +// +// Example: +// +// rule := transform.RuleFunc(func(stmt ast.Statement) error { +// sel, ok := stmt.(*ast.SelectStatement) +// if !ok { +// return nil +// } +// sel.Distinct = true +// return nil +// }) type RuleFunc func(stmt ast.Statement) error -// Apply implements Rule. +// Apply implements the Rule interface by invoking the underlying function. func (f RuleFunc) Apply(stmt ast.Statement) error { return f(stmt) } -// Apply applies multiple rules to a statement in order. -// If any rule returns an error, Apply stops and returns that error. +// Apply executes one or more rules against an AST statement in the order they are +// provided. If any rule returns a non-nil error the function stops immediately and +// returns that error without applying subsequent rules. 
+// +// This is the primary entry point for composing transforms: +// +// err := transform.Apply(stmt, +// transform.AddWhereFromSQL("active = true"), +// transform.SetLimit(100), +// transform.AddOrderBy("created_at", true), +// ) func Apply(stmt ast.Statement, rules ...Rule) error { for _, rule := range rules { if err := rule.Apply(stmt); err != nil { @@ -47,7 +89,13 @@ func Apply(stmt ast.Statement, rules ...Rule) error { return nil } -// ErrUnsupportedStatement is returned when a transform is applied to an unsupported statement type. +// ErrUnsupportedStatement is returned when a transform rule is applied to a statement +// type it does not support. For example, AddColumn only supports SelectStatement; applying +// it to an InsertStatement will produce this error. +// +// Fields: +// - Transform: Name of the transform function that produced the error (e.g., "AddColumn") +// - Got: Human-readable name of the statement type that was rejected (e.g., "INSERT") type ErrUnsupportedStatement struct { Transform string Got string @@ -128,8 +176,22 @@ func stmtTypeName(stmt ast.Statement) string { } } -// ParseSQL parses a SQL string into an AST. This is a convenience function -// for use with transform functions. +// ParseSQL parses a SQL string into a full AST containing all statements. This is +// a convenience wrapper around the tokenizer and parser pipeline that handles +// resource pooling automatically. +// +// Use this function when you need an AST for subsequent Apply calls: +// +// tree, err := transform.ParseSQL("SELECT id, name FROM users WHERE active = true") +// if err != nil { +// log.Fatal(err) +// } +// stmt := tree.Statements[0] +// transform.Apply(stmt, transform.SetLimit(10)) +// fmt.Println(transform.FormatSQL(stmt)) +// +// Returns a *ast.AST containing all parsed statements, or an error if tokenization +// or parsing fails. 
func ParseSQL(sql string) (*ast.AST, error) { tkz := tokenizer.GetTokenizer() defer tokenizer.PutTokenizer(tkz) @@ -150,7 +212,17 @@ func ParseSQL(sql string) (*ast.AST, error) { return tree, nil } -// FormatSQL formats an AST statement back to SQL using compact style. +// FormatSQL converts an AST statement back into a compact SQL string using the +// GoSQLX formatter. It is the inverse of ParseSQL and completes the +// parse-transform-format round-trip. +// +// The output uses compact style with minimal whitespace. Use this after applying +// transforms to obtain the final SQL to execute or log. +// +// Example: +// +// sql := transform.FormatSQL(stmt) +// // "SELECT id, name FROM users WHERE active = true LIMIT 10" func FormatSQL(stmt ast.Statement) string { return formatter.FormatStatement(stmt, ast.CompactStyle()) } diff --git a/pkg/transform/where.go b/pkg/transform/where.go index 1ec1ab01..3b5e4ffa 100644 --- a/pkg/transform/where.go +++ b/pkg/transform/where.go @@ -32,8 +32,18 @@ func getWhere(stmt ast.Statement) (*ast.Expression, error) { } } -// AddWhere returns a Rule that adds an AND condition to the existing WHERE clause. -// If no WHERE clause exists, the condition becomes the WHERE clause. +// AddWhere returns a Rule that appends a condition to the WHERE clause of a +// SELECT, UPDATE, or DELETE statement. If the statement already has a WHERE clause, +// the new condition is combined with AND. If there is no WHERE clause, the condition +// becomes the sole WHERE predicate. +// +// Use this when you have a pre-built AST expression. For raw SQL strings use +// AddWhereFromSQL instead. +// +// Parameters: +// - condition: An AST expression node representing the filter predicate +// +// Returns ErrUnsupportedStatement for INSERT or DDL statements. 
func AddWhere(condition ast.Expression) Rule { return RuleFunc(func(stmt ast.Statement) error { where, err := getWhere(stmt) @@ -53,7 +63,12 @@ func AddWhere(condition ast.Expression) Rule { }) } -// RemoveWhere returns a Rule that removes the WHERE clause entirely. +// RemoveWhere returns a Rule that removes the WHERE clause from a SELECT, UPDATE, +// or DELETE statement. After the rule is applied, the statement will match all rows +// in the target table(s). Use with care in production to avoid unintentional full +// table scans or mass updates. +// +// Returns ErrUnsupportedStatement for INSERT or DDL statements. func RemoveWhere() Rule { return RuleFunc(func(stmt ast.Statement) error { where, err := getWhere(stmt) @@ -65,7 +80,14 @@ func RemoveWhere() Rule { }) } -// ReplaceWhere returns a Rule that replaces the WHERE clause with the given condition. +// ReplaceWhere returns a Rule that unconditionally replaces the WHERE clause of a +// SELECT, UPDATE, or DELETE statement with the given condition. Unlike AddWhere, +// this discards any existing WHERE predicate instead of combining with AND. +// +// Parameters: +// - condition: The new AST expression to use as the WHERE predicate +// +// Returns ErrUnsupportedStatement for INSERT or DDL statements. func ReplaceWhere(condition ast.Expression) Rule { return RuleFunc(func(stmt ast.Statement) error { where, err := getWhere(stmt)