1+ import logging
2+ import re
3+ from typing import Callable
4+
5+ import fenic as fc
6+
# Module-level logger named after this module. Its own level is set to DEBUG
# at import time, so the debug() calls below are not filtered out at the
# logger level (handlers may still apply their own thresholds).
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
9+
class FenicAPIDocQuerySearch:
    """Search the Fenic API documentation table and the stored learnings.

    Public searches treat the query as a case-insensitive regular expression.
    Learnings are searched with the whole query first and then with each
    whitespace-separated term of the query.
    """

    # Maximum number of rows returned by ``search_learnings``.
    _MAX_LEARNINGS = 7

    @staticmethod
    def _ci_pattern(query: str) -> str:
        """Return *query* prefixed with the inline case-insensitive flag."""
        return f"(?i){query}"

    @classmethod
    def _is_valid_regex(cls, query: str) -> bool:
        """Return True if *query* compiles with Python's ``re`` module.

        NOTE(review): this only checks Python's regex dialect; the engine
        that evaluates ``rlike`` may accept a slightly different one - confirm.
        """
        try:
            re.compile(query)
        except re.error:
            return False
        return True

    @classmethod
    def _search_api_docs_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Keep the rows of *df* where any searchable column matches *query*.

        ``name`` and ``qualified_name`` are matched directly; ``docstring``,
        ``annotation`` and ``returns`` are matched only when they are not null.
        """
        pattern = cls._ci_pattern(query)
        return df.filter(
            fc.col("name").rlike(pattern)
            | fc.col("qualified_name").rlike(pattern)
            | (
                fc.col("docstring").is_not_null()
                & fc.col("docstring").rlike(pattern)
            )
            | (
                fc.col("annotation").is_not_null()
                & fc.col("annotation").rlike(pattern)
            )
            | (
                fc.col("returns").is_not_null()
                & fc.col("returns").rlike(pattern)
            )
        )

    @classmethod
    def _search_learnings_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Keep learnings whose question or answer matches *query* as a regex.

        A row is also kept when *query* is an element of its ``keywords``
        array (checked with ``array_contains``, not as a regex).
        """
        pattern = cls._ci_pattern(query)
        return df.filter(
            fc.col("question").rlike(pattern)
            | fc.col("answer").rlike(pattern)
            | fc.array_contains(fc.col("keywords"), query)
        )

    @classmethod
    def _search_learnings_keyword(cls, df: fc.DataFrame, term: str) -> fc.DataFrame:
        """Keep learnings that mention *term*.

        A row is kept when its question or answer ``contains`` the term, or
        when the term is an element of its ``keywords`` array.
        """
        return df.filter(
            fc.col("question").contains(term)
            | fc.col("answer").contains(term)
            | fc.array_contains(fc.col("keywords"), term)
        )

    @classmethod
    def _search_terms(
        cls,
        df: fc.DataFrame,
        query: str,
        search_func: Callable[[fc.DataFrame, str], fc.DataFrame],
    ) -> fc.DataFrame:
        """Search *df* with the whole query and with each of its terms.

        Args:
            df: Frame to search.
            query: Full query string; it is also lower-cased and split on
                whitespace to obtain the individual terms.
            search_func: Callable applied as ``search_func(df, text)`` for the
                whole query and for every term.

        Returns:
            The union of the whole-query result and every per-term result.
            A query without any term (empty or whitespace only) yields just
            the whole-query result instead of raising ``IndexError``.
        """
        # First search the query as a whole.
        result_df = search_func(df, query)
        # count() executes the query, so only pay for it when the message
        # will actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("result_df - %s: %s", query, result_df.count())

        # Then search each individual term. A term identical to the whole
        # query (or to an earlier term) is skipped: repeating the very same
        # search could only contribute the same rows again.
        # NOTE(review): lower-casing also lower-cases regex escapes such as
        # ``\S`` or ``\D`` - confirm this is acceptable for regex queries.
        already_searched = {query}
        for term in query.lower().split():
            if term in already_searched:
                continue
            already_searched.add(term)
            result_df = result_df.union(search_func(df, term))
        # NOTE(review): if ``union`` keeps duplicates (UNION ALL semantics),
        # a row matching several terms still appears once per match - confirm
        # whether callers need de-duplication.

        # to_pydict() materialises every row; keep it behind the level check.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug("learnings results: %s", result_df.to_pydict())

        return result_df

    @classmethod
    def search_learnings(
        cls, session: fc.Session, query: str
    ) -> "fc.DataFrame | None":
        """Search the ``learnings`` table with *query* used as a regex.

        Matching rows are scored (question match 10, answer match 5, keyword
        match 3, all against the whole query), rows whose ``learning_type`` is
        ``"correction"`` get their score multiplied by 1.5, and the
        highest-scoring rows are returned.

        Args:
            session: Session used to look up and read the ``learnings`` table.
            query: Search text, interpreted as a case-insensitive regex.

        Returns:
            A frame with at most ``_MAX_LEARNINGS`` rows ordered by descending
            ``score``, or ``None`` when the ``learnings`` table does not
            exist, the query is blank, or building the search fails.
        """
        if not session.catalog.does_table_exist("learnings"):
            return None

        # A blank query has no terms to search for. This used to end in an
        # IndexError that was logged as an error and turned into None; return
        # None directly instead.
        if not query.strip():
            logger.debug("Skipping learnings search for blank query")
            return None

        try:
            learnings_df = session.table("learnings")

            logger.debug("Searching learnings with regex: %s", query)
            matches = cls._search_terms(
                learnings_df, query, cls._search_learnings_regex
            )

            # Per-column relevance scores, computed against the whole query.
            pattern = cls._ci_pattern(query)
            scored = matches.select(
                "question",
                "answer",
                "learning_type",
                "keywords",
                "related_functions",
                fc.when(fc.col("question").rlike(pattern), fc.lit(10))
                .otherwise(fc.lit(0))
                .alias("question_score"),
                fc.when(fc.col("answer").rlike(pattern), fc.lit(5))
                .otherwise(fc.lit(0))
                .alias("answer_score"),
                fc.when(fc.array_contains(fc.col("keywords"), query), fc.lit(3))
                .otherwise(fc.lit(0))
                .alias("keywords_score"),
            )

            # Sum the partial scores, then boost corrections by 1.5x.
            scored = scored.select(
                "*",
                (
                    fc.col("question_score")
                    + fc.col("answer_score")
                    + fc.col("keywords_score")
                ).alias("base_score"),
            ).select(
                "*",
                fc.when(
                    fc.col("learning_type") == "correction",
                    fc.col("base_score") * 1.5,
                )
                .otherwise(fc.col("base_score"))
                .alias("score"),
            )

            # Best matches first, capped at _MAX_LEARNINGS rows.
            return scored.order_by(fc.col("score").desc()).limit(
                cls._MAX_LEARNINGS
            )
        except Exception as e:
            # Deliberate best-effort boundary: a failing learnings search is
            # reported (with traceback) but never propagated to the caller.
            logger.exception("Warning: Learnings search failed: %s", e)
            return None

    @classmethod
    def search_api_docs(cls, session: fc.Session, query: str) -> fc.DataFrame:
        """Search the public entries of the ``api_df`` table with a regex.

        Args:
            session: Session used to read the ``api_df`` table.
            query: Case-insensitive regular expression to search for.

        Returns:
            A frame with the columns ``type``, ``name``, ``qualified_name``,
            ``docstring``, ``name_score``, ``path_score`` and ``score``.
            The rows are not sorted here.

        Raises:
            ValueError: If *query* does not compile as a regular expression.
        """
        # Validate first so an invalid query fails before any table access.
        if not cls._is_valid_regex(query):
            raise ValueError("Invalid regex query")

        # Keep only public API elements: flagged ``is_public`` and without
        # "._" anywhere in the qualified name.
        public_df = session.table("api_df").filter(
            (fc.col("is_public")) & (~fc.col("qualified_name").contains("._"))
        )

        logger.debug("Searching API docs with regex: %s", query)
        matches = cls._search_api_docs_regex(public_df, query)

        # Relevance scoring: a name match is worth 10, a qualified-name match
        # 5. Rows that matched only on other columns keep a score of 0.
        pattern = cls._ci_pattern(query)
        scored = matches.select(
            "type",
            "name",
            "qualified_name",
            "docstring",
            fc.when(fc.col("name").rlike(pattern), fc.lit(10))
            .otherwise(fc.lit(0))
            .alias("name_score"),
            fc.when(fc.col("qualified_name").rlike(pattern), fc.lit(5))
            .otherwise(fc.lit(0))
            .alias("path_score"),
        )

        # Total score; ordering and limiting are left to the caller.
        return scored.select(
            "*", (fc.col("name_score") + fc.col("path_score")).alias("score")
        )
0 commit comments