# Grafana endpoint that the collected metrics are written to.
GRAFANA_URL = (
    "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
)
# GitHub GraphQL endpoint used to look up pull requests for commits.
GITHUB_GRAPHQL_API_URL = "https://api.github.com/graphql"
REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"

# How many commits to query the GitHub GraphQL API for at a time.
# Querying too many commits at once often leads to the call failing.
GITHUB_API_BATCH_SIZE = 75
18+
1419# Number of days to look back for new commits
1520# We allow some buffer time between when a commit is made and when it is queried
1621# for reviews. This is to allow time for any events to propagate in the GitHub
4449 AND JSON_VALUE(pr_event.payload, '$.pull_request.merge_commit_sha') IS NOT NULL
4550"""
4651
# Template GraphQL subquery to check if a commit has an associated pull request
# and whether that pull request has been reviewed and approved.
#
# The result is aliased as `commit_<sha>` so that many of these subqueries can
# be placed in a single `repository { ... }` query and told apart in the
# response. Doubled braces are literal braces escaped for `str.format`; the
# only substitution field is `commit_sha`.
COMMIT_GRAPHQL_SUBQUERY_TEMPLATE = """
commit_{commit_sha}:
  object(oid:"{commit_sha}") {{
    ... on Commit {{
      associatedPullRequests(first: 1) {{
        totalCount
        pullRequest: nodes {{
          number
          reviewDecision
        }}
      }}
    }}
  }}
"""
68+
4769
4870@dataclasses .dataclass
4971class LLVMCommitInfo :
@@ -153,6 +175,85 @@ def query_for_reviews(
153175 return list (new_commits .values ())
154176
155177
def validate_push_commits(
    new_commits: list[LLVMCommitInfo], github_token: str
) -> None:
    """Validate that push commits don't have a pull request.

    To address lossiness of data from GitHub Archive BigQuery, we check each
    commit to see if it actually has an associated pull request. Commits that
    turn out to have one are amended in place (`has_pull_request`,
    `pr_number`, `is_reviewed`, `is_approved`).

    A batch whose request is rejected by the API, or whose response carries no
    repository data, is logged and skipped; the commits in that batch are left
    unmodified.

    Args:
      new_commits: List of commits to validate.
      github_token: The access token to use with the GitHub GraphQL API.

    Raises:
      requests.RequestException: If an HTTP request fails at the transport
        level or times out.
    """

    # Only commits without a known pull request need to be checked. Keying by
    # SHA also guarantees each alias appears at most once in a query.
    potential_push_commits = {
        commit.commit_sha: commit
        for commit in new_commits
        if not commit.has_pull_request
    }
    logging.info("Found %d potential push commits", len(potential_push_commits))
    if not potential_push_commits:
        # Nothing to validate; an empty `repository { }` selection would be
        # an invalid GraphQL query.
        return

    commit_subqueries = [
        COMMIT_GRAPHQL_SUBQUERY_TEMPLATE.format(commit_sha=commit_sha)
        for commit_sha in potential_push_commits
    ]

    # Query GitHub GraphQL API for pull requests associated with push commits
    # We query in batches as large queries often fail
    api_commit_data = {}
    query_template = """
    query {
      repository(owner:"llvm", name:"llvm-project"){
        %s
      }
    }
    """
    # Ceiling division: an exact multiple of the batch size must not produce
    # a trailing empty batch.
    num_batches = (
        len(commit_subqueries) + GITHUB_API_BATCH_SIZE - 1
    ) // GITHUB_API_BATCH_SIZE
    logging.info("Querying GitHub GraphQL API in %d batches", num_batches)
    for i in range(num_batches):
        subquery_batch = commit_subqueries[
            i * GITHUB_API_BATCH_SIZE : (i + 1) * GITHUB_API_BATCH_SIZE
        ]
        query = query_template % "".join(subquery_batch)

        logging.info(
            "Querying batch %d of %d (%d commits)",
            i + 1,
            num_batches,
            len(subquery_batch),
        )
        response = requests.post(
            url=GITHUB_GRAPHQL_API_URL,
            headers={
                "Authorization": f"bearer {github_token}",
            },
            json={"query": query},
            # Without a timeout a stalled connection would hang the job.
            timeout=60,
        )
        if response.status_code < 200 or response.status_code >= 300:
            logging.error("Failed to query GitHub GraphQL API: %s", response.text)
            continue

        # GraphQL can answer 200 with an `errors` list and null data, so the
        # payload has to be checked before it is merged.
        repository_data = (response.json().get("data") or {}).get("repository")
        if not repository_data:
            logging.error("Failed to query GitHub GraphQL API: %s", response.text)
            continue
        api_commit_data.update(repository_data)

    amend_count = 0
    for alias, data in api_commit_data.items():
        # `object(oid: ...)` is null for an unknown SHA and the inline
        # fragment yields nothing for a non-commit object; treat both as
        # "no pull request found".
        associated = (data or {}).get("associatedPullRequests") or {}
        pull_requests = associated.get("pullRequest") or []

        # Verify that push commit has no pull requests
        if associated.get("totalCount", 0) == 0 or not pull_requests:
            continue

        # Amend fields with new data from API
        pull_request = pull_requests[0]
        commit_info = potential_push_commits[alias.removeprefix("commit_")]
        commit_info.has_pull_request = True
        commit_info.pr_number = pull_request["number"]
        commit_info.is_reviewed = pull_request["reviewDecision"] is not None
        commit_info.is_approved = pull_request["reviewDecision"] == "APPROVED"
        amend_count += 1

    logging.info("Amended %d commits", amend_count)
256+
156257def upload_daily_metrics (
157258 grafana_api_key : str ,
158259 grafana_metrics_userid : str ,
@@ -164,9 +265,6 @@ def upload_daily_metrics(
164265 grafana_api_key: The key to make API requests with.
165266 grafana_metrics_userid: The user to make API requests with.
166267 new_commits: List of commits to process & upload to Grafana.
167-
168- Returns:
169- None
170268 """
171269 # Count each type of commit made
172270 approval_count = 0
@@ -200,6 +298,7 @@ def upload_daily_metrics(
200298
201299
202300def main () -> None :
301+ github_token = os .environ ["GITHUB_TOKEN" ]
203302 grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
204303 grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
205304
@@ -219,6 +318,9 @@ def main() -> None:
219318 logging .info ("Querying for reviews of new commits." )
220319 new_commit_info = query_for_reviews (new_commits , date_to_scrape )
221320
321+ logging .info ("Validating push commits." )
322+ validate_push_commits (new_commit_info , github_token )
323+
222324 logging .info ("Uploading metrics to Grafana." )
223325 upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
224326
0 commit comments