99GRAFANA_URL = (
1010 "https://influx-prod-13-prod-us-east-0.grafana.net/api/v1/push/influx/write"
1111)
12-
13- # Path to checked out llvm/llvm-project repository
14- REPOSITORY_PATH = "/data/llvm-project"
15-
16- # Path to record of most recently processed commits
17- DATA_PATH = "/data/recent_commits.csv"
12+ REPOSITORY_URL = "https://github.com/llvm/llvm-project.git"
1813
1914# Number of days to look back for new commits
2015# We allow some buffer time between when a commit is made and when it is queried
@@ -61,99 +56,33 @@ class LLVMCommitInfo:
6156 is_approved : bool = False
6257
6358
64- def read_past_commits () -> list [list [str ]]:
65- """Read recently scraped commits from the data path.
66-
67- Returns:
68- List of commits that have been scraped.
69- """
70- # If the data path doesn't exist, we haven't scraped any commits yet.
71- if not os .path .exists (DATA_PATH ):
72- logging .warning (
73- " Data path %s does not exist. No past commits found." , DATA_PATH
74- )
75- return []
76-
77- # Read the past commits from the data path
78- with open (DATA_PATH , "r" ) as f :
79- f .readline () # Skip header
80- rows = f .readlines ()
81- commit_history = [row .strip ().split ("," ) for row in rows if row .strip ()]
82- return commit_history
83-
84-
85- def record_new_commits (new_commits : list [LLVMCommitInfo ]) -> None :
86- """Record newly scraped commits to the data path.
87-
88- Args:
89- new_commits: List of commits to record.
90-
91- Returns:
92- None
93- """
94- with open (DATA_PATH , "w" ) as f :
95-
96- # Write CSV header
97- f .write (
98- "," .join ([
99- "commit_sha" ,
100- "commit_datetime" ,
101- "has_pull_request" ,
102- "pull_request_number" ,
103- "is_reviewed" ,
104- "is_approved" ,
105- ])
106- + "\n "
107- )
108-
109- # We want the newest commit as the last entry, so iterate backwards
110- for i in range (len (new_commits ) - 1 , - 1 , - 1 ):
111- commit_info = new_commits [i ]
112- record = "," .join ([
113- commit_info .commit_sha ,
114- commit_info .commit_datetime .astimezone (
115- datetime .timezone .utc
116- ).isoformat (),
117- str (commit_info .has_pull_request ),
118- str (commit_info .pr_number ),
119- str (commit_info .is_reviewed ),
120- str (commit_info .is_approved ),
121- ])
122- f .write (f"{ record } \n " )
123-
124-
12559def scrape_new_commits_by_date (
126- last_known_commit : str , target_datetime : datetime .datetime
60+ target_datetime : datetime .datetime ,
12761) -> list [git .Commit ]:
12862 """Scrape new commits from a given date.
12963
13064 Args:
131- last_known_commit: The last known scraped commit.
13265 target_datetime: The date to scrape for new commits.
13366
13467 Returns:
13568 List of new commits made on the given date.
13669 """
137- # Pull any new commits into local repository
138- repo = git .Repo (REPOSITORY_PATH )
139- repo .remotes .origin .pull ()
70+ # Clone repository to current working directory
71+ repo = git .Repo .clone_from (
72+ url = REPOSITORY_URL ,
73+ to_path = "./llvm-project" ,
74+ )
14075
14176 # Scrape for new commits
14277 # iter_commits() yields commits in reverse chronological order
14378 new_commits = []
14479 for commit in repo .iter_commits ():
145- # Skip commits that are too new
80+ # Skip commits that don't match the target date
14681 committed_datetime = commit .committed_datetime .astimezone (
14782 datetime .timezone .utc
14883 )
149- if committed_datetime .date () > target_datetime .date ():
84+ if committed_datetime .date () != target_datetime .date ():
15085 continue
151- # Stop scraping if the commit is older than the target date
152- if committed_datetime .date () < target_datetime .date ():
153- break
154- # Stop scraping if we've already recorded this commit
155- if commit .hexsha == last_known_commit :
156- break
15786
15887 new_commits .append (commit )
15988
@@ -274,20 +203,15 @@ def main() -> None:
274203 grafana_api_key = os .environ ["GRAFANA_API_KEY" ]
275204 grafana_metrics_userid = os .environ ["GRAFANA_METRICS_USERID" ]
276205
277- logging .info ("Reading recently processed commits." )
278- recorded_commits = read_past_commits ()
279-
280- last_known_commit = recorded_commits [- 1 ][0 ] if recorded_commits else ""
281-
282- # Scrape new commits, if any
206+ # Scrape new commits
283207 date_to_scrape = datetime .datetime .now (
284208 datetime .timezone .utc
285209 ) - datetime .timedelta (days = LOOKBACK_DAYS )
286210 logging .info (
287- "Scraping checked out llvm/llvm-project for new commits on %s" ,
211+ "Cloning and scraping llvm/llvm-project for new commits on %s" ,
288212 date_to_scrape .strftime ("%Y-%m-%d" ),
289213 )
290- new_commits = scrape_new_commits_by_date (last_known_commit , date_to_scrape )
214+ new_commits = scrape_new_commits_by_date (date_to_scrape )
291215 if not new_commits :
292216 logging .info ("No new commits found. Exiting." )
293217 return
@@ -298,11 +222,7 @@ def main() -> None:
298222 logging .info ("Uploading metrics to Grafana." )
299223 upload_daily_metrics (grafana_api_key , grafana_metrics_userid , new_commit_info )
300224
301- logging .info ("Recording new commits." )
302- record_new_commits (new_commit_info )
303-
304225
305226if __name__ == "__main__" :
306227 logging .basicConfig (level = logging .INFO )
307228 main ()
308-