3535PID_REGEX = re .compile (r"([^:]*\d{6,}.*)" )
3636
3737
38-
3938def get_option_parser () -> COP :
4039 parser = COP (
4140 __doc__ ,
@@ -46,12 +45,9 @@ def get_option_parser() -> COP:
4645 "-i" , type = int , help = "interval between query cycles in seconds" ,
4746 default = 10 , dest = "delay" )
4847 parser .add_option (
49- "-o" , type = str , help = "output directory for json file" ,
50- default = os .environ ['DATADIR' ], dest = "output_dir" )
51- parser .add_option (
52- "-m" , type = str , help = "Location of memory process files" ,
48+ "-m" , type = str , help = "Location of cgroups directory" ,
5349 default = "/sys/fs/cgroup" ,
54- dest = "memory " )
50+ dest = "cgroup_location " )
5551
5652 return parser
5753
@@ -70,7 +66,6 @@ class Process:
7066 """Class for representing CPU and Memory usage of a process"""
7167 cgroup_memory_path : str
7268 cgroup_cpu_path : str
73- job_id : str
7469
7570
7671def stop_profiler (* args ):
@@ -88,24 +83,28 @@ def parse_memory_file(process):
8883 return int (line )
8984
9085
def parse_cpu_file(process, cgroup_version):
    """Read the cgroup CPU usage file and return the CPU time consumed.

    Args:
        process: Object whose ``cgroup_cpu_path`` attribute is the path of
            the CPU usage file to read.
        cgroup_version: Selects the file layout. ``1`` reads the
            ``usage_usec`` entry of a key/value stat file; ``2`` reads a
            file whose first line is a single integer counter.

    Returns:
        float: ``usage_usec / 1000`` for layout ``1`` (microseconds scaled
        to milliseconds), or the counter ``/ 1000000`` for layout ``2``.

    Raises:
        FileNotFoundError: If ``cgroup_version`` is neither 1 nor 2.
        ValueError: If the file contains no usable CPU usage value.
        OSError: If the file cannot be opened or read.
    """
    # NOTE(review): the 1/2 labels follow the convention used by the caller
    # in this file.  In the kernel documentation ``cpu.stat``/``usage_usec``
    # is the cgroup v2 interface and ``cpuacct.usage`` (nanoseconds) is the
    # v1 interface -- confirm whether the labels are swapped.
    if cgroup_version == 1:
        with open(process.cgroup_cpu_path) as cpu_file:
            for line in cpu_file:
                if "usage_usec" in line:
                    match = re.search(r'\d+', line)
                    if match is None:
                        # Raise something the polling loop already handles
                        # instead of an uncaught IndexError.
                        raise ValueError(
                            "no usage_usec value in "
                            + process.cgroup_cpu_path)
                    return int(match.group()) / 1000
        # Fail loudly rather than returning None, which would end up
        # written to disk as the string "None".
        raise ValueError(
            "usage_usec not found in " + process.cgroup_cpu_path)

    if cgroup_version == 2:
        with open(process.cgroup_cpu_path) as cpu_file:
            first_line = cpu_file.readline()
        if not first_line:
            raise ValueError(
                "cpu usage file is empty: " + process.cgroup_cpu_path)
        # presumably the counter is in nanoseconds (per the original
        # comment), making the result milliseconds -- TODO confirm
        return int(first_line) / 1000000

    raise FileNotFoundError("cpu usage files not found")
98100
99101
def write_data(data, filename):
    """Write ``data`` plus a trailing newline to ``filename``.

    The file is opened in ``'w'`` mode, so any existing content is
    replaced on every call.

    Args:
        data: The text to write.
        filename: Path of the file to write to.

    Raises:
        IOError: If the file cannot be opened or written; the underlying
            error is attached as ``__cause__``.
    """
    try:
        with open(filename, 'w') as f:
            f.write(data + "\n")
    except IOError as err:
        # Chain explicitly so the underlying errno/reason stays attached
        # to the error that callers see.
        raise IOError("Unable to write data to file:" + filename) from err
109108
110109
111110def get_cgroup_dir ():
@@ -119,31 +118,41 @@ def get_cgroup_dir():
119118
120119
121120def profile (args ):
122-
121+ # Find the cgroup that this process is running in.
122+ # Cylc will put this profiler in the same cgroup as the job it is profiling
123123 cgroup_name = get_cgroup_dir ()
124124
125- # AZURE SPICE CGROUP LOCATION
126- cgroup_location = "/sys/fs/cgroup/" + cgroup_name
125+ # HPC uses cgroups v2 and SPICE uses cgroups v1
126+ cgroup_version = None
127+
128+ if Path .exists (Path (args .cgroup_location + cgroup_name )):
129+ cgroup_version = 1
130+ elif Path .exists (Path (args .cgroup_location + "/memory" + cgroup_name )):
131+ cgroup_version = 2
132+ else :
133+ raise FileNotFoundError ("cgroups not found:" + cgroup_name )
134+
127135 peak_memory = 0
128136 processes = []
129- # last_system_usage = None
130- # last_cpu_usage = None
131- # Find the correct memory_stat file for the process
132- if not Path .exists (Path (cgroup_location )):
133- raise FileNotFoundError ("cgroups not found:" + cgroup_location )
134- try :
135- # Find memory.stat files
136- for job_id in os .listdir (cgroup_location ):
137- if "memory.peak" in job_id :
138- processes .append (Process (
139- cgroup_memory_path = cgroup_location + "/" + job_id ,
140- cgroup_cpu_path = cgroup_location + "/" + "cpu.stat" ,
141- job_id = job_id ))
142- except FileNotFoundError as e :
143- print (e )
144- raise FileNotFoundError ("cgroups not found:" + cgroup_location )
145-
146- # cpu_count = get_host_num_cpus(args.cpuset_path, processes)
137+
138+ if cgroup_version == 1 :
139+ try :
140+ processes .append (Process (
141+ cgroup_memory_path = args .cgroup_location + cgroup_name + "/" + "memory.peak" ,
142+ cgroup_cpu_path = args .cgroup_location + cgroup_name + "/" + "cpu.stat" ))
143+ except FileNotFoundError as e :
144+ print (e )
145+ raise FileNotFoundError ("cgroups not found:" + args .cgroup_location )
146+
147+ elif cgroup_version == 2 :
148+ try :
149+ processes .append (Process (
150+ cgroup_memory_path = args .cgroup_location + "/memory" + cgroup_name + "/memory.max_usage_in_bytes" ,
151+ cgroup_cpu_path = args .cgroup_location + "/cpu" + cgroup_name + "/cpuacct.usage" ))
152+ except FileNotFoundError as e :
153+ print (e )
154+ raise FileNotFoundError ("cgroups not found:" + args .cgroup_location )
155+
147156 while True :
148157 # Write memory usage data
149158 for process in processes :
@@ -152,20 +161,13 @@ def profile(args):
152161 memory = parse_memory_file (process )
153162 if memory > peak_memory :
154163 peak_memory = memory
155- write_data (process , str (peak_memory ), args . output_dir , ".memory" , "max_rss" )
156- cpu_time = parse_cpu_file (process )
157- write_data (process , str (cpu_time ), args . output_dir , ".cpu" , "cpu_time" )
164+ write_data (str (peak_memory ), "max_rss" )
165+ cpu_time = parse_cpu_file (process , cgroup_version )
166+ write_data (str (cpu_time ), "cpu_time" )
158167
159168 except (OSError , IOError , ValueError ) as error :
160169 print (error )
161170
162- # process, usage_percent = get_cpu_percent(
163- # cpu_count, args.proc_path,
164- # process, last_system_usage, last_cpu_usage)
165- #
166- # write_data(process, usage_percent,
167- # args.output_dir, ".cpu")
168-
169171 time .sleep (args .delay )
170172
171173
0 commit comments