diff --git a/jtop/core/power.py b/jtop/core/power.py
index 27107467..6c2c2c4f 100644
--- a/jtop/core/power.py
+++ b/jtop/core/power.py
@@ -16,6 +16,7 @@
 # along with this program. If not, see <http://www.gnu.org/licenses/>.
 
 from .common import cat, check_file
+import glob
 import os
 # Logging
 import logging
@@ -105,6 +106,56 @@ def find_all_i2c_power_monitor(i2c_path):
     return power_sensor
 
 
+def find_all_oc_event_counters():
+    """
+    Find all the overcurrent (OC) event counters on the system.
+
+    :return: dictionary from the path of each oc*_event_cnt file to its latest count,
+        empty when the system has no OC event counters
+    """
+    # Sort the paths, glob returns them in arbitrary order
+    event_cnt_files = sorted(glob.glob('/sys/class/hwmon/hwmon*/oc*_event_cnt'))
+    if not event_cnt_files:
+        logger.warning("No OC event counters found")
+        return {}
+    # -1 marks a counter that has not been read yet
+    event_counts = {filename: -1 for filename in event_cnt_files}
+    # First read, it stores the counts already present when the counters are discovered
+    update_oc_event_counts(event_counts)
+    return event_counts
+
+
+def update_oc_event_counts(event_counts):
+    """
+    Function to update overcurrent event counts.
+
+    Update the event counts in the event_counts dictionary, and return True if any of the counts have increased.
+    Every counter is always read, a counter that cannot be read is logged and keeps its previous count.
+
+    :param event_counts: dictionary from counter file path to the last count read, updated in place
+    :return: True if at least one count has increased since the previous read
+    """
+    # We can report more granular information about the throttling events if we really want to, but there
+    # is no direct mapping from oc*_event_cnt to which power rail/system is being measured, we
+    # would need to hard code a mapping from board type to oc*_event_cnt->power rail mappings,
+    # this is fragile, and most users will probably only care about throttling or not throttling,
+    # and can use the existing power panel to see currents and current limits if they want to dig deeper.
+    # https://docs.nvidia.com/jetson/archives/r36.4/DeveloperGuide/SD/PlatformPowerAndPerformance/JetsonOrinNanoSeriesJetsonOrinNxSeriesAndJetsonAgxOrinSeries.html#jetson-agx-orin-series
+    throttling = False
+    for filename in event_counts:
+        try:
+            with open(filename, 'r') as f:
+                count = int(f.read())
+        except (IOError, OSError, ValueError) as e:
+            logger.error("Error reading OC event counter from {filename}: {e}".format(filename=filename, e=e))
+            # Skip only this counter, the other ones can still report a throttling event
+            continue
+        if count > event_counts[filename]:
+            event_counts[filename] = count
+            throttling = True
+    return throttling
+
+
 def read_power_status(data):
     values = {}
     power_type = data['type']
@@ -237,6 +288,7 @@ class PowerService(object):
     def __init__(self):
         self._power_sensor = {}
         self._power_avg = {}
+        self._oc_event_counts = {}
         # Find all I2C sensors on board
         i2c_path = "/sys/bus/i2c/devices"
         system_monitor = "/sys/class/power_supply"
@@ -248,6 +300,7 @@ def __init__(self):
         # Load all power sensors
         self._power_sensor = find_all_i2c_power_monitor(i2c_path)
         self._power_sensor.update(find_all_system_monitor(system_monitor))
+        self._oc_event_counts = find_all_oc_event_counters()
         if not self._power_sensor:
             logger.warning("Power sensors not found!")
         # Sort all power sensors
@@ -287,5 +340,15 @@ def get_status(self):
             rails[name] = values
         # Measure total power
         total, rails = total_power(rails)
-        return {'rail': rails, 'tot': total}
+        ret_dict = {'rail': rails, 'tot': total}
+        # Only include OC events if counters exist
+        if self._oc_event_counts:
+            # Update the counters before summing them, the total must include the latest events
+            is_throttling = update_oc_event_counts(self._oc_event_counts)
+            ret_dict['oc_events'] = {
+                'is_throttling': is_throttling,
+                # A counter never read is still at -1 and must not lower the total
+                'count': sum(count for count in self._oc_event_counts.values() if count > 0),
+            }
+        return ret_dict
 # EOF
diff --git a/jtop/gui/pcontrol.py b/jtop/gui/pcontrol.py
index a911192d..b53cbf34 100644
--- a/jtop/gui/pcontrol.py
+++ b/jtop/gui/pcontrol.py
@@ -70,6 +70,35 @@ def compact_temperatures(stdscr, pos_y, pos_x, width, height, jetson):
     return counter
 
 
+def display_oc_event(stdscr, oc_events, pos_y, pos_x):
+    """
+    Draw the overcurrent event counter at the given position.
+
+    The count is red while the system is throttling, yellow when events have been
+    counted but the system is not throttling, and green when the count is zero.
+
+    :param stdscr: curses window where the counter is drawn
+    :param oc_events: dictionary with the 'count' and 'is_throttling' keys
+    :param pos_y: row where the label is drawn
+    :param pos_x: column where the label starts
+    """
+    label = "OC EVENT COUNT: "
+    oc_event_cnt = oc_events['count']
+    # Plot OC_EVENT_CNT with color based on throttling status
+    if oc_events['is_throttling']:
+        color = NColors.red()
+    elif oc_event_cnt > 0:
+        color = NColors.yellow()
+    else:
+        color = NColors.green()
+    try:
+        stdscr.addstr(pos_y, pos_x, label, curses.A_BOLD)
+        stdscr.addstr(pos_y, pos_x + len(label), str(oc_event_cnt), curses.A_BOLD | color)
+    except curses.error:
+        # Nothing to draw when the position is outside the window
+        pass
+
+
 def compact_power(stdscr, pos_y, pos_x, width, height, jetson):
     LIMIT = 25
     # center_x = pos_x + width // 2 if width > LIMIT else pos_x + width // 2 + 4
@@ -105,7 +134,13 @@ def compact_power(stdscr, pos_y, pos_x, width, height, jetson):
     if width > LIMIT:
         unit_avg = unit_to_string(total['avg'], 'm', 'W')
         stdscr.addstr(pos_y + len_power + 1, center_x + column_power - 3, unit_avg, curses.A_BOLD)
-    return len(power) + 1
+    # 'oc_events' is only reported when the board has OC event counters
+    oc_events = jetson.power.get('oc_events')
+    # If there is no more space or there are no OC events, return
+    if not oc_events or len_power + 3 >= height:
+        return len(power) + 1
+    display_oc_event(stdscr, oc_events, pos_y=pos_y + len_power + 3, pos_x=center_x - column_power - 5)
+    return len(power) + 3
 
 
 class CTRL(Page):
@@ -374,6 +409,12 @@ def control_power(self, pos_y, pos_x, key, mouse):
             except curses.error:
                 pass
 
+        # 'oc_events' is only reported when the board has OC event counters
+        oc_events = self.jetson.power.get('oc_events')
+        if oc_events:
+            # Plot OC_EVENT_CNT
+            display_oc_event(self.stdscr, oc_events, pos_y=pos_y_table + len_power + 2, pos_x=pos_x)
+
     def draw(self, key, mouse):
         # Screen size
         height, width, first = self.size_page()