diff --git a/algorithmic_efficiency/random_utils.py b/algorithmic_efficiency/random_utils.py
index cf1ea6c32..f40a98003 100644
--- a/algorithmic_efficiency/random_utils.py
+++ b/algorithmic_efficiency/random_utils.py
@@ -18,30 +18,30 @@
 
-# Annoyingly, RandomState(seed) requires seed to be in [0, 2 ** 32 - 1] (an
-# unsigned int), while RandomState.randint only accepts and returns signed ints.
-MAX_INT32 = 2**31
-MIN_INT32 = -MAX_INT32
+# RandomState(seed) requires seed to be in [0, 2 ** 32 - 1] (an unsigned int),
+# so seeds are reduced modulo 2 ** 32 (MAX_UINT32 + 1) before use.
+MAX_UINT32 = 2**32 - 1
+MIN_UINT32 = 0
 
 SeedType = Union[int, list, np.ndarray]
 
 
 def _signed_to_unsigned(seed: SeedType) -> SeedType:
   if isinstance(seed, int):
-    return seed % 2**32
+    return seed % (MAX_UINT32 + 1)
   if isinstance(seed, list):
-    return [s % 2**32 for s in seed]
+    return [s % (MAX_UINT32 + 1) for s in seed]
   if isinstance(seed, np.ndarray):
-    return np.array([s % 2**32 for s in seed.tolist()])
+    return np.array([s % (MAX_UINT32 + 1) for s in seed.tolist()])
 
 
 def _fold_in(seed: SeedType, data: Any) -> List[Union[SeedType, Any]]:
   rng = np.random.RandomState(seed=_signed_to_unsigned(seed))
-  new_seed = rng.randint(MIN_INT32, MAX_INT32, dtype=np.int32)
+  new_seed = rng.randint(MIN_UINT32, MAX_UINT32, dtype=np.uint32)
   return [new_seed, data]
 
 
 def _split(seed: SeedType, num: int = 2) -> SeedType:
   rng = np.random.RandomState(seed=_signed_to_unsigned(seed))
-  return rng.randint(MIN_INT32, MAX_INT32, dtype=np.int32, size=[num, 2])
+  return rng.randint(MIN_UINT32, MAX_UINT32, dtype=np.uint32, size=[num, 2])
 
 
 def _PRNGKey(seed: SeedType) -> SeedType:  # pylint: disable=invalid-name
diff --git a/scoring/performance_profile.py b/scoring/performance_profile.py
index 32acae9ab..f4f2d5679 100644
--- a/scoring/performance_profile.py
+++ b/scoring/performance_profile.py
@@ -274,7 +274,8 @@ def compute_performance_profiles(submissions,
                                  scale='linear',
                                  verbosity=0,
                                  strict=False,
-                                 self_tuning_ruleset=False):
+                                 self_tuning_ruleset=False,
+                                 output_dir=None):
   """Compute performance profiles for a set of submission by some time column.
 
Args: @@ -321,6 +322,8 @@ def compute_performance_profiles(submissions, # Sort workloads alphabetically (for better display) df = df.reindex(sorted(df.columns), axis=1) + # Save time to target dataframe + df.to_csv(os.path.join(output_dir, 'time_to_targets.csv')) # For each held-out workload set to inf if the base workload is inf or nan for workload in df.keys(): if workload not in BASE_WORKLOADS: diff --git a/scoring/score_submissions.py b/scoring/score_submissions.py index 1fb39d193..8cc06b15f 100644 --- a/scoring/score_submissions.py +++ b/scoring/score_submissions.py @@ -210,7 +210,9 @@ def main(_): scale='linear', verbosity=0, self_tuning_ruleset=FLAGS.self_tuning_ruleset, - strict=FLAGS.strict) + strict=FLAGS.strict, + output_dir=FLAGS.output_dir, + ) if not os.path.exists(FLAGS.output_dir): os.mkdir(FLAGS.output_dir) performance_profile.plot_performance_profiles( diff --git a/setup.cfg b/setup.cfg index eb570dafb..4afefd164 100644 --- a/setup.cfg +++ b/setup.cfg @@ -121,7 +121,6 @@ jax_core_deps = chex==0.1.7 ml_dtypes==0.2.0 protobuf==4.25.3 - scipy==1.11.4 # JAX CPU