From a173c4a5b8a9d93567796592786ae58a51f89c4e Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Mon, 2 Aug 2021 15:35:11 -0400 Subject: [PATCH 01/11] Copy plot.py from pre-0.1 into dacapo module --- dacapo/plot.py | 207 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 dacapo/plot.py diff --git a/dacapo/plot.py b/dacapo/plot.py new file mode 100644 index 00000000..6a4a60bc --- /dev/null +++ b/dacapo/plot.py @@ -0,0 +1,207 @@ +from bokeh.palettes import Category20 as palette +import bokeh.layouts +import bokeh.plotting +import itertools +import numpy as np + + +def smooth_values(a, n, stride=1): + + a = np.array(a) + + # mean + m = np.cumsum(a) + m[n:] = m[n:] - m[:-n] + m = m[n - 1 :] / n + + # mean of squared values + m2 = np.cumsum(a ** 2) + m2[n:] = m2[n:] - m2[:-n] + m2 = m2[n - 1 :] / n + + # stddev + s = m2 - m ** 2 + + if stride > 1: + m = m[::stride] + s = s[::stride] + + return m, s + + +def plot_runs(runs, smooth=100, validation_score=None): + relation, validation_score = validation_score.split(":") + if relation == "min": + higher_is_better = False + elif relation == "max": + higher_is_better = True + else: + raise Exception(f"Don't know how to handle relation: {relation}") + + run_names = [str(run) for run in runs] + max_iteration = max([run.training_stats.trained_until() for run in runs]) + + colors = itertools.cycle(palette[20]) + loss_tooltips = [ + ("task", "@task"), + ("model", "@model"), + ("optimizer", "@optimizer"), + ("iteration", "@iteration"), + ("loss", "@loss"), + ] + loss_figure = bokeh.plotting.figure( + tools="pan, wheel_zoom, reset, save, hover", + x_axis_label="iterations", + tooltips=loss_tooltips, + plot_width=2048, + ) + loss_figure.background_fill_color = "#efefef" + + validation_score_names = [] + for r in runs: + if r.validation_scores.validated_until() > 0: + validation_score_names += r.validation_scores.get_score_names() + validation_score_names = np.unique(validation_score_names) + + validation_tooltips = [ + ("run", "@run"), + ("task", "@task"), + ("model", "@model"), + ("optimizer", "@optimizer"), + ("iteration", "@iteration"), + ] + [(name, "@" + name) for name in validation_score_names] + validation_figure = bokeh.plotting.figure( + tools="pan, wheel_zoom, reset, save, hover", + x_axis_label="iterations", + tooltips=validation_tooltips, + plot_width=2048, + ) + validation_figure.background_fill_color = "#efefef" + + summary_tooltips = [ + ("run", "@run"), + ("task", "@task"), + ("model", "@model"), + ("optimizer", "@optimizer"), + ("best iteration", "@iteration"), + ("best voi_split", "@voi_split"), + ("best voi_merge", "@voi_merge"), + ("best voi_sum", "@voi_sum"), + ("num parameters", "@num_parameters"), + ] + summary_figure = bokeh.plotting.figure( + tools="pan, wheel_zoom, reset, save, hover", + x_axis_label="model size", + y_axis_label="best validation", + tooltips=summary_tooltips, + plot_width=2048, + ) + summary_figure.background_fill_color = "#efefef" + + for run, color in zip(runs, colors): + + if run.training_stats.trained_until() > 0: + + name = str(run) + l = run.training_stats.iterations[-1] + + x, _ = smooth_values(run.training_stats.iterations, smooth, stride=smooth) + y, s = smooth_values(run.training_stats.losses, smooth, stride=smooth) + source = bokeh.plotting.ColumnDataSource( + { + "iteration": x, + "loss": y, + "task": [run.task_config.id] * len(x), + "model": [run.model_config.id] * len(x), + "optimizer": [run.optimizer_config.id] * len(x), + "run": [str(run)] * len(x), 
+ } + ) + loss_figure.line( + "iteration", + "loss", + legend_label=name, + source=source, + color=color, + alpha=0.7, + ) + + loss_figure.patch( + np.concatenate([x, x[::-1]]), + np.concatenate([y + 3 * s, (y - 3 * s)[::-1]]), + legend_label=name, + color=color, + alpha=0.3, + ) + + if validation_score and run.validation_scores.validated_until() > 0: + + x = run.validation_scores.iterations + source_dict = { + "iteration": x, + "task": [run.task_config.id] * len(x), + "model": [run.model_config.id] * len(x), + "optimizer": [run.optimizer_config.id] * len(x), + "run": [str(run)] * len(x), + } + # TODO: get_best: higher_is_better is not true for all scores + validation_averages = run.validation_scores.get_best( + validation_score, higher_is_better=higher_is_better + ) + source_dict.update( + { + name: np.array(validation_averages[name]) + for name in run.validation_scores.get_score_names() + } + ) + source = bokeh.plotting.ColumnDataSource(source_dict) + validation_figure.line( + "iteration", + validation_score, + legend_label=name + " " + validation_score, + source=source, + color=color, + alpha=0.7, + ) + + # Styling + # training + loss_figure.title.text_font_size = "25pt" + loss_figure.title.text = "Training" + loss_figure.title.align = "center" + + loss_figure.legend.label_text_font_size = "16pt" + + loss_figure.xaxis.axis_label = "Iterations" + loss_figure.xaxis.axis_label_text_font_size = "20pt" + loss_figure.xaxis.major_label_text_font_size = "16pt" + loss_figure.xaxis.axis_label_text_font = "times" + loss_figure.xaxis.axis_label_text_color = "black" + + loss_figure.yaxis.axis_label = "Loss" + loss_figure.yaxis.axis_label_text_font_size = "20pt" + loss_figure.yaxis.major_label_text_font_size = "16pt" + loss_figure.yaxis.axis_label_text_font = "times" + loss_figure.yaxis.axis_label_text_color = "black" + + # validation + validation_figure.title.text_font_size = "25pt" + validation_figure.title.text = "Validation" + validation_figure.title.align = "center" + + validation_figure.legend.label_text_font_size = "16pt" + + validation_figure.xaxis.axis_label = "Iterations" + validation_figure.xaxis.axis_label_text_font_size = "20pt" + validation_figure.xaxis.major_label_text_font_size = "16pt" + validation_figure.xaxis.axis_label_text_font = "times" + validation_figure.xaxis.axis_label_text_color = "black" + + validation_figure.yaxis.axis_label = f"{validation_score.capitalize()}" + validation_figure.yaxis.axis_label_text_font_size = "20pt" + validation_figure.yaxis.major_label_text_font_size = "16pt" + validation_figure.yaxis.axis_label_text_font = "times" + validation_figure.yaxis.axis_label_text_color = "black" + + bokeh.plotting.output_file("performance_plots.html") + bokeh.plotting.save(bokeh.layouts.column(loss_figure, validation_figure)) From 49a79218269bf6af2f10f19df1ce039c12d05d16 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Mon, 2 Aug 2021 17:55:35 -0400 Subject: [PATCH 02/11] Refactor deprecated plot code to work with current version --- dacapo/plot.py | 90 +++++++++++++++++++++++++++++++++++++------------- 1 file changed, 67 insertions(+), 23 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 6a4a60bc..46a1bb84 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -1,8 +1,11 @@ +from dacapo.store.create_store import create_config_store, create_stats_store +from dacapo.experiments.run import Run from bokeh.palettes import Category20 as palette import bokeh.layouts import bokeh.plotting import itertools import numpy as np +from collections import namedtuple def 
smooth_values(a, n, stride=1): @@ -12,12 +15,12 @@ def smooth_values(a, n, stride=1): # mean m = np.cumsum(a) m[n:] = m[n:] - m[:-n] - m = m[n - 1 :] / n + m = m[n - 1:] / n # mean of squared values m2 = np.cumsum(a ** 2) m2[n:] = m2[n:] - m2[:-n] - m2 = m2[n - 1 :] / n + m2 = m2[n - 1:] / n # stddev s = m2 - m ** 2 @@ -29,7 +32,39 @@ def smooth_values(a, n, stride=1): return m, s -def plot_runs(runs, smooth=100, validation_score=None): +def get_runs_info(run_config_names): + config_store = create_config_store() + stats_store = create_stats_store() + runs = [] + + RunInfo = namedtuple("run_info", + ["name", + "task", + "architecture", + "trainer", + "dataset", + "training_stats", + "validation_scores"]) + + for run_config_name in run_config_names: + run_config = config_store.retrieve_run_config(run_config_name) + run = RunInfo(run_config_name, + run_config.task_config.name, + run_config.architecture_config.name, + run_config.trainer_config.name, + run_config.dataset_config.name, + stats_store.retrieve_training_stats( + run_config_name), + stats_store.retrieve_validation_scores( + run_config_name) + ) + + runs.append(run) + + return runs + + +def plot_runs(run_config_names, smooth=100, validation_score=None): relation, validation_score = validation_score.split(":") if relation == "min": higher_is_better = False @@ -38,14 +73,14 @@ def plot_runs(runs, smooth=100, validation_score=None): else: raise Exception(f"Don't know how to handle relation: {relation}") - run_names = [str(run) for run in runs] - max_iteration = max([run.training_stats.trained_until() for run in runs]) + runs = get_runs_info(run_config_names) colors = itertools.cycle(palette[20]) loss_tooltips = [ ("task", "@task"), - ("model", "@model"), - ("optimizer", "@optimizer"), + ("architecture", "@architecture"), + ("trainer", "@trainer"), + ("dataset", "@dataset"), ("iteration", "@iteration"), ("loss", "@loss"), ] @@ -66,9 +101,9 @@ def plot_runs(runs, smooth=100, validation_score=None): validation_tooltips = [ ("run", "@run"), ("task", "@task"), - ("model", "@model"), - ("optimizer", "@optimizer"), - ("iteration", "@iteration"), + ("architecture", "@architecture"), + ("trainer", "@trainer"), + ("dataset", "@dataset"), ] + [(name, "@" + name) for name in validation_score_names] validation_figure = bokeh.plotting.figure( tools="pan, wheel_zoom, reset, save, hover", @@ -81,8 +116,9 @@ def plot_runs(runs, smooth=100, validation_score=None): summary_tooltips = [ ("run", "@run"), ("task", "@task"), - ("model", "@model"), - ("optimizer", "@optimizer"), + ("architecture", "@architecture"), + ("trainer", "@trainer"), + ("dataset", "@dataset"), ("best iteration", "@iteration"), ("best voi_split", "@voi_split"), ("best voi_merge", "@voi_merge"), @@ -102,18 +138,25 @@ def plot_runs(runs, smooth=100, validation_score=None): if run.training_stats.trained_until() > 0: - name = str(run) - l = run.training_stats.iterations[-1] - - x, _ = smooth_values(run.training_stats.iterations, smooth, stride=smooth) - y, s = smooth_values(run.training_stats.losses, smooth, stride=smooth) + name = run.name + #l = run.training_stats.iterations[-1] + + iterations = [stat.iteration + for stat in run.training_stats.iteration_stats] + losses = [stat.loss + for stat in run.training_stats.iteration_stats] + x, _ = smooth_values( + iterations, smooth, stride=smooth) + y, s = smooth_values(losses, + smooth, stride=smooth) source = bokeh.plotting.ColumnDataSource( { "iteration": x, "loss": y, - "task": [run.task_config.id] * len(x), - "model": [run.model_config.id] * 
len(x), - "optimizer": [run.optimizer_config.id] * len(x), + "task": [run.task] * len(x), + "architecture": [run.architecture] * len(x), + "trainer": [run.trainer] * len(x), + "dataset": [run.dataset] * len(x), "run": [str(run)] * len(x), } ) @@ -139,9 +182,10 @@ def plot_runs(runs, smooth=100, validation_score=None): x = run.validation_scores.iterations source_dict = { "iteration": x, - "task": [run.task_config.id] * len(x), - "model": [run.model_config.id] * len(x), - "optimizer": [run.optimizer_config.id] * len(x), + "task": [run.task.name] * len(x), + "architecture": [run.architecture] * len(x), + "trainer": [run.trainer] * len(x), + "dataset": [run.dataset.name] * len(x), "run": [str(run)] * len(x), } # TODO: get_best: higher_is_better is not true for all scores From 2cc16bd1afeaeb31795f8b28a89f651e9d66ac0e Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 3 Aug 2021 10:44:16 -0400 Subject: [PATCH 03/11] Move return statement and cleanup --- dacapo/plot.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 46a1bb84..8c834937 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -1,5 +1,4 @@ from dacapo.store.create_store import create_config_store, create_stats_store -from dacapo.experiments.run import Run from bokeh.palettes import Category20 as palette import bokeh.layouts import bokeh.plotting @@ -33,6 +32,7 @@ def smooth_values(a, n, stride=1): def get_runs_info(run_config_names): + config_store = create_config_store() stats_store = create_stats_store() runs = [] @@ -58,10 +58,9 @@ def get_runs_info(run_config_names): stats_store.retrieve_validation_scores( run_config_name) ) - runs.append(run) - return runs + return runs def plot_runs(run_config_names, smooth=100, validation_score=None): From a9efd34c9475c613abd3a71967c48416e2991506 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 24 Aug 2021 14:25:05 -0400 Subject: [PATCH 04/11] Update retrieval of names and get_best for validation scores --- dacapo/experiments/validation_scores.py | 49 ++++++++++++++++++++----- 1 file changed, 39 insertions(+), 10 deletions(-) diff --git a/dacapo/experiments/validation_scores.py b/dacapo/experiments/validation_scores.py index 7d4678a8..522cd996 100644 --- a/dacapo/experiments/validation_scores.py +++ b/dacapo/experiments/validation_scores.py @@ -1,6 +1,8 @@ from .validation_iteration_scores import ValidationIterationScores from typing import List import attr +import inspect +import numpy as np @attr.s @@ -29,31 +31,58 @@ def validated_until(self): return 0 return max([score.iteration for score in self.iteration_scores]) + 1 + def get_attribute_names(self, class_instance): + + attributes = inspect.getmembers( + class_instance, lambda a: not(inspect.isroutine(a))) + names = [a[0] for a in attributes if not( + a[0].startswith('__') and a[0].endswith('__'))] + return names -''' def get_score_names(self): - for scores in self.scores: - for parameters, sample_scores in scores.items(): - return sample_scores['scores']['average'].keys() + if self.iteration_scores: + example_parameter_scores = self.iteration_scores[0].parameter_scores + score_class_instance = example_parameter_scores[0][1] + return self.get_attribute_names(score_class_instance) + + raise RuntimeError("No scores were added, yet") + + def get_postprocessor_parameter_names(self): + + if self.iteration_scores: + example_parameter_scores = self.iteration_scores[0].parameter_scores + postprocessor_class_instance = example_parameter_scores[0][0] + return 
self.get_attribute_names(postprocessor_class_instance) raise RuntimeError("No scores were added, yet") def get_best(self, score_name=None, higher_is_better=True): names = self.get_score_names() + postprocessor_parameter_names = self.get_postprocessor_parameter_names() best_scores = {name: [] for name in names} - for iteration_scores in self.scores: + best_score_parameters = {name: [] + for name in postprocessor_parameter_names} + + for iteration_score in self.iteration_scores: ips = np.array([ - parameter_scores['scores']['average'].get(score_name, np.nan) - for parameter_scores in iteration_scores.values() + getattr(parameter_score[1], score_name, np.nan) + for parameter_score in iteration_score.parameter_scores ], dtype=np.float32) ips[np.isnan(ips)] = -np.inf if higher_is_better else np.inf i = np.argmax(ips) if higher_is_better else np.argmin(ips) + best_score = iteration_score.parameter_scores[i] + for name in names: best_scores[name].append( - list(iteration_scores.values())[i]['scores']['average'].get(name, 0) + getattr(best_score[1], name) ) - return best_scores -''' + + for name in postprocessor_parameter_names: + best_score_parameters[name].append( + getattr(best_score[0], name) + ) + + return (best_score_parameters, best_scores) From 34cb1a222772e2bd274253a25c0d19d852aa7312 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 24 Aug 2021 14:27:03 -0400 Subject: [PATCH 05/11] Update plotting to allow for selection of score names and other options --- dacapo/plot.py | 304 ++++++++++++++++++++++++++++--------------------- 1 file changed, 177 insertions(+), 127 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 8c834937..5d27dc18 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -1,3 +1,5 @@ +import json +from bokeh.embed.standalone import json_item from dacapo.store.create_store import create_config_store, create_stats_store from bokeh.palettes import Category20 as palette import bokeh.layouts @@ -31,7 +33,10 @@ def smooth_values(a, n, stride=1): return m, s -def get_runs_info(run_config_names): +def get_runs_info(run_config_base_names, + validation_score_names, + higher_is_betters, + plot_losses): config_store = create_config_store() stats_store = create_stats_store() @@ -44,35 +49,43 @@ def get_runs_info(run_config_names): "trainer", "dataset", "training_stats", - "validation_scores"]) - - for run_config_name in run_config_names: - run_config = config_store.retrieve_run_config(run_config_name) - run = RunInfo(run_config_name, - run_config.task_config.name, - run_config.architecture_config.name, - run_config.trainer_config.name, - run_config.dataset_config.name, - stats_store.retrieve_training_stats( - run_config_name), - stats_store.retrieve_validation_scores( - run_config_name) - ) - runs.append(run) + "validation_scores", + "validation_score_name", + "higher_is_better", + "plot_loss"]) + + all_run_config_names = config_store.retrieve_run_config_names() + for run_config_name in all_run_config_names: + run_config_base_name = run_config_name.split(":")[0] + if run_config_base_name in run_config_base_names: + idx = run_config_base_names.index(run_config_base_name) + validation_score_name = validation_score_names[idx] + higher_is_better = higher_is_betters[idx] + plot_loss = plot_losses[idx] + + run_config = config_store.retrieve_run_config(run_config_name) + run = RunInfo(run_config_name, + run_config.task_config.name, + run_config.architecture_config.name, + run_config.trainer_config.name, + run_config.dataset_config.name, + stats_store.retrieve_training_stats( 
+ run_config_name), + stats_store.retrieve_validation_scores( + run_config_name), + validation_score_name, + higher_is_better, + plot_loss + ) + runs.append(run) return runs -def plot_runs(run_config_names, smooth=100, validation_score=None): - relation, validation_score = validation_score.split(":") - if relation == "min": - higher_is_better = False - elif relation == "max": - higher_is_better = True - else: - raise Exception(f"Don't know how to handle relation: {relation}") +def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_is_betters=None, plot_losses=None, return_json=False): - runs = get_runs_info(run_config_names) + runs = get_runs_info(run_config_base_names, + validation_scores, higher_is_betters, plot_losses) colors = itertools.cycle(palette[20]) loss_tooltips = [ @@ -91,26 +104,35 @@ def plot_runs(run_config_names, smooth=100, validation_score=None): ) loss_figure.background_fill_color = "#efefef" - validation_score_names = [] - for r in runs: - if r.validation_scores.validated_until() > 0: - validation_score_names += r.validation_scores.get_score_names() - validation_score_names = np.unique(validation_score_names) - - validation_tooltips = [ - ("run", "@run"), - ("task", "@task"), - ("architecture", "@architecture"), - ("trainer", "@trainer"), - ("dataset", "@dataset"), - ] + [(name, "@" + name) for name in validation_score_names] - validation_figure = bokeh.plotting.figure( - tools="pan, wheel_zoom, reset, save, hover", - x_axis_label="iterations", - tooltips=validation_tooltips, - plot_width=2048, - ) - validation_figure.background_fill_color = "#efefef" + if validation_scores: + validation_score_names = [] + validation_postprocessor_parameter_names = [] + for r in runs: + if r.validation_scores.validated_until() > 0: + validation_score_names += r.validation_scores.get_score_names() + validation_postprocessor_parameter_names += r.validation_scores.get_postprocessor_parameter_names() + + validation_score_names = np.unique(validation_score_names) + validation_postprocessor_parameter_names = np.unique( + validation_postprocessor_parameter_names) + + validation_tooltips = [ + ("run", "@run"), + ("task", "@task"), + ("architecture", "@architecture"), + ("trainer", "@trainer"), + ("dataset", "@dataset"), + ] + [(name, "@" + name) for name in validation_score_names] \ + + [(name, "@" + name) + for name in validation_postprocessor_parameter_names] + + validation_figure = bokeh.plotting.figure( + tools="pan, wheel_zoom, reset, save, hover", + x_axis_label="iterations", + tooltips=validation_tooltips, + plot_width=2048, + ) + validation_figure.background_fill_color = "#efefef" summary_tooltips = [ ("run", "@run"), @@ -133,8 +155,10 @@ def plot_runs(run_config_names, smooth=100, validation_score=None): ) summary_figure.background_fill_color = "#efefef" - for run, color in zip(runs, colors): + include_validation_figure = False + include_loss_figure = False + for run, color in zip(runs, colors): if run.training_stats.trained_until() > 0: name = run.name @@ -144,64 +168,77 @@ def plot_runs(run_config_names, smooth=100, validation_score=None): for stat in run.training_stats.iteration_stats] losses = [stat.loss for stat in run.training_stats.iteration_stats] - x, _ = smooth_values( - iterations, smooth, stride=smooth) - y, s = smooth_values(losses, - smooth, stride=smooth) - source = bokeh.plotting.ColumnDataSource( - { - "iteration": x, - "loss": y, - "task": [run.task] * len(x), - "architecture": [run.architecture] * len(x), - "trainer": [run.trainer] * len(x), 
- "dataset": [run.dataset] * len(x), - "run": [str(run)] * len(x), - } - ) - loss_figure.line( - "iteration", - "loss", - legend_label=name, - source=source, - color=color, - alpha=0.7, - ) - - loss_figure.patch( - np.concatenate([x, x[::-1]]), - np.concatenate([y + 3 * s, (y - 3 * s)[::-1]]), - legend_label=name, - color=color, - alpha=0.3, - ) - if validation_score and run.validation_scores.validated_until() > 0: - - x = run.validation_scores.iterations + if run.plot_loss: + include_loss_figure = True + x, _ = smooth_values( + iterations, smooth, stride=smooth) + y, s = smooth_values(losses, + smooth, stride=smooth) + source = bokeh.plotting.ColumnDataSource( + { + "iteration": x, + "loss": y, + "task": [run.task] * len(x), + "architecture": [run.architecture] * len(x), + "trainer": [run.trainer] * len(x), + "dataset": [run.dataset] * len(x), + "run": [name] * len(x), + } + ) + loss_figure.line( + "iteration", + "loss", + legend_label=name, + source=source, + color=color, + alpha=0.7, + ) + + loss_figure.patch( + np.concatenate([x, x[::-1]]), + np.concatenate([y + 3 * s, (y - 3 * s)[::-1]]), + legend_label=name, + color=color, + alpha=0.3, + ) + + if run.validation_score_name and run.validation_scores.validated_until() > 0: + include_validation_figure = True + x = [score.iteration + for score in run.validation_scores.iteration_scores] source_dict = { "iteration": x, - "task": [run.task.name] * len(x), + "task": [run.task] * len(x), "architecture": [run.architecture] * len(x), "trainer": [run.trainer] * len(x), - "dataset": [run.dataset.name] * len(x), - "run": [str(run)] * len(x), + "dataset": [run.dataset] * len(x), + "run": [run.name] * len(x), } # TODO: get_best: higher_is_better is not true for all scores - validation_averages = run.validation_scores.get_best( - validation_score, higher_is_better=higher_is_better + validation_bests = run.validation_scores.get_best( + run.validation_score_name, higher_is_better=run.higher_is_better ) + best_validation_parameters = validation_bests[0] + best_validation_scores = validation_bests[1] + source_dict.update( { - name: np.array(validation_averages[name]) + name: np.array(best_validation_parameters[name]) + for name in run.validation_scores.get_postprocessor_parameter_names() + } + ) + source_dict.update( + { + name: np.array(best_validation_scores[name]) for name in run.validation_scores.get_score_names() } ) source = bokeh.plotting.ColumnDataSource(source_dict) validation_figure.line( "iteration", - validation_score, - legend_label=name + " " + validation_score, + run.validation_score_name, + legend_label=name + " " + run.validation_score_name, source=source, color=color, alpha=0.7, @@ -209,42 +246,55 @@ def plot_runs(run_config_names, smooth=100, validation_score=None): # Styling # training - loss_figure.title.text_font_size = "25pt" - loss_figure.title.text = "Training" - loss_figure.title.align = "center" - - loss_figure.legend.label_text_font_size = "16pt" - - loss_figure.xaxis.axis_label = "Iterations" - loss_figure.xaxis.axis_label_text_font_size = "20pt" - loss_figure.xaxis.major_label_text_font_size = "16pt" - loss_figure.xaxis.axis_label_text_font = "times" - loss_figure.xaxis.axis_label_text_color = "black" - - loss_figure.yaxis.axis_label = "Loss" - loss_figure.yaxis.axis_label_text_font_size = "20pt" - loss_figure.yaxis.major_label_text_font_size = "16pt" - loss_figure.yaxis.axis_label_text_font = "times" - loss_figure.yaxis.axis_label_text_color = "black" - - # validation - validation_figure.title.text_font_size = "25pt" - 
validation_figure.title.text = "Validation" - validation_figure.title.align = "center" - - validation_figure.legend.label_text_font_size = "16pt" - - validation_figure.xaxis.axis_label = "Iterations" - validation_figure.xaxis.axis_label_text_font_size = "20pt" - validation_figure.xaxis.major_label_text_font_size = "16pt" - validation_figure.xaxis.axis_label_text_font = "times" - validation_figure.xaxis.axis_label_text_color = "black" - - validation_figure.yaxis.axis_label = f"{validation_score.capitalize()}" - validation_figure.yaxis.axis_label_text_font_size = "20pt" - validation_figure.yaxis.major_label_text_font_size = "16pt" - validation_figure.yaxis.axis_label_text_font = "times" - validation_figure.yaxis.axis_label_text_color = "black" - - bokeh.plotting.output_file("performance_plots.html") - bokeh.plotting.save(bokeh.layouts.column(loss_figure, validation_figure)) + figures = [] + if include_loss_figure: + loss_figure.title.text_font_size = "25pt" + loss_figure.title.text = "Training" + loss_figure.title.align = "center" + + loss_figure.legend.label_text_font_size = "16pt" + + loss_figure.xaxis.axis_label = "Iterations" + loss_figure.xaxis.axis_label_text_font_size = "20pt" + loss_figure.xaxis.major_label_text_font_size = "16pt" + loss_figure.xaxis.axis_label_text_font = "times" + loss_figure.xaxis.axis_label_text_color = "black" + + loss_figure.yaxis.axis_label = "Loss" + loss_figure.yaxis.axis_label_text_font_size = "20pt" + loss_figure.yaxis.major_label_text_font_size = "16pt" + loss_figure.yaxis.axis_label_text_font = "times" + loss_figure.yaxis.axis_label_text_color = "black" + loss_figure.sizing_mode = 'scale_width' + figures.append(loss_figure) + + if include_validation_figure: + # validation + validation_figure.title.text_font_size = "25pt" + validation_figure.title.text = "Validation" + validation_figure.title.align = "center" + + validation_figure.legend.label_text_font_size = "16pt" + + validation_figure.xaxis.axis_label = "Iterations" + validation_figure.xaxis.axis_label_text_font_size = "20pt" + validation_figure.xaxis.major_label_text_font_size = "16pt" + validation_figure.xaxis.axis_label_text_font = "times" + validation_figure.xaxis.axis_label_text_color = "black" + + validation_figure.yaxis.axis_label = "Validation Score" + validation_figure.yaxis.axis_label_text_font_size = "20pt" + validation_figure.yaxis.major_label_text_font_size = "16pt" + validation_figure.yaxis.axis_label_text_font = "times" + validation_figure.yaxis.axis_label_text_color = "black" + validation_figure.sizing_mode = 'scale_width' + figures.append(validation_figure) + + plot = bokeh.layouts.column(*figures) + plot.sizing_mode = 'scale_width' + + if return_json: + return json.dumps(json_item(plot, "myplot")) + else: + bokeh.plotting.output_file("performance_plots.html") + bokeh.plotting.save(plot) From 953cc3152ec8b47374c25834a5cd40eecf546b46 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 24 Aug 2021 16:31:41 -0400 Subject: [PATCH 06/11] Update plot to only retrieve loss info if it will be plotted --- dacapo/plot.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 5d27dc18..0efb26eb 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -7,6 +7,7 @@ import itertools import numpy as np from collections import namedtuple +import time def smooth_values(a, n, stride=1): @@ -69,8 +70,8 @@ def get_runs_info(run_config_base_names, run_config.architecture_config.name, run_config.trainer_config.name, 
run_config.dataset_config.name, - stats_store.retrieve_training_stats( - run_config_name), + [stats_store.retrieve_training_stats( + run_config_name) if plot_loss else None][0], stats_store.retrieve_validation_scores( run_config_name), validation_score_name, @@ -83,7 +84,6 @@ def get_runs_info(run_config_base_names, def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_is_betters=None, plot_losses=None, return_json=False): - runs = get_runs_info(run_config_base_names, validation_scores, higher_is_betters, plot_losses) @@ -159,11 +159,9 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ include_loss_figure = False for run, color in zip(runs, colors): - if run.training_stats.trained_until() > 0: - - name = run.name - #l = run.training_stats.iterations[-1] - + name = run.name + + if run.plot_loss: iterations = [stat.iteration for stat in run.training_stats.iteration_stats] losses = [stat.loss From 45bf75353f8e6619616eed9e9311a49e53b2b144 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Wed, 25 Aug 2021 10:32:49 -0400 Subject: [PATCH 07/11] Remove unnecessary import --- dacapo/plot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 0efb26eb..2a5d1222 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -7,7 +7,6 @@ import itertools import numpy as np from collections import namedtuple -import time def smooth_values(a, n, stride=1): From 9cad874a68096625ea9b52c55b040eabd925a700 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Thu, 26 Aug 2021 10:17:13 -0400 Subject: [PATCH 08/11] Simplify if else statement in plot code --- dacapo/plot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 2a5d1222..0108a2ae 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -69,8 +69,8 @@ def get_runs_info(run_config_base_names, run_config.architecture_config.name, run_config.trainer_config.name, run_config.dataset_config.name, - [stats_store.retrieve_training_stats( - run_config_name) if plot_loss else None][0], + stats_store.retrieve_training_stats( + run_config_name) if plot_loss else None, stats_store.retrieve_validation_scores( run_config_name), validation_score_name, From e9981784b3bdd55cb4494e806c691c9bf3d5127f Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 16 Nov 2021 11:01:19 -0500 Subject: [PATCH 09/11] fix import --- dacapo/experiments/arraytypes/binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dacapo/experiments/arraytypes/binary.py b/dacapo/experiments/arraytypes/binary.py index 8219da4c..9dc6eb3f 100644 --- a/dacapo/experiments/arraytypes/binary.py +++ b/dacapo/experiments/arraytypes/binary.py @@ -1,4 +1,4 @@ -from arraytype import ArrayType +from .arraytype import ArrayType import attr From 65ebdb8b23dbe6568091a589b750b9dbf479f3b1 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 30 Nov 2021 10:55:07 -0500 Subject: [PATCH 10/11] change array config to array --- .../datasplits/datasets/arrays/cellmap_array_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dacapo/experiments/datasplits/datasets/arrays/cellmap_array_config.py b/dacapo/experiments/datasplits/datasets/arrays/cellmap_array_config.py index 952293cb..43cf80c9 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/cellmap_array_config.py +++ b/dacapo/experiments/datasplits/datasets/arrays/cellmap_array_config.py @@ -2,7 +2,7 @@ from .array_config import ArrayConfig from .cellmap_array import CellMapArray
-from .array_config import ArrayConfig +from .array import Array from typing import List @@ -14,7 +14,7 @@ class CellMapArrayConfig(ArrayConfig): array_type = CellMapArray - source_array_config: ArrayConfig = attr.ib( + source_array_config: Array = attr.ib( metadata={ "help_text": "The Array from which to pull annotated data. Is expected to contain a volume with uint64 voxels and no channel dimension" } From ff5024e3e8e49b00617f5beb691805268aba9ed6 Mon Sep 17 00:00:00 2001 From: David Ackerman Date: Tue, 30 Nov 2021 10:55:22 -0500 Subject: [PATCH 11/11] update plot smoothing --- dacapo/plot.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/dacapo/plot.py b/dacapo/plot.py index 0108a2ae..6a8d52bd 100644 --- a/dacapo/plot.py +++ b/dacapo/plot.py @@ -47,7 +47,7 @@ def get_runs_info(run_config_base_names, "task", "architecture", "trainer", - "dataset", + "datasplit", "training_stats", "validation_scores", "validation_score_name", @@ -68,7 +68,7 @@ def get_runs_info(run_config_base_names, run_config.task_config.name, run_config.architecture_config.name, run_config.trainer_config.name, - run_config.dataset_config.name, + run_config.datasplit_config.name, stats_store.retrieve_training_stats( run_config_name) if plot_loss else None, stats_store.retrieve_validation_scores( @@ -91,7 +91,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ ("task", "@task"), ("architecture", "@architecture"), ("trainer", "@trainer"), - ("dataset", "@dataset"), + ("datasplit", "@datasplit"), ("iteration", "@iteration"), ("loss", "@loss"), ] @@ -120,7 +120,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ ("task", "@task"), ("architecture", "@architecture"), ("trainer", "@trainer"), - ("dataset", "@dataset"), + ("datasplit", "@datasplit"), ] + [(name, "@" + name) for name in validation_score_names] \ + [(name, "@" + name) for name in validation_postprocessor_parameter_names] @@ -138,7 +138,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ ("task", "@task"), ("architecture", "@architecture"), ("trainer", "@trainer"), - ("dataset", "@dataset"), + ("datasplit", "@datasplit"), ("best iteration", "@iteration"), ("best voi_split", "@voi_split"), ("best voi_merge", "@voi_merge"), @@ -168,6 +168,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ if run.plot_loss: include_loss_figure = True + smooth = int(np.maximum(len(iterations)/2500, 1)) x, _ = smooth_values( iterations, smooth, stride=smooth) y, s = smooth_values(losses, @@ -179,7 +180,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ "task": [run.task] * len(x), "architecture": [run.architecture] * len(x), "trainer": [run.trainer] * len(x), - "dataset": [run.dataset] * len(x), + "datasplit": [run.datasplit] * len(x), "run": [name] * len(x), } ) @@ -209,7 +210,7 @@ def plot_runs(run_config_base_names, smooth=100, validation_scores=None, higher_ "task": [run.task] * len(x), "architecture": [run.architecture] * len(x), "trainer": [run.trainer] * len(x), - "dataset": [run.dataset] * len(x), + "datasplit": [run.datasplit] * len(x), "run": [run.name] * len(x), } # TODO: get_best: higher_is_better is not true for all scores
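
Notes on the techniques the patches above rely on follow, as standalone Python sketches; they run outside of dacapo unless stated otherwise.

PATCH 01's smooth_values computes a running mean over a window of n values with the cumulative-sum trick (cumsum[i] - cumsum[i - n] is the sum of the last n values), and the same trick over squared values gives a running mean of squares. One caveat: m2 - m ** 2 is E[x^2] - E[x]^2, the running variance, so the "# stddev" comment is off by a square root, and the envelope that plot_runs draws with the patch glyph (upper edge (x, y + 3 * s) left to right, then lower edge (x[::-1], (y - 3 * s)[::-1]) back again, closing the polygon) is really a variance band. A minimal sketch with the square root added, assuming plain NumPy:

    import numpy as np

    def smooth_values(a, n, stride=1):
        a = np.asarray(a, dtype=np.float64)

        # running mean: cumsum[i] - cumsum[i - n] is the sum of the last n values
        m = np.cumsum(a)
        m[n:] = m[n:] - m[:-n]
        m = m[n - 1:] / n

        # running mean of squared values, same trick
        m2 = np.cumsum(a ** 2)
        m2[n:] = m2[n:] - m2[:-n]
        m2 = m2[n - 1:] / n

        # E[x^2] - E[x]^2 is the variance; the square root gives the stddev
        # (clipped at 0 to absorb tiny negative rounding errors)
        s = np.sqrt(np.maximum(m2 - m ** 2, 0.0))

        if stride > 1:
            m = m[::stride]
            s = s[::stride]
        return m, s

    losses = np.random.rand(10_000)
    mean, std = smooth_values(losses, 100, stride=100)  # 100 non-overlapping windows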
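
PATCH 04 discovers score and post-processor parameter names by introspection rather than hard-coding them: inspect.getmembers with an isroutine filter keeps every non-callable attribute, and a dunder check drops Python's own machinery. The same pattern in isolation, with a made-up VOIScores class standing in for dacapo's score objects:

    import inspect

    class VOIScores:
        # stand-in for a per-parameter score object
        voi_split = 0.2
        voi_merge = 0.3

    def get_attribute_names(class_instance):
        # keep non-callable members, then drop __dunder__ attributes
        attributes = inspect.getmembers(
            class_instance, lambda a: not inspect.isroutine(a))
        return [name for name, _ in attributes
                if not (name.startswith("__") and name.endswith("__"))]

    print(get_attribute_names(VOIScores()))  # ['voi_merge', 'voi_split']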
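
get_best in the same patch picks, per validation iteration, the parameter set with the best value of the chosen score. Missing scores come back as NaN, which poisons argmax/argmin, so they are first replaced by the worst possible value for the chosen direction. The selection pattern on its own (the VOI sums are invented, and lower is better for VOI):

    import numpy as np

    def best_index(scores, higher_is_better=True):
        ips = np.array(scores, dtype=np.float32)
        # force missing scores to lose every comparison
        ips[np.isnan(ips)] = -np.inf if higher_is_better else np.inf
        return np.argmax(ips) if higher_is_better else np.argmin(ips)

    voi_sums = [0.41, np.nan, 0.38, 0.45]
    assert best_index(voi_sums, higher_is_better=False) == 2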
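
get_runs_info in PATCH 05 selects runs by base name: run_config_name.split(":")[0] implies that repetitions of a run config are stored as "<base>:<repetition>" (an assumption read off the code, not documented here), and the index of the base name picks the matching entry from the parallel validation_score_names, higher_is_betters and plot_losses lists. The matching logic with invented names:

    all_run_config_names = ["unet_affs:0", "unet_affs:1", "lsd_run:0"]
    run_config_base_names = ["unet_affs"]
    plot_losses = [True]  # parallel to run_config_base_names

    for run_config_name in all_run_config_names:
        base = run_config_name.split(":")[0]
        if base in run_config_base_names:
            idx = run_config_base_names.index(base)
            print(run_config_name, "plot_loss =", plot_losses[idx])
    # unet_affs:0 plot_loss = True
    # unet_affs:1 plot_loss = True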
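
With return_json=True, PATCH 05 serializes the column layout via bokeh.embed.json_item instead of writing performance_plots.html, so a web frontend can render the figures itself. A sketch of the consuming side; the run name and score options are placeholders, and a configured dacapo config/stats store is assumed:

    import json
    from dacapo.plot import plot_runs

    payload = plot_runs(
        ["my_run"],                     # run config base names
        validation_scores=["voi_sum"],  # one score name per base name
        higher_is_betters=[False],
        plot_losses=[True],
        return_json=True,
    )
    item = json.loads(payload)  # dict in bokeh's json_item format

A page containing <div id="myplot"></div> can then render it client-side with BokehJS via Bokeh.embed.embed_item(item); "myplot" is the target id passed to json_item in plot_runs.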
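
PATCH 11 replaces the fixed smooth=100 window for the loss curve with smooth = int(np.maximum(len(iterations) / 2500, 1)). Since the stride equals the window size, consecutive windows do not overlap, which caps the plotted curve at roughly 2500 points however long the training ran:

    import numpy as np

    def n_plotted_points(n_iterations):
        smooth = int(np.maximum(n_iterations / 2500, 1))
        windowed = n_iterations - smooth + 1  # length after the running mean
        return -(-windowed // smooth)         # ceil division: length after striding

    print(n_plotted_points(10_000))     # 2500 (smooth == 4)
    print(n_plotted_points(1_000_000))  # 2500 (smooth == 400)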