diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml index ad03af00..a9ebfdec 100644 --- a/.github/workflows/black.yaml +++ b/.github/workflows/black.yaml @@ -1,17 +1,27 @@ -name: Python Black +name: black-action on: [push, pull_request] jobs: - lint: - name: Python Lint + linter_name: + name: runner / black runs-on: ubuntu-latest steps: - - name: Setup Python - uses: actions/setup-python@v1 - - name: Setup checkout - uses: actions/checkout@master - - name: Lint with Black - run: | - pip install black - black -v --check dacapo tests + - uses: actions/checkout@v2 + - name: Check files using the black formatter + uses: rickstaa/action-black@v1 + id: action_black + with: + black_args: "." + - name: Create Pull Request + if: steps.action_black.outputs.is_formatted == 'true' + uses: peter-evans/create-pull-request@v3 + with: + token: ${{ secrets.GITHUB_TOKEN }} + title: "Format Python code with psf/black push" + commit-message: ":art: Format Python code with psf/black" + body: | + There appear to be some python formatting errors in ${{ github.sha }}. This pull request + uses the [psf/black](https://github.com/psf/black) formatter to fix these issues. + base: ${{ github.head_ref }} # Creates pull request onto pull request or commit branch + branch: actions/black \ No newline at end of file diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 5a84cc86..d8d7b388 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -1,8 +1,7 @@ -name: Pages -on: - push: - branches: - - master +name: Generate Pages + +on: [push, pull_request] + jobs: docs: runs-on: ubuntu-latest diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml deleted file mode 100644 index 58d200cf..00000000 --- a/.github/workflows/publish.yaml +++ /dev/null @@ -1,34 +0,0 @@ -name: Publish - -on: - push: - tags: "*" - -jobs: - build-n-publish: - name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@master - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install pypa/build - run: >- - python -m - pip install - build - --user - - name: Build a binary wheel and a source tarball - run: >- - python -m - build - --sdist - --wheel - --outdir dist/ - - name: Publish distribution 📦 to PyPI - if: startsWith(github.ref, 'refs/tags') - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index 020ca307..2ecaf3f0 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -1,7 +1,6 @@ name: Test -on: - push: +on: [push, pull_request] jobs: test: @@ -9,7 +8,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10"] steps: - uses: actions/checkout@v2 @@ -23,4 +22,4 @@ jobs: pip install -r requirements-dev.txt - name: Test with pytest run: | - pytest tests \ No newline at end of file + pytest tests diff --git a/README.md b/README.md index a51d4f99..64d35064 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -![DaCapo](docs/source/_static/dacapo.svg) +# DaCapo ![DaCapo](docs/source/_static/icon_dacapo.png) [![tests](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/tests.yaml) [![black](https://github.com/funkelab/dacapo/actions/workflows/black.yaml/badge.svg)](https://github.com/funkelab/dacapo/actions/workflows/black.yaml) diff --git a/dacapo/apply.py b/dacapo/apply.py index 64f23df3..434002ef 100644 --- a/dacapo/apply.py +++ b/dacapo/apply.py @@ -10,3 +10,4 @@ def apply(run_name: str, iteration: int, dataset_name: str): iteration, dataset_name, ) + raise NotImplementedError("This function is not yet implemented.") diff --git a/dacapo/cli.py b/dacapo/cli.py index 76a5e18e..be59df0c 100644 --- a/dacapo/cli.py +++ b/dacapo/cli.py @@ -40,7 +40,7 @@ def validate(run_name, iteration): @cli.command() @click.option( - "-r", "--run", required=True, type=str, help="The name of the run to use." + "-r", "--run-name", required=True, type=str, help="The name of the run to use." ) @click.option( "-i", diff --git a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py index 122526b1..1475c7b9 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/concat_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/concat_array.py @@ -5,6 +5,9 @@ import numpy as np from typing import Dict, Any +import logging + +logger = logging.getLogger(__file__) class ConcatArray(Array): @@ -116,5 +119,7 @@ def __getitem__(self, roi: Roi) -> np.ndarray: axis=0, ) if concatenated.shape[0] == 1: - raise Exception(f"{concatenated.shape}, shapes") + logger.info( + f"Concatenated array has only one channel: {self.name} {concatenated.shape}" + ) return concatenated diff --git a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py index beaa474d..e08ffe56 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/dvid_array.py @@ -41,7 +41,7 @@ def attrs(self): @property def axes(self): - return ["t", "z", "y", "x"][-self.dims :] + return ["c", "z", "y", "x"][-self.dims :] @property def dims(self) -> int: diff --git a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py index 7101d737..5f2bc048 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/numpy_array.py @@ -35,7 +35,7 @@ def from_gp_array(cls, array: gp.Array): ((["b", "c"] if len(array.data.shape) == instance.dims + 2 else [])) + (["c"] if len(array.data.shape) == instance.dims + 1 else []) + [ - "t", + "c", "z", "y", "x", diff --git a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py index e16ef26e..ccdf5037 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/tiff_array.py @@ -56,7 +56,7 @@ def voxel_size(self) -> Coordinate: @lazy_property.LazyProperty def roi(self) -> Roi: - return Roi(self._offset * self.shape) + return Roi(self._offset, self.shape) @property def writable(self) -> bool: diff --git a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py index cadfcb6c..25f2c224 100644 --- a/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py +++ b/dacapo/experiments/datasplits/datasets/arrays/zarr_array.py @@ -52,9 +52,9 @@ def axes(self): logger.debug( "DaCapo expects Zarr datasets to have an 'axes' attribute!\n" f"Zarr {self.file_name} and dataset {self.dataset} has attributes: {list(self._attributes.items())}\n" - f"Using default {['t', 'z', 'y', 'x'][-self.dims::]}", + f"Using default {['c', 'z', 'y', 'x'][-self.dims::]}", ) - return ["t", "z", "y", "x"][-self.dims : :] + return ["c", "z", "y", "x"][-self.dims : :] @property def dims(self) -> int: diff --git a/dacapo/experiments/model.py b/dacapo/experiments/model.py index bbaacb2d..8ca2b2b9 100644 --- a/dacapo/experiments/model.py +++ b/dacapo/experiments/model.py @@ -24,7 +24,7 @@ def __init__( self, architecture: Architecture, prediction_head: torch.nn.Module, - eval_activation: torch.nn.Module = None, + eval_activation: torch.nn.Module | None = None, ): super().__init__() @@ -46,7 +46,7 @@ def forward(self, x): result = self.eval_activation(result) return result - def compute_output_shape(self, input_shape: Coordinate) -> Coordinate: + def compute_output_shape(self, input_shape: Coordinate) -> Tuple[int, Coordinate]: """Compute the spatial shape (i.e., not accounting for channels and batch dimensions) of this model, when fed a tensor of the given spatial shape as input.""" diff --git a/dacapo/experiments/starts/start.py b/dacapo/experiments/starts/start.py index a5b68069..da7badbf 100644 --- a/dacapo/experiments/starts/start.py +++ b/dacapo/experiments/starts/start.py @@ -15,9 +15,21 @@ def initialize_weights(self, model): weights_store = create_weights_store() weights = weights_store._retrieve_weights(self.run, self.criterion) logger.info(f"loading weights from run {self.run}, criterion: {self.criterion}") - # load the model weights (taken from torch load_state_dict source) try: model.load_state_dict(weights.model) except RuntimeError as e: logger.warning(e) + # if the model is not the same, we can try to load the weights + # of the common layers + model_dict = model.state_dict() + pretrained_dict = { + k: v + for k, v in weights.model.items() + if k in model_dict and v.size() == model_dict[k].size() + } + model_dict.update( + pretrained_dict + ) # update only the existing and matching layers + model.load_state_dict(model_dict) + logger.warning(f"loaded only common layers from weights") diff --git a/dacapo/experiments/tasks/affinities_task.py b/dacapo/experiments/tasks/affinities_task.py index c1014fd0..859494e7 100644 --- a/dacapo/experiments/tasks/affinities_task.py +++ b/dacapo/experiments/tasks/affinities_task.py @@ -14,6 +14,8 @@ def __init__(self, task_config): self.predictor = AffinitiesPredictor( neighborhood=task_config.neighborhood, lsds=task_config.lsds ) - self.loss = AffinitiesLoss(len(task_config.neighborhood)) + self.loss = AffinitiesLoss( + len(task_config.neighborhood), task_config.lsds_to_affs_weight_ratio + ) self.post_processor = WatershedPostProcessor(offsets=task_config.neighborhood) self.evaluator = InstanceEvaluator() diff --git a/dacapo/experiments/tasks/affinities_task_config.py b/dacapo/experiments/tasks/affinities_task_config.py index d4b2c619..a50c2141 100644 --- a/dacapo/experiments/tasks/affinities_task_config.py +++ b/dacapo/experiments/tasks/affinities_task_config.py @@ -30,3 +30,9 @@ class AffinitiesTaskConfig(TaskConfig): "It has been shown that lsds as an auxiliary task can help affinity predictions." }, ) + lsds_to_affs_weight_ratio: float = attr.ib( + default=1, + metadata={ + "help_text": "If training with lsds, set how much they should be weighted compared to affs." + }, + ) diff --git a/dacapo/experiments/tasks/losses/affinities_loss.py b/dacapo/experiments/tasks/losses/affinities_loss.py index 65ada884..74fc7fe6 100644 --- a/dacapo/experiments/tasks/losses/affinities_loss.py +++ b/dacapo/experiments/tasks/losses/affinities_loss.py @@ -3,8 +3,9 @@ class AffinitiesLoss(Loss): - def __init__(self, num_affinities: int): + def __init__(self, num_affinities: int, lsds_to_affs_weight_ratio: float): self.num_affinities = num_affinities + self.lsds_to_affs_weight_ratio = lsds_to_affs_weight_ratio def compute(self, prediction, target, weight): affs, affs_target, affs_weight = ( @@ -21,7 +22,7 @@ def compute(self, prediction, target, weight): return ( torch.nn.BCEWithLogitsLoss(reduction="none")(affs, affs_target) * affs_weight - ).mean() + ( + ).mean() + self.lsds_to_affs_weight_ratio * ( torch.nn.MSELoss(reduction="none")(torch.nn.Sigmoid()(aux), aux_target) * aux_weight ).mean() diff --git a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py index 8fa6104b..1a7c4627 100644 --- a/dacapo/experiments/tasks/post_processors/watershed_post_processor.py +++ b/dacapo/experiments/tasks/post_processors/watershed_post_processor.py @@ -24,7 +24,7 @@ def enumerate_parameters(self): """Enumerate all possible parameters of this post-processor. Should return instances of ``PostProcessorParameters``.""" - for i, bias in enumerate([0.1, 0.5, 0.9]): + for i, bias in enumerate([0.1, 0.25, 0.5, 0.75, 0.9]): yield WatershedPostProcessorParameters(id=i, bias=bias) def set_prediction(self, prediction_array_identifier): @@ -44,9 +44,9 @@ def process(self, parameters, output_array_identifier): # if a previous segmentation is provided, it must have a "grid graph" # in its metadata. pred_data = self.prediction_array[self.prediction_array.roi] - affs = pred_data[: len(self.offsets)] + affs = pred_data[: len(self.offsets)].astype(np.float64) segmentation = mws.agglom( - affs - 0.5, + affs - parameters.bias, self.offsets, ) # filter fragments @@ -59,12 +59,17 @@ def process(self, parameters, output_array_identifier): for fragment, mean in zip( fragment_ids, measurements.mean(average_affs, segmentation, fragment_ids) ): - if mean < 0.5: + if mean < parameters.bias: filtered_fragments.append(fragment) filtered_fragments = np.array(filtered_fragments, dtype=segmentation.dtype) replace = np.zeros_like(filtered_fragments) - segmentation = npi.remap(segmentation, filtered_fragments, replace) + + # DGA: had to add in flatten and reshape since remap (in particular indices) didn't seem to work with ndarrays for the input + if filtered_fragments.size > 0: + segmentation = npi.remap( + segmentation.flatten(), filtered_fragments, replace + ).reshape(segmentation.shape) output_array[self.prediction_array.roi] = segmentation diff --git a/dacapo/experiments/tasks/predictors/affinities_predictor.py b/dacapo/experiments/tasks/predictors/affinities_predictor.py index 81efb237..40d81f5d 100644 --- a/dacapo/experiments/tasks/predictors/affinities_predictor.py +++ b/dacapo/experiments/tasks/predictors/affinities_predictor.py @@ -17,9 +17,17 @@ class AffinitiesPredictor(Predictor): - def __init__(self, neighborhood: List[Coordinate], lsds: bool = True): + def __init__( + self, + neighborhood: List[Coordinate], + lsds: bool = True, + num_voxels: int = 20, + downsample_lsds: int = 1, + grow_boundary_iterations: int = 0, + ): self.neighborhood = neighborhood self.lsds = lsds + self.num_voxels = num_voxels if lsds: self._extractor = None if self.dims == 2: @@ -30,12 +38,16 @@ def __init__(self, neighborhood: List[Coordinate], lsds: bool = True): raise ValueError( f"Cannot compute lsds on volumes with {self.dims} dimensions" ) + self.downsample_lsds = downsample_lsds else: self.num_lsds = 0 + self.grow_boundary_iterations = grow_boundary_iterations def extractor(self, voxel_size): if self._extractor is None: - self._extractor = LsdExtractor(self.sigma(voxel_size)) + self._extractor = LsdExtractor( + self.sigma(voxel_size), downsample=self.downsample_lsds + ) return self._extractor @@ -45,8 +57,7 @@ def dims(self): def sigma(self, voxel_size): voxel_dist = max(voxel_size) # arbitrarily chosen - num_voxels = 10 # arbitrarily chosen - sigma = voxel_dist * num_voxels + sigma = voxel_dist * self.num_voxels # arbitrarily chosen return Coordinate((sigma,) * self.dims) def lsd_pad(self, voxel_size): @@ -118,7 +129,9 @@ def _grow_boundaries(self, mask, slab): slice(start[d], start[d] + slab[d]) for d in range(len(slab)) ) mask_slab = mask[slices] - dilated_mask_slab = ndimage.binary_dilation(mask_slab, iterations=1) + dilated_mask_slab = ndimage.binary_dilation( + mask_slab, iterations=self.grow_boundary_iterations + ) foreground[slices] = dilated_mask_slab # label new background @@ -130,10 +143,12 @@ def create_weight(self, gt, target, mask, moving_class_counts=None): (moving_class_counts, moving_lsd_class_counts) = ( moving_class_counts if moving_class_counts is not None else (None, None) ) - # mask_data = self._grow_boundaries( - # mask[target.roi], slab=tuple(1 if c == "c" else -1 for c in target.axes) - # ) - mask_data = mask[target.roi] + if self.grow_boundary_iterations > 0: + mask_data = self._grow_boundaries( + mask[target.roi], slab=tuple(1 if c == "c" else -1 for c in target.axes) + ) + else: + mask_data = mask[target.roi] aff_weights, moving_class_counts = balance_weights( target[target.roi][: self.num_channels - self.num_lsds].astype(np.uint8), 2, diff --git a/dacapo/experiments/tasks/predictors/distance_predictor.py b/dacapo/experiments/tasks/predictors/distance_predictor.py index a8fa4449..70c2bde4 100644 --- a/dacapo/experiments/tasks/predictors/distance_predictor.py +++ b/dacapo/experiments/tasks/predictors/distance_predictor.py @@ -27,7 +27,7 @@ class DistancePredictor(Predictor): in the channels argument. """ - def __init__(self, channels: List[str], scale_factor: float, mask_distances=bool): + def __init__(self, channels: List[str], scale_factor: float, mask_distances: bool): self.channels = channels self.norm = "tanh" self.dt_scale_factor = scale_factor diff --git a/dacapo/experiments/trainers/gunpowder_trainer.py b/dacapo/experiments/trainers/gunpowder_trainer.py index efec630f..f5d8fcd5 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer.py +++ b/dacapo/experiments/trainers/gunpowder_trainer.py @@ -42,6 +42,11 @@ def __init__(self, trainer_config): self.mask_integral_downsample_factor = 4 self.clip_raw = trainer_config.clip_raw + # Testing out if calculating multiple times and multiplying is necessary + self.add_predictor_nodes_to_dataset = ( + trainer_config.add_predictor_nodes_to_dataset + ) + self.scheduler = None def create_optimizer(self, model): @@ -146,13 +151,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): for augment in self.augments: dataset_source += augment.node(raw_key, gt_key, mask_key) - # Add predictor nodes to dataset_source - dataset_source += DaCapoTargetFilter( - task.predictor, - gt_key=gt_key, - weights_key=dataset_weight_key, - mask_key=mask_key, - ) + if self.add_predictor_nodes_to_dataset: + # Add predictor nodes to dataset_source + dataset_source += DaCapoTargetFilter( + task.predictor, + gt_key=gt_key, + weights_key=dataset_weight_key, + mask_key=mask_key, + ) dataset_sources.append(dataset_source) pipeline = tuple(dataset_sources) + gp.RandomProvider(weights) @@ -162,11 +168,14 @@ def build_batch_provider(self, datasets, model, task, snapshot_container=None): task.predictor, gt_key=gt_key, target_key=target_key, - weights_key=datasets_weight_key, + weights_key=datasets_weight_key + if self.add_predictor_nodes_to_dataset + else weight_key, mask_key=mask_key, ) - pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) + if self.add_predictor_nodes_to_dataset: + pipeline += Product(dataset_weight_key, datasets_weight_key, weight_key) # Trainer attributes: if self.num_data_fetchers > 1: diff --git a/dacapo/experiments/trainers/gunpowder_trainer_config.py b/dacapo/experiments/trainers/gunpowder_trainer_config.py index ae424305..539e3c5e 100644 --- a/dacapo/experiments/trainers/gunpowder_trainer_config.py +++ b/dacapo/experiments/trainers/gunpowder_trainer_config.py @@ -29,3 +29,10 @@ class GunpowderTrainerConfig(TrainerConfig): ) min_masked: Optional[float] = attr.ib(default=0.15) clip_raw: bool = attr.ib(default=True) + + add_predictor_nodes_to_dataset: Optional[bool] = attr.ib( + default=True, + metadata={ + "help_text": "Whether to add a predictor node to dataset_source and apply product of weights" + }, + ) diff --git a/dacapo/experiments/training_stats.py b/dacapo/experiments/training_stats.py index cd3fcd01..72c631ed 100644 --- a/dacapo/experiments/training_stats.py +++ b/dacapo/experiments/training_stats.py @@ -16,7 +16,9 @@ class TrainingStats: def add_iteration_stats(self, iteration_stats: TrainingIterationStats) -> None: if len(self.iteration_stats) > 0: - assert iteration_stats.iteration == self.iteration_stats[-1].iteration + 1 + assert ( + iteration_stats.iteration == self.iteration_stats[-1].iteration + 1 + ), f"Expected iteration {self.iteration_stats[-1].iteration + 1}, got {iteration_stats.iteration}" self.iteration_stats.append(iteration_stats) diff --git a/dacapo/predict.py b/dacapo/predict.py index 5a40e303..1df4d779 100644 --- a/dacapo/predict.py +++ b/dacapo/predict.py @@ -24,6 +24,8 @@ def predict( num_cpu_workers: int = 4, compute_context: ComputeContext = LocalTorch(), output_roi: Optional[Roi] = None, + output_dtype: np.dtype = np.float32, # type: ignore + overwrite: bool = False, ): # get the model's input and output size @@ -56,7 +58,7 @@ def predict( output_roi, model.num_out_channels, output_voxel_size, - np.float32, + output_dtype, ) # create gunpowder keys @@ -75,8 +77,8 @@ def predict( # raw: (1, c, d, h, w) gt_padding = (output_size - output_roi.shape) % output_size - prediction_roi = output_roi.grow(gt_padding) - + prediction_roi = output_roi.grow(gt_padding) # TODO: are we sure this makes sense? + # TODO: Add cache node? # predict pipeline += gp_torch.Predict( model=model, @@ -84,7 +86,9 @@ def predict( outputs={0: prediction}, array_specs={ prediction: gp.ArraySpec( - roi=prediction_roi, voxel_size=output_voxel_size, dtype=np.float32 + roi=prediction_roi, + voxel_size=output_voxel_size, + dtype=np.float32, # assumes network output is float32 ) }, spawn_subprocess=False, @@ -97,22 +101,29 @@ def predict( pipeline += gp.Squeeze([raw, prediction]) # raw: (c, d, h, w) # prediction: (c, d, h, w) - # raw: (c, d, h, w) - # prediction: (c, d, h, w) + + # convert to uint8 if necessary: + if output_dtype == np.uint8: + pipeline += gp.IntensityScaleShift( + prediction, scale=255.0, shift=0.0 + ) # assumes float32 is [0,1] + pipeline += gp.AsType(prediction, output_dtype) # write to zarr pipeline += gp.ZarrWrite( {prediction: prediction_array_identifier.dataset}, prediction_array_identifier.container.parent, prediction_array_identifier.container.name, - dataset_dtypes={prediction: np.float32}, + dataset_dtypes={prediction: output_dtype}, ) # create reference batch request ref_request = gp.BatchRequest() ref_request.add(raw, input_size) ref_request.add(prediction, output_size) - pipeline += gp.Scan(ref_request) + pipeline += gp.Scan( + ref_request + ) # TODO: This is a slow implementation for rendering # build pipeline and predict in complete output ROI diff --git a/dacapo/train.py b/dacapo/train.py index 9203c1be..7beb096b 100644 --- a/dacapo/train.py +++ b/dacapo/train.py @@ -16,6 +16,7 @@ def train(run_name: str, compute_context: ComputeContext = LocalTorch()): """Train a run""" if compute_context.train(run_name): + logger.error("Run %s is already being trained", run_name) # if compute context runs train in some other process # we are done here. return @@ -96,9 +97,10 @@ def train_run( weights_store.retrieve_weights(run, iteration=trained_until) elif latest_weights_iteration > trained_until: - raise RuntimeError( + weights_store.retrieve_weights(run, iteration=latest_weights_iteration) + logger.error( f"Found weights for iteration {latest_weights_iteration}, but " - f"run {run.name} was only trained until {trained_until}." + f"run {run.name} was only trained until {trained_until}. " ) # start/resume training @@ -127,18 +129,20 @@ def train_run( # train for at most 100 iterations at a time, then store training stats iterations = min(100, run.train_until - trained_until) iteration_stats = None - - for iteration_stats in tqdm( + bar = tqdm( trainer.iterate( iterations, run.model, run.optimizer, compute_context.device, ), - "training", - iterations, - ): + desc=f"training until {iterations + trained_until}", + total=run.train_until, + initial=trained_until, + ) + for iteration_stats in bar: run.training_stats.add_iteration_stats(iteration_stats) + bar.set_postfix({"loss": iteration_stats.loss}) if (iteration_stats.iteration + 1) % run.validation_interval == 0: break @@ -160,22 +164,26 @@ def train_run( run.model = run.model.to(torch.device("cpu")) run.move_optimizer(torch.device("cpu"), empty_cuda_cache=True) - weights_store.store_weights(run, iteration_stats.iteration + 1) - validate_run( - run, - iteration_stats.iteration + 1, - compute_context=compute_context, - ) - stats_store.store_validation_iteration_scores( - run.name, run.validation_scores - ) stats_store.store_training_stats(run.name, run.training_stats) + weights_store.store_weights(run, iteration_stats.iteration + 1) + try: + validate_run( + run, + iteration_stats.iteration + 1, + compute_context=compute_context, + ) + stats_store.store_validation_iteration_scores( + run.name, run.validation_scores + ) + except Exception as e: + logger.error( + f"Validation failed for run {run.name} at iteration " + f"{iteration_stats.iteration + 1}.", + exc_info=e, + ) # make sure to move optimizer back to the correct device run.move_optimizer(compute_context.device) run.model.train() - weights_store.store_weights(run, run.training_stats.trained_until()) - stats_store.store_training_stats(run.name, run.training_stats) - logger.info("Trained until %d, finished.", trained_until) diff --git a/dacapo/validate.py b/dacapo/validate.py index 25b7463e..a1cf9da7 100644 --- a/dacapo/validate.py +++ b/dacapo/validate.py @@ -141,6 +141,7 @@ def validate_run( prediction_array_identifier = array_store.validation_prediction_array( run.name, iteration, validation_dataset ) + logger.info("Predicting on dataset %s", validation_dataset.name) predict( run.model, validation_dataset.raw, @@ -148,6 +149,7 @@ def validate_run( compute_context=compute_context, output_roi=validation_dataset.gt.roi, ) + logger.info("Predicted on dataset %s", validation_dataset.name) post_processor.set_prediction(prediction_array_identifier) diff --git a/docs/source/_static/icon_dacapo.png b/docs/source/_static/icon_dacapo.png new file mode 100644 index 00000000..f04fc931 Binary files /dev/null and b/docs/source/_static/icon_dacapo.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index cd582361..7df2f563 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -12,14 +12,15 @@ # import os import sys -sys.path.insert(0, os.path.abspath('../..')) + +sys.path.insert(0, os.path.abspath("../..")) # -- Project information ----------------------------------------------------- -project = 'DaCapo' -copyright = '2022, William Patton, David Ackerman, Jan Funke' -author = 'William Patton, David Ackerman, Jan Funke' +project = "DaCapo" +copyright = "2022, William Patton, David Ackerman, Jan Funke" +author = "William Patton, David Ackerman, Jan Funke" # -- General configuration --------------------------------------------------- @@ -27,15 +28,15 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon', 'sphinx_autodoc_typehints'] +extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon", "sphinx_autodoc_typehints"] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # -- Options for HTML output ------------------------------------------------- @@ -43,12 +44,12 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'sphinx_material' +html_theme = "sphinx_material" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] html_css_files = [ - 'css/custom.css', -] \ No newline at end of file + "css/custom.css", +] diff --git a/mypy.ini b/mypy.ini index 722c11df..aadc732e 100644 --- a/mypy.ini +++ b/mypy.ini @@ -68,4 +68,7 @@ ignore_missing_imports = True ignore_missing_imports = True [mypy-mwatershed.*] -ignore_missing_imports = True \ No newline at end of file +ignore_missing_imports = True + +[mypy-numpy_indexed.*] +ignore_missing_imports = True diff --git a/requirements-dev.txt b/requirements-dev.txt index 492c8e6f..12afa83a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,5 +1,5 @@ black mypy -pytest +pytest==7.4.4 pytest-cov pytest-lazy-fixture \ No newline at end of file diff --git a/setup.py b/setup.py index 3ba1f0d0..3e6f5106 100644 --- a/setup.py +++ b/setup.py @@ -5,16 +5,16 @@ description="Framework for easy composition of volumetric machine learning jobs.", long_description=open("README.md", "r").read(), long_description_content_type="text/markdown", - version="0.1", - url="https://github.com/funkelab/dacapo", - author="Jan Funke, Will Patton", - author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org", + version="0.2.0", + url="https://github.com/janelia-cellmap/dacapo", + author="Jan Funke, Will Patton, Jeff Rhoades, Marwan Zouinkhi", + author_email="funkej@janelia.hhmi.org, pattonw@janelia.hhmi.org, rhoadesj@hhmi.org, zouinkhim@hhmi.org", license="MIT", packages=find_packages(), entry_points={"console_scripts": ["dacapo=dacapo.cli:cli"]}, include_package_data=True, install_requires=[ - "numpy", + "numpy==1.22.3", "pyyaml", "zarr", "cattrs", @@ -32,9 +32,14 @@ "funlib.math>=0.1", "funlib.geometry>=0.2", "mwatershed>=0.1", - "funlib.persistence>=0.1", + "funlib.persistence @ git+https://github.com/janelia-cellmap/funlib.persistence", "funlib.evaluate @ git+https://github.com/pattonw/funlib.evaluate", "gunpowder>=1.3", - "lsds>=0.1.3", + # "lsds>=0.1.3", + "lsds @ git+https://github.com/funkelab/lsd", + "xarray", + "cattrs", + "numpy-indexed", + "click", ], )