Skip to content

stat_calculators ¤

Classes:

Name Description
CrickQuantilesCalculator

to calculate dataset quantiles with the experimental crick library

MeanCalculator

to calculate sample and dataset mean for in-memory samples

MeanQuantilesCalculator

to calculate dataset quantiles heuristically by averaging across samples

MeanVarStdCalculator

to calculate sample and dataset mean, variance or standard deviation

NaiveSampleMeasureCalculator

wrapper for measures to match interface of other sample measure calculators

SampleQuantilesCalculator

to calculate sample quantiles

StatsCalculator

Estimates dataset statistics and computes sample statistics efficiently

Functions:

Name Description
compute_dataset_measures

compute all dataset measures for the given dataset

compute_measures

compute all measures for the given dataset

compute_sample_measures

compute all sample measures for the given sample

get_measure_calculators

determines which calculators are needed to compute the required measures efficiently

Attributes:

Name Type Description
DatasetMeasureCalculator
DatasetQuantilesCalculator Type[Union[MeanQuantilesCalculator, CrickQuantilesCalculator]]
SampleMeasureCalculator
TDigest

DatasetMeasureCalculator module-attribute ¤

DatasetQuantilesCalculator module-attribute ¤

DatasetQuantilesCalculator: Type[Union[MeanQuantilesCalculator, CrickQuantilesCalculator]] = MeanQuantilesCalculator

SampleMeasureCalculator module-attribute ¤

TDigest module-attribute ¤

TDigest = crick.TDigest

CrickQuantilesCalculator ¤

CrickQuantilesCalculator(member_id: MemberId, axes: Optional[Sequence[AxisId]], qs: Collection[float])

to calculate dataset quantiles with the experimental crick library

Methods:

Name Description
finalize
update
Source code in src/bioimageio/core/stat_calculators.py
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
def __init__(
    self,
    member_id: MemberId,
    axes: Optional[Sequence[AxisId]],
    qs: Collection[float],
):
    """set up a calculator without any digests yet

    Emits a warning that the experimental 'crick' library is used.

    Args:
        member_id: id of the sample member whose tensor is processed.
        axes: axes of the requested measure (stored as tuple, or `None`);
            must not contain the reserved name "_quantiles".
        qs: quantiles to estimate, each within [0, 1];
            stored sorted and without duplicates.
    """
    warnings.warn("Computing dataset quantiles with experimental 'crick' library.")
    super().__init__()
    for q in qs:
        assert 0.0 <= q <= 1.0

    if axes is not None:
        assert "_quantiles" not in axes

    self._member_id = member_id
    self._axes = tuple(axes) if axes is not None else None
    self._qs = sorted(set(qs))

    # digest state; `update` triggers its initialization when `_digest` is None
    self._digest: Optional[List[TDigest]] = None
    self._dims: Optional[Tuple[AxisId, ...]] = None
    self._shape: Optional[Tuple[int, ...]] = None
    self._indices: Optional[Iterator[Tuple[int, ...]]] = None

finalize ¤

finalize() -> Dict[DatasetQuantile, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def finalize(self) -> Dict[DatasetQuantile, MeasureValue]:
    """return one `DatasetQuantile` entry per requested quantile

    Returns an empty dict as long as no digests exist.
    """
    digests = self._digest
    if digests is None:
        return {}

    assert self._dims is not None
    assert self._shape is not None

    # one row per quantile, one column per digest; reshaped to the stored shape
    per_quantile = [[d.quantile(q) for d in digests] for q in self._qs]
    vs: NDArray[Any] = np.asarray(per_quantile).reshape(self._shape)

    # the leading dimension enumerates the quantiles and is dropped per entry
    value_dims = self._dims[1:]
    result: Dict[DatasetQuantile, MeasureValue] = {}
    for q, v in zip(self._qs, vs):
        key = DatasetQuantile(q=q, axes=self._axes, member_id=self._member_id)
        result[key] = Tensor(v, dims=value_dims)

    return result

update ¤

update(part: Sample)
Source code in src/bioimageio/core/stat_calculators.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
def update(self, part: Sample):
    """feed the member tensor of `part` into the digests

    The digests are initialized from the tensor's tagged shape on the first
    call. Each digest receives the tensor slice selected by one index
    combination over the non-quantile dimensions (`self._dims[1:]`).

    Args:
        part: sample (or sample-like object with `members`) to add.
    """
    tensor = (
        part.members[self._member_id]
        if isinstance(part, Sample)
        else part.members[self._member_id].data
    )
    assert "_quantiles" not in tensor.dims
    if self._digest is None:
        self._initialize(tensor.tagged_shape)

    assert self._digest is not None
    assert self._dims is not None
    assert self._shape is not None
    # `self._indices` is declared as an iterator; iterating it here would
    # exhaust it on the first call and silently skip every later sample.
    # Rebuild the index combinations on each call instead, in the same
    # C order that `finalize` relies on when reshaping to `self._shape`.
    for i, idx in enumerate(np.ndindex(*self._shape[1:])):
        self._digest[i].update(tensor[dict(zip(self._dims[1:], idx))])

MeanCalculator ¤

MeanCalculator(member_id: MemberId, axes: Optional[Sequence[AxisId]])

to calculate sample and dataset mean for in-memory samples

Methods:

Name Description
compute
compute_and_update
finalize
update
Source code in src/bioimageio/core/stat_calculators.py
71
72
73
74
75
76
77
78
def __init__(self, member_id: MemberId, axes: Optional[Sequence[AxisId]]):
    """set up an empty mean calculator for one member/axes combination

    Args:
        member_id: id of the sample member whose tensor is processed.
        axes: axes of the mean measure (stored as tuple, or `None`).
    """
    super().__init__()
    # running state of the dataset mean
    self._n: int = 0
    self._mean: Optional[Tensor] = None

    if axes is None:
        self._axes = None
    else:
        self._axes = tuple(axes)

    self._member_id = member_id

    # measure keys used for the returned dictionaries
    self._sample_mean = SampleMean(member_id=self._member_id, axes=self._axes)
    self._dataset_mean = DatasetMean(member_id=self._member_id, axes=self._axes)

compute ¤

compute(sample: Sample) -> Dict[SampleMean, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
80
81
def compute(self, sample: Sample) -> Dict[SampleMean, MeasureValue]:
    """return the sample mean of `sample` without touching the dataset state"""
    sample_mean_value = self._compute_impl(sample)
    return {self._sample_mean: sample_mean_value}

compute_and_update ¤

compute_and_update(sample: Sample) -> Dict[SampleMean, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
91
92
93
94
def compute_and_update(self, sample: Sample) -> Dict[SampleMean, MeasureValue]:
    """return the sample mean of `sample` and update the dataset state with it"""
    sample_mean_value = self._compute_impl(sample)
    member_tensor = sample.members[self._member_id]
    self._update_impl(member_tensor, sample_mean_value)
    return {self._sample_mean: sample_mean_value}

finalize ¤

finalize() -> Dict[DatasetMean, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
113
114
115
116
117
def finalize(self) -> Dict[DatasetMean, MeasureValue]:
    """return the dataset mean, or an empty dict if no mean is available yet"""
    dataset_mean_value = self._mean
    if dataset_mean_value is not None:
        return {self._dataset_mean: dataset_mean_value}

    return {}

update ¤

update(sample: Sample) -> None
Source code in src/bioimageio/core/stat_calculators.py
87
88
89
def update(self, sample: Sample) -> None:
    """update the dataset state with the mean of `sample`"""
    sample_mean_value = self._compute_impl(sample)
    member_tensor = sample.members[self._member_id]
    self._update_impl(member_tensor, sample_mean_value)

MeanQuantilesCalculator ¤

MeanQuantilesCalculator(member_id: MemberId, axes: Optional[Sequence[AxisId]], qs: Collection[float])

to calculate dataset quantiles heuristically by averaging across samples

Note

The returned dataset quantiles are an estimate and not mathematically correct

Methods:

Name Description
finalize
update
Source code in src/bioimageio/core/stat_calculators.py
247
248
249
250
251
252
253
254
255
256
257
258
259
def __init__(
    self,
    member_id: MemberId,
    axes: Optional[Sequence[AxisId]],
    qs: Collection[float],
):
    """set up an empty calculator that averages sample quantiles

    Args:
        member_id: id of the sample member whose tensor is processed.
        axes: axes of the requested measure (stored as tuple, or `None`).
        qs: quantiles to estimate, each within [0, 1];
            stored sorted and without duplicates.
    """
    super().__init__()
    for q in qs:
        assert 0.0 <= q <= 1.0

    self._qs = sorted(set(qs))
    if axes is None:
        self._axes = None
    else:
        self._axes = tuple(axes)

    self._member_id = member_id
    # running weighted average of the per-sample estimates and its total weight
    self._estimates: Optional[Tensor] = None
    self._n: int = 0

finalize ¤

finalize() -> Dict[DatasetQuantile, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
281
282
283
284
285
286
287
288
289
290
291
def finalize(self) -> Dict[DatasetQuantile, MeasureValue]:
    """return the averaged quantile estimates (empty if nothing was accumulated)

    Warns that the values were obtained by averaging per-sample estimates.
    """
    estimates = self._estimates
    if estimates is None:
        return {}

    warnings.warn(
        "Computed dataset percentiles naively by averaging percentiles of samples."
    )
    result: Dict[DatasetQuantile, MeasureValue] = {}
    # iterating the estimates pairs each requested quantile with its entry
    for q, e in zip(self._qs, estimates):
        key = DatasetQuantile(q=q, axes=self._axes, member_id=self._member_id)
        result[key] = e

    return result

update ¤

update(sample: Sample)
Source code in src/bioimageio/core/stat_calculators.py
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
def update(self, sample: Sample):
    """merge the quantiles of `sample` into the running weighted average

    Each sample is weighted by its reduced voxel count, i.e. the tensor size
    divided by the number of entries per quantile estimate.
    """
    tensor = sample.members[self._member_id]
    quantiles = tensor.quantile(self._qs, dim=self._axes, method="linear")
    sample_estimates = quantiles.astype("float64", copy=False)

    # reduced voxel count
    entries_per_quantile = np.prod(sample_estimates.shape_tuple[1:])
    n = int(tensor.size / entries_per_quantile)

    if self._estimates is not None:
        weighted_sum = self._n * self._estimates + n * sample_estimates
        total_weight = self._n + n
        self._estimates = weighted_sum / total_weight
        assert self._estimates.dtype == "float64"
    else:
        # first sample: nothing to average with yet
        assert self._n == 0
        self._estimates = sample_estimates

    self._n += n

MeanVarStdCalculator ¤

MeanVarStdCalculator(member_id: MemberId, axes: Optional[Sequence[AxisId]])

to calculate sample and dataset mean, variance or standard deviation

Methods:

Name Description
compute
finalize
update
Source code in src/bioimageio/core/stat_calculators.py
123
124
125
126
127
128
129
def __init__(self, member_id: MemberId, axes: Optional[Sequence[AxisId]]):
    """set up an empty mean/variance/standard deviation calculator

    Args:
        member_id: id of the sample member whose tensor is processed.
        axes: axes of the measures; each entry is converted to `AxisId`
            (stored as tuple, or `None`).
    """
    super().__init__()
    if axes is None:
        self._axes = None
    else:
        self._axes = tuple(AxisId(a) for a in axes)

    self._member_id = member_id
    # running state: voxel count, mean and sum of squared deviations
    self._n: int = 0
    self._mean: Optional[Tensor] = None
    self._m2: Optional[Tensor] = None

compute ¤

compute(sample: Sample) -> Dict[Union[SampleMean, SampleVar, SampleStd], MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def compute(
    self, sample: Sample
) -> Dict[Union[SampleMean, SampleVar, SampleStd], MeasureValue]:
    """compute mean, variance and standard deviation of one sample

    The variance is the sum of squared deviations from the mean over the
    configured axes divided by the number of reduced entries `n`; the
    standard deviation is its square root.
    """
    tensor = sample.members[self._member_id]
    mean = tensor.mean(dim=self._axes)
    centered = (tensor - mean).data

    # number of entries that are reduced into a single output value
    if self._axes is not None:
        n = int(np.prod([tensor.sizes[a] for a in self._axes]))
    else:
        n = tensor.size

    # the keyword naming the reduced dimensions of `xr.dot` depends on the
    # installed xarray version
    # NOTE(review): versions not starting with "2023" all take the `dim` branch,
    # including older ones - confirm against the supported xarray range
    if not xr.__version__.startswith("2023"):
        squared_sum = xr.dot(centered, centered, dim=self._axes)
    else:
        squared_sum = xr.dot(centered, centered, dims=self._axes)

    var = squared_sum / n
    assert isinstance(var, xr.DataArray)
    std = np.sqrt(var)
    assert isinstance(std, xr.DataArray)

    mean_key = SampleMean(axes=self._axes, member_id=self._member_id)
    var_key = SampleVar(axes=self._axes, member_id=self._member_id)
    std_key = SampleStd(axes=self._axes, member_id=self._member_id)
    return {
        mean_key: mean,
        var_key: Tensor.from_xarray(var),
        std_key: Tensor.from_xarray(std),
    }

finalize ¤

finalize() -> Dict[Union[DatasetMean, DatasetVar, DatasetStd], MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
def finalize(
    self,
) -> Dict[Union[DatasetMean, DatasetVar, DatasetStd], MeasureValue]:
    """return dataset mean, variance and standard deviation

    Returns an empty dict if nothing was accumulated, or if axes are given
    that do not include the batch axis (`update` skips samples in that case).
    """
    if self._mean is None:
        return {}

    if self._axes is not None and BATCH_AXIS_ID not in self._axes:
        return {}

    assert self._m2 is not None
    var = self._m2 / self._n
    std = var**0.5
    if isinstance(std, (int, float)):
        # var and mean are scalar tensors, let's keep it consistent
        std = Tensor.from_xarray(xr.DataArray(std))

    assert isinstance(std, Tensor), type(std)

    mean_key = DatasetMean(member_id=self._member_id, axes=self._axes)
    var_key = DatasetVar(member_id=self._member_id, axes=self._axes)
    std_key = DatasetStd(member_id=self._member_id, axes=self._axes)
    return {mean_key: self._mean, var_key: var, std_key: std}

update ¤

update(sample: Sample)
Source code in src/bioimageio/core/stat_calculators.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def update(self, sample: Sample):
    """merge mean and sum of squared deviations of `sample` into the running state

    Samples are skipped if axes are given that do not include the batch axis.
    All accumulation happens in float64.
    """
    reduces_batch_axis = self._axes is None or BATCH_AXIS_ID in self._axes
    if not reduces_batch_axis:
        return

    tensor = sample.members[self._member_id].astype("float64", copy=False)
    batch_mean = tensor.mean(dim=self._axes)
    assert batch_mean.dtype == "float64"
    # reduced voxel count
    batch_n = int(tensor.size / batch_mean.size)
    batch_m2 = ((tensor - batch_mean) ** 2).sum(dim=self._axes)
    assert batch_m2.dtype == "float64"

    if self._mean is None:
        # first sample: take its statistics over as they are
        assert self._m2 is None
        self._n, self._mean, self._m2 = batch_n, batch_mean, batch_m2
        return

    prev_n, prev_mean, prev_m2 = self._n, self._mean, self._m2
    total_n = prev_n + batch_n
    self._n = total_n
    self._mean = (prev_n * prev_mean + batch_n * batch_mean) / total_n
    assert self._mean.dtype == "float64"
    # combine both sums of squared deviations; the last term corrects for the
    # difference between the two means
    delta = batch_mean - prev_mean
    self._m2 = prev_m2 + batch_m2 + delta**2 * prev_n * batch_n / total_n
    assert self._m2.dtype == "float64"

NaiveSampleMeasureCalculator ¤

NaiveSampleMeasureCalculator(member_id: MemberId, measure: SampleMeasure)

wrapper for measures to match interface of other sample measure calculators

Methods:

Name Description
compute

Attributes:

Name Type Description
measure
tensor_name
Source code in src/bioimageio/core/stat_calculators.py
376
377
378
379
def __init__(self, member_id: MemberId, measure: SampleMeasure):
    """store the wrapped `measure` and the id of the member it refers to

    Args:
        member_id: member id, kept as `tensor_name`.
        measure: sample measure whose `compute` is delegated to.
    """
    super().__init__()
    self.measure = measure
    self.tensor_name = member_id

measure instance-attribute ¤

measure = measure

tensor_name instance-attribute ¤

tensor_name = member_id

compute ¤

compute(sample: Sample) -> Dict[SampleMeasure, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
381
382
def compute(self, sample: Sample) -> Dict[SampleMeasure, MeasureValue]:
    """compute the wrapped measure for `sample` and return it keyed by the measure"""
    wrapped = self.measure
    return {wrapped: wrapped.compute(sample)}

SampleQuantilesCalculator ¤

SampleQuantilesCalculator(member_id: MemberId, axes: Optional[Sequence[AxisId]], qs: Collection[float], method: QuantileMethod = 'linear')

to calculate sample quantiles

Methods:

Name Description
compute
Source code in src/bioimageio/core/stat_calculators.py
215
216
217
218
219
220
221
222
223
224
225
226
227
def __init__(
    self,
    member_id: MemberId,
    axes: Optional[Sequence[AxisId]],
    qs: Collection[float],
    method: QuantileMethod = "linear",
):
    """set up a calculator for sample quantiles

    Args:
        member_id: id of the sample member whose tensor is processed.
        axes: axes of the requested measure (stored as tuple, or `None`).
        qs: quantiles to compute, each within [0, 1];
            stored sorted and without duplicates.
        method: quantile method passed on to the tensor's `quantile` call.
    """
    super().__init__()
    for q in qs:
        assert 0.0 <= q <= 1.0

    self._qs = sorted(set(qs))
    if axes is None:
        self._axes = None
    else:
        self._axes = tuple(axes)

    self._member_id = member_id
    self._method: QuantileMethod = method

compute ¤

compute(sample: Sample) -> Dict[SampleQuantile, MeasureValue]
Source code in src/bioimageio/core/stat_calculators.py
229
230
231
232
233
234
235
236
237
def compute(self, sample: Sample) -> Dict[SampleQuantile, MeasureValue]:
    """compute all configured quantiles of `sample` in a single `quantile` call"""
    tensor = sample.members[self._member_id]
    quantile_values = tensor.quantile(
        self._qs, dim=self._axes, method=self._method
    )

    result: Dict[SampleQuantile, MeasureValue] = {}
    # iterating the result pairs each requested quantile with its entry
    for q, value in zip(self._qs, quantile_values):
        key = SampleQuantile(
            q=q, axes=self._axes, member_id=self._member_id, method=self._method
        )
        result[key] = value

    return result

StatsCalculator ¤

StatsCalculator(measures: Collection[Measure], initial_dataset_measures: Optional[Mapping[DatasetMeasure, MeasureValue]] = None)

Estimates dataset statistics and computes sample statistics efficiently

Methods:

Name Description
finalize

returns aggregated dataset statistics

skip_update_and_get_all

Returns sample as well as previously computed dataset statistics

update
update_and_get_all

Returns sample as well as updated dataset statistics

Attributes:

Name Type Description
has_dataset_measures
sample_count
Source code in src/bioimageio/core/stat_calculators.py
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
def __init__(
    self,
    measures: Collection[Measure],
    initial_dataset_measures: Optional[
        Mapping[DatasetMeasure, MeasureValue]
    ] = None,
):
    """create the calculators needed for `measures`

    Args:
        measures: all sample and dataset measures to provide.
        initial_dataset_measures: precomputed dataset measures. They are only
            used if they cover every dataset measure in `measures`;
            otherwise they are ignored (logged at debug level).
    """
    super().__init__()
    self.sample_count = 0
    calculators = get_measure_calculators(measures)
    self.sample_calculators, self.dataset_calculators = calculators

    # stays None unless complete initial dataset measures were given
    self._current_dataset_measures: Optional[
        Dict[DatasetMeasure, MeasureValue]
    ] = None
    if not initial_dataset_measures:
        return

    missing_dataset_meas = set()
    for m in measures:
        if isinstance(m, DatasetMeasureBase) and m not in initial_dataset_measures:
            missing_dataset_meas.add(m)

    if not missing_dataset_meas:
        self._current_dataset_measures = dict(initial_dataset_measures)
    else:
        logger.debug(
            f"ignoring `initial_dataset_measure` as it is missing {missing_dataset_meas}"
        )

has_dataset_measures property ¤

has_dataset_measures

sample_count instance-attribute ¤

sample_count = 0

finalize ¤

finalize() -> Dict[DatasetMeasure, MeasureValue]

returns aggregated dataset statistics

Source code in src/bioimageio/core/stat_calculators.py
440
441
442
443
444
445
446
447
448
def finalize(self) -> Dict[DatasetMeasure, MeasureValue]:
    """returns aggregated dataset statistics

    The result is kept in `_current_dataset_measures` and returned as is
    while that attribute is not None.
    """
    if self._current_dataset_measures is not None:
        return self._current_dataset_measures

    aggregated: Dict[DatasetMeasure, MeasureValue] = {}
    self._current_dataset_measures = aggregated
    for calculator in self.dataset_calculators:
        for measure, value in calculator.finalize().items():
            aggregated[measure] = value

    return self._current_dataset_measures

skip_update_and_get_all ¤

skip_update_and_get_all(sample: Sample) -> Dict[Measure, MeasureValue]

Returns sample as well as previously computed dataset statistics

Source code in src/bioimageio/core/stat_calculators.py
461
462
463
def skip_update_and_get_all(self, sample: Sample) -> Dict[Measure, MeasureValue]:
    """Returns sample as well as previously computed dataset statistics"""
    combined: Dict[Measure, MeasureValue] = dict(self._compute(sample))
    # dataset statistics are merged last
    combined.update(self.finalize())
    return combined

update ¤

update(sample: Union[Sample, Iterable[Sample]]) -> None
Source code in src/bioimageio/core/stat_calculators.py
434
435
436
437
438
def update(
    self,
    sample: Union[Sample, Iterable[Sample]],
) -> None:
    """update the statistics with `sample` (a single sample or an iterable of samples)"""
    # the value returned by `_update` is not needed here
    self._update(sample)

update_and_get_all ¤

update_and_get_all(sample: Union[Sample, Iterable[Sample]]) -> Dict[Measure, MeasureValue]

Returns sample as well as updated dataset statistics

Source code in src/bioimageio/core/stat_calculators.py
450
451
452
453
454
455
456
457
458
459
def update_and_get_all(
    self,
    sample: Union[Sample, Iterable[Sample]],
) -> Dict[Measure, MeasureValue]:
    """Returns sample as well as updated dataset statistics

    Sample statistics are computed for the sample returned by `_update`.

    Raises:
        ValueError: if `_update` did not return a sample.
    """
    final_sample = self._update(sample)
    if final_sample is not None:
        combined: Dict[Measure, MeasureValue] = dict(self._compute(final_sample))
        # dataset statistics are merged last
        combined.update(self.finalize())
        return combined

    raise ValueError("`sample` was not a `Sample`, nor did it yield any.")

compute_dataset_measures ¤

compute_dataset_measures(measures: Iterable[DatasetMeasure], dataset: Iterable[Sample]) -> Dict[DatasetMeasure, MeasureValue]

compute all dataset measures for the given dataset

Source code in src/bioimageio/core/stat_calculators.py
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
def compute_dataset_measures(
    measures: Iterable[DatasetMeasure], dataset: Iterable[Sample]
) -> Dict[DatasetMeasure, MeasureValue]:
    """compute all dataset `measures` for the given `dataset`

    Every sample of `dataset` is passed to every required calculator once;
    the finalized values of all calculators are merged into one dict.
    """
    unexpected_sample_calcs, dataset_calcs = get_measure_calculators(measures)
    assert not unexpected_sample_calcs

    for sample in dataset:
        for calculator in dataset_calcs:
            calculator.update(sample)

    collected: Dict[DatasetMeasure, MeasureValue] = {}
    for calculator in dataset_calcs:
        for measure, value in calculator.finalize().items():
            collected[measure] = value

    return collected

compute_measures ¤

compute_measures(measures: Iterable[Measure], dataset: Iterable[Sample]) -> Dict[Measure, MeasureValue]

compute all measures for the given dataset; sample measures are computed for the last sample in dataset

Source code in src/bioimageio/core/stat_calculators.py
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
def compute_measures(
    measures: Iterable[Measure], dataset: Iterable[Sample]
) -> Dict[Measure, MeasureValue]:
    """compute all `measures` for the given `dataset`
    sample measures are computed for the last sample in `dataset`

    Raises:
        ValueError: if `dataset` does not yield any sample.
    """
    sample_calcs, dataset_calcs = get_measure_calculators(measures)

    last_sample = None
    for last_sample in dataset:
        for calculator in dataset_calcs:
            calculator.update(last_sample)

    if last_sample is None:
        raise ValueError("empty dataset")

    collected: Dict[Measure, MeasureValue] = {}
    # dataset statistics first, then the statistics of the last sample
    for calculator in dataset_calcs:
        for measure, value in calculator.finalize().items():
            collected[measure] = value

    for calculator in sample_calcs:
        for measure, value in calculator.compute(last_sample).items():
            collected[measure] = value

    return collected

compute_sample_measures ¤

compute_sample_measures(measures: Iterable[SampleMeasure], sample: Sample) -> Dict[SampleMeasure, MeasureValue]

compute all sample measures for the given sample

Source code in src/bioimageio/core/stat_calculators.py
596
597
598
599
600
601
602
603
604
605
606
607
def compute_sample_measures(
    measures: Iterable[SampleMeasure], sample: Sample
) -> Dict[SampleMeasure, MeasureValue]:
    """compute all sample `measures` for the given `sample`

    The values of all required sample calculators are merged into one dict.
    """
    sample_calcs, unexpected_dataset_calcs = get_measure_calculators(measures)
    assert not unexpected_dataset_calcs

    collected: Dict[SampleMeasure, MeasureValue] = {}
    for calculator in sample_calcs:
        for measure, value in calculator.compute(sample).items():
            collected[measure] = value

    return collected

get_measure_calculators ¤

get_measure_calculators(required_measures: Iterable[Measure]) -> Tuple[List[SampleMeasureCalculator], List[DatasetMeasureCalculator]]

determines which calculators are needed to compute the required measures efficiently

Source code in src/bioimageio/core/stat_calculators.py
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
def get_measure_calculators(
    required_measures: Iterable[Measure],
) -> Tuple[List[SampleMeasureCalculator], List[DatasetMeasureCalculator]]:
    """determines which calculators are needed to compute the required measures efficiently

    Measures that one calculator can provide together are grouped:
    mean/var/std per (member, axes) and quantiles per (member, axes[, method]).
    A mean is only given its own `MeanCalculator` if it is not already covered
    by a mean/var/std group.

    Args:
        required_measures: sample and dataset measures to compute.

    Returns:
        A tuple `(sample_calculators, dataset_calculators)`.
    """

    sample_calculators: List[SampleMeasureCalculator] = []
    dataset_calculators: List[DatasetMeasureCalculator] = []

    # split required measures into groups
    required_sample_means: Set[SampleMean] = set()
    required_dataset_means: Set[DatasetMean] = set()
    required_sample_mean_var_std: Set[Union[SampleMean, SampleVar, SampleStd]] = set()
    required_dataset_mean_var_std: Set[Union[DatasetMean, DatasetVar, DatasetStd]] = (
        set()
    )
    required_sample_quantiles: Dict[
        Tuple[MemberId, Optional[Tuple[AxisId, ...]], QuantileMethod], Set[float]
    ] = {}
    required_dataset_quantiles: Dict[
        Tuple[MemberId, Optional[Tuple[AxisId, ...]]], Set[float]
    ] = {}

    for rm in required_measures:
        if isinstance(rm, SampleMean):
            required_sample_means.add(rm)
        elif isinstance(rm, DatasetMean):
            required_dataset_means.add(rm)
        elif isinstance(rm, (SampleVar, SampleStd)):
            # a var/std request pulls in the whole mean/var/std group
            required_sample_mean_var_std.update(
                {
                    msv(axes=rm.axes, member_id=rm.member_id)
                    for msv in (SampleMean, SampleStd, SampleVar)
                }
            )
            assert rm in required_sample_mean_var_std
        elif isinstance(rm, (DatasetVar, DatasetStd)):
            required_dataset_mean_var_std.update(
                {
                    msv(axes=rm.axes, member_id=rm.member_id)
                    for msv in (DatasetMean, DatasetStd, DatasetVar)
                }
            )
            assert rm in required_dataset_mean_var_std
        elif isinstance(rm, SampleQuantile):
            required_sample_quantiles.setdefault(
                (rm.member_id, rm.axes, rm.method), set()
            ).add(rm.q)
        elif isinstance(rm, DatasetQuantile):
            required_dataset_quantiles.setdefault((rm.member_id, rm.axes), set()).add(
                rm.q
            )
        else:
            assert_never(rm)

    for rm in required_sample_means:
        if rm in required_sample_mean_var_std:
            # computed together with var and std
            continue

        sample_calculators.append(MeanCalculator(member_id=rm.member_id, axes=rm.axes))

    # The mean/var/std sets hold three measures per (member, axes) group, but a
    # single `MeanVarStdCalculator` yields all three. Create one calculator per
    # group instead of one per measure to avoid computing everything thrice.
    for member_id, axes in dict.fromkeys(
        (rm.member_id, rm.axes) for rm in required_sample_mean_var_std
    ):
        sample_calculators.append(MeanVarStdCalculator(member_id=member_id, axes=axes))

    for rm in required_dataset_means:
        if rm in required_dataset_mean_var_std:
            # computed together with var and std
            continue

        dataset_calculators.append(MeanCalculator(member_id=rm.member_id, axes=rm.axes))

    # same de-duplication as for the sample mean/var/std groups above
    for member_id, axes in dict.fromkeys(
        (rm.member_id, rm.axes) for rm in required_dataset_mean_var_std
    ):
        dataset_calculators.append(
            MeanVarStdCalculator(member_id=member_id, axes=axes)
        )

    for (tid, axes, m), qs in required_sample_quantiles.items():
        sample_calculators.append(
            SampleQuantilesCalculator(member_id=tid, axes=axes, qs=qs, method=m)
        )

    for (tid, axes), qs in required_dataset_quantiles.items():
        dataset_calculators.append(
            DatasetQuantilesCalculator(member_id=tid, axes=axes, qs=qs)
        )

    return sample_calculators, dataset_calculators