Skip to content

Posteriors

DirectPosterior

Bases: NeuralPosterior

Posterior \(p(\theta|x_o)\) with log_prob() and sample() methods, only applicable to SNPE.

SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases.

Specifically, this class offers the following functionality:
- correct the calculation of the log probability such that it compensates for the leakage.
- reject samples that lie outside of the prior bounds.

This class can not be used in combination with SNLE or SNRE.

Source code in sbi/inference/posteriors/direct_posterior.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
class DirectPosterior(NeuralPosterior):
    r"""Posterior $p(\theta|x_o)$ with `log_prob()` and `sample()` methods, only
    applicable to SNPE.<br/><br/>
    SNPE trains a neural network to directly approximate the posterior distribution.
    However, for bounded priors, the neural network can have leakage: it puts non-zero
    mass in regions where the prior is zero. The `DirectPosterior` class wraps the
    trained network to deal with these cases.<br/><br/>
    Specifically, this class offers the following functionality:<br/>
    - correct the calculation of the log probability such that it compensates for the
      leakage.<br/>
    - reject samples that lie outside of the prior bounds.<br/><br/>
    This class can not be used in combination with SNLE or SNRE.
    """

    def __init__(
        self,
        posterior_estimator: ConditionalDensityEstimator,
        prior: Distribution,
        max_sampling_batch_size: int = 10_000,
        device: Optional[str] = None,
        x_shape: Optional[torch.Size] = None,
        enable_transform: bool = True,
    ):
        """
        Args:
            prior: Prior distribution with `.log_prob()` and `.sample()`.
            posterior_estimator: The trained neural posterior.
            max_sampling_batch_size: Batchsize of samples being drawn from
                the proposal at every iteration.
            device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
                `potential_fn.device` is used.
            x_shape: Deprecated, should not be passed.
            enable_transform: Whether to transform parameters to unconstrained space
                during MAP optimization. When False, an identity transform will be
                returned for `theta_transform`.
        """
        # Because `DirectPosterior` does not take the `potential_fn` as input, it
        # builds it itself. The `potential_fn` and `theta_transform` are used only for
        # obtaining the MAP.
        check_prior(prior)
        potential_fn, theta_transform = posterior_estimator_based_potential(
            posterior_estimator,
            prior,
            x_o=None,
            enable_transform=enable_transform,
        )

        super().__init__(
            potential_fn=potential_fn,
            theta_transform=theta_transform,
            device=device,
            x_shape=x_shape,
        )

        self.prior = prior
        self.posterior_estimator = posterior_estimator

        self.max_sampling_batch_size = max_sampling_batch_size
        self._leakage_density_correction_factor = None

        self._purpose = """It samples the posterior network and rejects samples that
            lie outside of the prior bounds."""

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        max_sampling_batch_size: int = 10_000,
        sample_with: Optional[str] = None,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Return samples from posterior distribution $p(\theta|x)$.

        Args:
            sample_shape: Desired shape of samples that are drawn from posterior. If
                sample_shape is multidimensional we simply draw `sample_shape.numel()`
                samples and then reshape into the desired shape.
            sample_with: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
            show_progress_bars: Whether to show sampling progress monitor.
        """
        num_samples = torch.Size(sample_shape).numel()
        x = self._x_else_default_x(x)
        x = reshape_to_batch_event(
            x, event_shape=self.posterior_estimator.condition_shape
        )
        if x.shape[0] > 1:
            raise ValueError(
                ".sample() supports only `batchsize == 1`. If you intend "
                "to sample multiple observations, use `.sample_batched()`. "
                "If you intend to sample i.i.d. observations, set up the "
                "posterior density estimator with an appropriate permutation "
                "invariant embedding net."
            )

        max_sampling_batch_size = (
            self.max_sampling_batch_size
            if max_sampling_batch_size is None
            else max_sampling_batch_size
        )

        if sample_with is not None:
            raise ValueError(
                f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
                f"`sample_with` is no longer supported. You have to rerun "
                f"`.build_posterior(sample_with={sample_with}).`"
            )

        samples = rejection.accept_reject_sample(
            proposal=self.posterior_estimator,
            accept_reject_fn=lambda theta: within_support(self.prior, theta),
            num_samples=num_samples,
            show_progress_bars=show_progress_bars,
            max_sampling_batch_size=max_sampling_batch_size,
            proposal_sampling_kwargs={"condition": x},
            alternative_method="build_posterior(..., sample_with='mcmc')",
        )[0]  # [0] to return only samples, not acceptance probabilities.

        return samples[:, 0]  # Remove batch dimension.

    def sample_batched(
        self,
        sample_shape: Shape,
        x: Tensor,
        max_sampling_batch_size: int = 10_000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Given a batch of observations [x_1, ..., x_B] this function samples from
        posteriors $p(\theta|x_1)$, ... ,$p(\theta|x_B)$, in a batched (i.e. vectorized)
        manner.

        Args:
            sample_shape: Desired shape of samples that are drawn from the posterior
                given every observation.
            x: A batch of observations, of shape `(batch_dim, event_shape_x)`.
                `batch_dim` corresponds to the number of observations to be drawn.
            max_sampling_batch_size: Maximum batch size for rejection sampling.
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples from the posteriors of shape (*sample_shape, B, *input_shape)
        """
        num_samples = torch.Size(sample_shape).numel()
        condition_shape = self.posterior_estimator.condition_shape
        x = reshape_to_batch_event(x, event_shape=condition_shape)

        max_sampling_batch_size = (
            self.max_sampling_batch_size
            if max_sampling_batch_size is None
            else max_sampling_batch_size
        )

        samples = rejection.accept_reject_sample(
            proposal=self.posterior_estimator,
            accept_reject_fn=lambda theta: within_support(self.prior, theta),
            num_samples=num_samples,
            show_progress_bars=show_progress_bars,
            max_sampling_batch_size=max_sampling_batch_size,
            proposal_sampling_kwargs={"condition": x},
            alternative_method="build_posterior(..., sample_with='mcmc')",
        )[0]

        return samples

    def log_prob(
        self,
        theta: Tensor,
        x: Optional[Tensor] = None,
        norm_posterior: bool = True,
        track_gradients: bool = False,
        leakage_correction_params: Optional[dict] = None,
    ) -> Tensor:
        r"""Returns the log-probability of the posterior $p(\theta|x)$.

        Args:
            theta: Parameters $\theta$.
            norm_posterior: Whether to enforce a normalized posterior density.
                Renormalization of the posterior is useful when some
                probability falls out or leaks out of the prescribed prior support.
                The normalizing factor is calculated via rejection sampling, so if you
                need speedier but unnormalized log posterior estimates set here
                `norm_posterior=False`. The returned log posterior is set to
                -∞ outside of the prior support regardless of this setting.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.
            leakage_correction_params: A `dict` of keyword arguments to override the
                default values of `leakage_correction()`. Possible options are:
                `num_rejection_samples`, `force_update`, `show_progress_bars`, and
                `rejection_sampling_batch_size`.
                These parameters only have an effect if `norm_posterior=True`.

        Returns:
            `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the
            support of the prior, -∞ (corresponding to 0 probability) outside.
        """
        x = self._x_else_default_x(x)

        theta = ensure_theta_batched(torch.as_tensor(theta))
        theta_density_estimator = reshape_to_sample_batch_event(
            theta, theta.shape[1:], leading_is_sample=True
        )
        x_density_estimator = reshape_to_batch_event(
            x, event_shape=self.posterior_estimator.condition_shape
        )
        if x_density_estimator.shape[0] > 1:
            raise ValueError(
                ".log_prob() supports only `batchsize == 1`. If you intend "
                "to evaluate given multiple observations, use `.log_prob_batched()`. "
                "If you intend to evaluate given i.i.d. observations, set up the "
                "posterior density estimator with an appropriate permutation "
                "invariant embedding net."
            )

        self.posterior_estimator.eval()

        with torch.set_grad_enabled(track_gradients):
            # Evaluate on device, move back to cpu for comparison with prior.
            unnorm_log_prob = self.posterior_estimator.log_prob(
                theta_density_estimator, condition=x_density_estimator
            )
            # `log_prob` supports only a single observation (i.e. `batchsize==1`).
            # We now remove this additional dimension.
            unnorm_log_prob = unnorm_log_prob.squeeze(dim=1)

            # Force probability to be zero outside prior support.
            in_prior_support = within_support(self.prior, theta)

            masked_log_prob = torch.where(
                in_prior_support,
                unnorm_log_prob,
                torch.tensor(float("-inf"), dtype=torch.float32, device=self._device),
            )

            if leakage_correction_params is None:
                leakage_correction_params = dict()  # use defaults
            log_factor = (
                log(self.leakage_correction(x=x, **leakage_correction_params))
                if norm_posterior
                else 0
            )

            return masked_log_prob - log_factor

    def log_prob_batched(
        self,
        theta: Tensor,
        x: Tensor,
        norm_posterior: bool = True,
        track_gradients: bool = False,
        leakage_correction_params: Optional[dict] = None,
    ) -> Tensor:
        """Given a batch of observations [x_1, ..., x_B] and a batch of parameters \
            [$\theta_1$,..., $\theta_B$] this function evalautes the log-probabilities \
            of the posteriors $p(\theta_1|x_1)$, ..., $p(\theta_B|x_B)$ in a batched \
            (i.e. vectorized) manner.

        Args:
            theta: Batch of parameters $\theta$ of shape \
                `(*sample_shape, batch_dim, *theta_shape)`.
            x: Batch of observations $x$ of shape \
                `(batch_dim, *condition_shape)`.
            norm_posterior: Whether to enforce a normalized posterior density.
                Renormalization of the posterior is useful when some
                probability falls out or leaks out of the prescribed prior support.
                The normalizing factor is calculated via rejection sampling, so if you
                need speedier but unnormalized log posterior estimates set here
                `norm_posterior=False`. The returned log posterior is set to
                -∞ outside of the prior support regardless of this setting.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.
            leakage_correction_params: A `dict` of keyword arguments to override the
                default values of `leakage_correction()`. Possible options are:
                `num_rejection_samples`, `force_update`, `show_progress_bars`, and
                `rejection_sampling_batch_size`.
                These parameters only have an effect if `norm_posterior=True`.

        Returns:
            `(len(θ), B)`-shaped log posterior probability $\\log p(\theta|x)$\\ for θ \
            in the support of the prior, -∞ (corresponding to 0 probability) outside.
        """

        theta = ensure_theta_batched(torch.as_tensor(theta))
        event_shape = self.posterior_estimator.input_shape
        theta_density_estimator = reshape_to_sample_batch_event(
            theta, event_shape, leading_is_sample=True
        )
        x_density_estimator = reshape_to_batch_event(
            x, event_shape=self.posterior_estimator.condition_shape
        )

        self.posterior_estimator.eval()

        with torch.set_grad_enabled(track_gradients):
            # Evaluate on device, move back to cpu for comparison with prior.
            unnorm_log_prob = self.posterior_estimator.log_prob(
                theta_density_estimator, condition=x_density_estimator
            )

            # Force probability to be zero outside prior support.
            in_prior_support = within_support(self.prior, theta)

            masked_log_prob = torch.where(
                in_prior_support,
                unnorm_log_prob,
                torch.tensor(float("-inf"), dtype=torch.float32, device=self._device),
            )

            if leakage_correction_params is None:
                leakage_correction_params = dict()  # use defaults
            log_factor = (
                log(self.leakage_correction(x=x, **leakage_correction_params))
                if norm_posterior
                else 0
            )

            return masked_log_prob - log_factor

    @torch.no_grad()
    def leakage_correction(
        self,
        x: Tensor,
        num_rejection_samples: int = 10_000,
        force_update: bool = False,
        show_progress_bars: bool = False,
        rejection_sampling_batch_size: int = 10_000,
    ) -> Tensor:
        r"""Return leakage correction factor for a leaky posterior density estimate.

        The factor is estimated from the acceptance probability during rejection
        sampling from the posterior.

        This is to avoid re-estimating the acceptance probability from scratch
        whenever `log_prob` is called and `norm_posterior=True`. Here, it
        is estimated only once for `self.default_x` and saved for later. We
        re-evaluate only whenever a new `x` is passed.

        Arguments:
            num_rejection_samples: Number of samples used to estimate correction factor.
            show_progress_bars: Whether to show a progress bar during sampling.
            rejection_sampling_batch_size: Batch size for rejection sampling.

        Returns:
            Saved or newly-estimated correction factor (as a scalar `Tensor`).
        """

        def acceptance_at(x: Tensor) -> Tensor:
            # [1:] to remove batch-dimension for `reshape_to_batch_event`.
            return rejection.accept_reject_sample(
                proposal=self.posterior_estimator,
                accept_reject_fn=lambda theta: within_support(self.prior, theta),
                num_samples=num_rejection_samples,
                show_progress_bars=show_progress_bars,
                sample_for_correction_factor=True,
                max_sampling_batch_size=rejection_sampling_batch_size,
                proposal_sampling_kwargs={
                    "condition": reshape_to_batch_event(
                        x, event_shape=self.posterior_estimator.condition_shape
                    )
                },
            )[1]

        # Check if the provided x matches the default x (short-circuit on identity).
        is_new_x = self.default_x is None or (
            x is not self.default_x and (x != self.default_x).any()
        )

        not_saved_at_default_x = self._leakage_density_correction_factor is None

        if is_new_x:  # Calculate at x; don't save.
            return acceptance_at(x)
        elif not_saved_at_default_x or force_update:  # Calculate at default_x; save.
            assert self.default_x is not None
            self._leakage_density_correction_factor = acceptance_at(self.default_x)

        return self._leakage_density_correction_factor  # type: ignore

    def map(
        self,
        x: Optional[Tensor] = None,
        num_iter: int = 1_000,
        num_to_optimize: int = 100,
        learning_rate: float = 0.01,
        init_method: Union[str, Tensor] = "posterior",
        num_init_samples: int = 1_000,
        save_best_every: int = 10,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from the
                posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.
            log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
                {'norm_posterior': True} for SNPE.

        Returns:
            The MAP estimate.
        """
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

__init__(posterior_estimator, prior, max_sampling_batch_size=10000, device=None, x_shape=None, enable_transform=True)

Parameters:

Name Type Description Default
prior Distribution

Prior distribution with .log_prob() and .sample().

required
posterior_estimator ConditionalDensityEstimator

The trained neural posterior.

required
max_sampling_batch_size int

Batchsize of samples being drawn from the proposal at every iteration.

10000
device Optional[str]

Training device, e.g., “cpu”, “cuda” or “cuda:0”. If None, potential_fn.device is used.

None
x_shape Optional[Size]

Deprecated, should not be passed.

None
enable_transform bool

Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform.

True
Source code in sbi/inference/posteriors/direct_posterior.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def __init__(
    self,
    posterior_estimator: ConditionalDensityEstimator,
    prior: Distribution,
    max_sampling_batch_size: int = 10_000,
    device: Optional[str] = None,
    x_shape: Optional[torch.Size] = None,
    enable_transform: bool = True,
):
    """
    Args:
        prior: Prior distribution with `.log_prob()` and `.sample()`.
        posterior_estimator: The trained neural posterior.
        max_sampling_batch_size: Batchsize of samples being drawn from
            the proposal at every iteration.
        device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
            `potential_fn.device` is used.
        x_shape: Deprecated, should not be passed.
        enable_transform: Whether to transform parameters to unconstrained space
            during MAP optimization. When False, an identity transform will be
            returned for `theta_transform`.
    """
    # Because `DirectPosterior` does not take the `potential_fn` as input, it
    # builds it itself. The `potential_fn` and `theta_transform` are used only for
    # obtaining the MAP.
    check_prior(prior)
    potential_fn, theta_transform = posterior_estimator_based_potential(
        posterior_estimator,
        prior,
        x_o=None,
        enable_transform=enable_transform,
    )

    super().__init__(
        potential_fn=potential_fn,
        theta_transform=theta_transform,
        device=device,
        x_shape=x_shape,
    )

    self.prior = prior
    self.posterior_estimator = posterior_estimator

    self.max_sampling_batch_size = max_sampling_batch_size
    self._leakage_density_correction_factor = None

    self._purpose = """It samples the posterior network and rejects samples that
        lie outside of the prior bounds."""

leakage_correction(x, num_rejection_samples=10000, force_update=False, show_progress_bars=False, rejection_sampling_batch_size=10000)

Return leakage correction factor for a leaky posterior density estimate.

The factor is estimated from the acceptance probability during rejection sampling from the posterior.

This is to avoid re-estimating the acceptance probability from scratch whenever log_prob is called and norm_posterior=True. Here, it is estimated only once for self.default_x and saved for later. We re-evaluate only whenever a new x is passed.

Parameters:

Name Type Description Default
num_rejection_samples int

Number of samples used to estimate correction factor.

10000
show_progress_bars bool

Whether to show a progress bar during sampling.

False
rejection_sampling_batch_size int

Batch size for rejection sampling.

10000

Returns:

Type Description
Tensor

Saved or newly-estimated correction factor (as a scalar Tensor).

Source code in sbi/inference/posteriors/direct_posterior.py
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
@torch.no_grad()
def leakage_correction(
    self,
    x: Tensor,
    num_rejection_samples: int = 10_000,
    force_update: bool = False,
    show_progress_bars: bool = False,
    rejection_sampling_batch_size: int = 10_000,
) -> Tensor:
    r"""Return leakage correction factor for a leaky posterior density estimate.

    The factor is estimated from the acceptance probability during rejection
    sampling from the posterior.

    This is to avoid re-estimating the acceptance probability from scratch
    whenever `log_prob` is called and `norm_posterior=True`. Here, it
    is estimated only once for `self.default_x` and saved for later. We
    re-evaluate only whenever a new `x` is passed.

    Arguments:
        num_rejection_samples: Number of samples used to estimate correction factor.
        show_progress_bars: Whether to show a progress bar during sampling.
        rejection_sampling_batch_size: Batch size for rejection sampling.

    Returns:
        Saved or newly-estimated correction factor (as a scalar `Tensor`).
    """

    def acceptance_at(x: Tensor) -> Tensor:
        # [1:] to remove batch-dimension for `reshape_to_batch_event`.
        return rejection.accept_reject_sample(
            proposal=self.posterior_estimator,
            accept_reject_fn=lambda theta: within_support(self.prior, theta),
            num_samples=num_rejection_samples,
            show_progress_bars=show_progress_bars,
            sample_for_correction_factor=True,
            max_sampling_batch_size=rejection_sampling_batch_size,
            proposal_sampling_kwargs={
                "condition": reshape_to_batch_event(
                    x, event_shape=self.posterior_estimator.condition_shape
                )
            },
        )[1]

    # Check if the provided x matches the default x (short-circuit on identity).
    is_new_x = self.default_x is None or (
        x is not self.default_x and (x != self.default_x).any()
    )

    not_saved_at_default_x = self._leakage_density_correction_factor is None

    if is_new_x:  # Calculate at x; don't save.
        return acceptance_at(x)
    elif not_saved_at_default_x or force_update:  # Calculate at default_x; save.
        assert self.default_x is not None
        self._leakage_density_correction_factor = acceptance_at(self.default_x)

    return self._leakage_density_correction_factor  # type: ignore

log_prob(theta, x=None, norm_posterior=True, track_gradients=False, leakage_correction_params=None)

Returns the log-probability of the posterior \(p(\theta|x)\).

Parameters:

Name Type Description Default
theta Tensor

Parameters \(\theta\).

required
norm_posterior bool

Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False. The returned log posterior is set to -∞ outside of the prior support regardless of this setting.

True
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False
leakage_correction_params Optional[dict]

A dict of keyword arguments to override the default values of leakage_correction(). Possible options are: num_rejection_samples, force_update, show_progress_bars, and rejection_sampling_batch_size. These parameters only have an effect if norm_posterior=True.

None

Returns:

Type Description
Tensor

(len(θ),)-shaped log posterior probability \(\log p(\theta|x)\) for θ in the

Tensor

support of the prior, -∞ (corresponding to 0 probability) outside.

Source code in sbi/inference/posteriors/direct_posterior.py
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
def log_prob(
    self,
    theta: Tensor,
    x: Optional[Tensor] = None,
    norm_posterior: bool = True,
    track_gradients: bool = False,
    leakage_correction_params: Optional[dict] = None,
) -> Tensor:
    r"""Returns the log-probability of the posterior $p(\theta|x)$.

    Args:
        theta: Parameters $\theta$.
        norm_posterior: Whether to enforce a normalized posterior density.
            Renormalization of the posterior is useful when some
            probability falls out or leaks out of the prescribed prior support.
            The normalizing factor is calculated via rejection sampling, so if you
            need speedier but unnormalized log posterior estimates set here
            `norm_posterior=False`. The returned log posterior is set to
            -∞ outside of the prior support regardless of this setting.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.
        leakage_correction_params: A `dict` of keyword arguments to override the
            default values of `leakage_correction()`. Possible options are:
            `num_rejection_samples`, `force_update`, `show_progress_bars`, and
            `rejection_sampling_batch_size`.
            These parameters only have an effect if `norm_posterior=True`.

    Returns:
        `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the
        support of the prior, -∞ (corresponding to 0 probability) outside.
    """
    x = self._x_else_default_x(x)

    theta = ensure_theta_batched(torch.as_tensor(theta))
    theta_density_estimator = reshape_to_sample_batch_event(
        theta, theta.shape[1:], leading_is_sample=True
    )
    x_density_estimator = reshape_to_batch_event(
        x, event_shape=self.posterior_estimator.condition_shape
    )
    if x_density_estimator.shape[0] > 1:
        raise ValueError(
            ".log_prob() supports only `batchsize == 1`. If you intend "
            "to evaluate given multiple observations, use `.log_prob_batched()`. "
            "If you intend to evaluate given i.i.d. observations, set up the "
            "posterior density estimator with an appropriate permutation "
            "invariant embedding net."
        )

    self.posterior_estimator.eval()

    with torch.set_grad_enabled(track_gradients):
        # Evaluate on device, move back to cpu for comparison with prior.
        unnorm_log_prob = self.posterior_estimator.log_prob(
            theta_density_estimator, condition=x_density_estimator
        )
        # `log_prob` supports only a single observation (i.e. `batchsize==1`).
        # We now remove this additional dimension.
        unnorm_log_prob = unnorm_log_prob.squeeze(dim=1)

        # Force probability to be zero outside prior support.
        in_prior_support = within_support(self.prior, theta)

        masked_log_prob = torch.where(
            in_prior_support,
            unnorm_log_prob,
            torch.tensor(float("-inf"), dtype=torch.float32, device=self._device),
        )

        if leakage_correction_params is None:
            leakage_correction_params = dict()  # use defaults
        log_factor = (
            log(self.leakage_correction(x=x, **leakage_correction_params))
            if norm_posterior
            else 0
        )

        return masked_log_prob - log_factor

log_prob_batched(theta, x, norm_posterior=True, track_gradients=False, leakage_correction_params=None)

Given a batch of observations [x_1, …, x_B] and a batch of parameters [$ heta_1$,…, $ heta_B$] this function evalautes the log-probabilities of the posteriors \(p( heta_1|x_1)\), …, \(p( heta_B|x_B)\) in a batched (i.e. vectorized) manner.

Parameters:

Name Type Description Default
theta Tensor

Batch of parameters $ heta$ of shape (*sample_shape, batch_dim, *theta_shape).

required
x Tensor

Batch of observations \(x\) of shape (batch_dim, *condition_shape).

required
norm_posterior bool

Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False. The returned log posterior is set to -∞ outside of the prior support regardless of this setting.

True
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False
leakage_correction_params Optional[dict]

A dict of keyword arguments to override the default values of leakage_correction(). Possible options are: num_rejection_samples, force_update, show_progress_bars, and rejection_sampling_batch_size. These parameters only have an effect if norm_posterior=True.

None

Returns:

Type Description
Tensor

(len(θ), B)-shaped log posterior probability \(\log p( heta|x)\) for θ in the support of the prior, -∞ (corresponding to 0 probability) outside.

Source code in sbi/inference/posteriors/direct_posterior.py
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
def log_prob_batched(
    self,
    theta: Tensor,
    x: Tensor,
    norm_posterior: bool = True,
    track_gradients: bool = False,
    leakage_correction_params: Optional[dict] = None,
) -> Tensor:
    """Given a batch of observations [x_1, ..., x_B] and a batch of parameters \
        [$\theta_1$,..., $\theta_B$] this function evalautes the log-probabilities \
        of the posteriors $p(\theta_1|x_1)$, ..., $p(\theta_B|x_B)$ in a batched \
        (i.e. vectorized) manner.

    Args:
        theta: Batch of parameters $\theta$ of shape \
            `(*sample_shape, batch_dim, *theta_shape)`.
        x: Batch of observations $x$ of shape \
            `(batch_dim, *condition_shape)`.
        norm_posterior: Whether to enforce a normalized posterior density.
            Renormalization of the posterior is useful when some
            probability falls out or leaks out of the prescribed prior support.
            The normalizing factor is calculated via rejection sampling, so if you
            need speedier but unnormalized log posterior estimates set here
            `norm_posterior=False`. The returned log posterior is set to
            -∞ outside of the prior support regardless of this setting.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.
        leakage_correction_params: A `dict` of keyword arguments to override the
            default values of `leakage_correction()`. Possible options are:
            `num_rejection_samples`, `force_update`, `show_progress_bars`, and
            `rejection_sampling_batch_size`.
            These parameters only have an effect if `norm_posterior=True`.

    Returns:
        `(len(θ), B)`-shaped log posterior probability $\\log p(\theta|x)$\\ for θ \
        in the support of the prior, -∞ (corresponding to 0 probability) outside.
    """

    theta = ensure_theta_batched(torch.as_tensor(theta))
    event_shape = self.posterior_estimator.input_shape
    theta_density_estimator = reshape_to_sample_batch_event(
        theta, event_shape, leading_is_sample=True
    )
    x_density_estimator = reshape_to_batch_event(
        x, event_shape=self.posterior_estimator.condition_shape
    )

    self.posterior_estimator.eval()

    with torch.set_grad_enabled(track_gradients):
        # Evaluate on device, move back to cpu for comparison with prior.
        unnorm_log_prob = self.posterior_estimator.log_prob(
            theta_density_estimator, condition=x_density_estimator
        )

        # Force probability to be zero outside prior support.
        in_prior_support = within_support(self.prior, theta)

        masked_log_prob = torch.where(
            in_prior_support,
            unnorm_log_prob,
            torch.tensor(float("-inf"), dtype=torch.float32, device=self._device),
        )

        if leakage_correction_params is None:
            leakage_correction_params = dict()  # use defaults
        log_factor = (
            log(self.leakage_correction(x=x, **leakage_correction_params))
            if norm_posterior
            else 0
        )

        return masked_log_prob - log_factor

map(x=None, num_iter=1000, num_to_optimize=100, learning_rate=0.01, init_method='posterior', num_init_samples=1000, save_best_every=10, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
learning_rate float

Learning rate of the optimizer.

0.01
init_method Union[str, Tensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'posterior'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

1000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

100
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

10
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False
log_prob_kwargs

Will be empty for SNLE and SNRE. Will contain {‘norm_posterior’: True} for SNPE.

required

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/direct_posterior.py
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
def map(
    self,
    x: Optional[Tensor] = None,
    num_iter: int = 1_000,
    num_to_optimize: int = 100,
    learning_rate: float = 0.01,
    init_method: Union[str, Tensor] = "posterior",
    num_init_samples: int = 1_000,
    save_best_every: int = 10,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from the
            posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.
        log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
            {'norm_posterior': True} for SNPE.

    Returns:
        The MAP estimate.
    """
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, max_sampling_batch_size=10000, sample_with=None, show_progress_bars=True)

Return samples from posterior distribution \(p(\theta|x)\).

Parameters:

Name Type Description Default
sample_shape Shape

Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape.

Size()
sample_with Optional[str]

This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error.

None
show_progress_bars bool

Whether to show sampling progress monitor.

True
Source code in sbi/inference/posteriors/direct_posterior.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    max_sampling_batch_size: int = 10_000,
    sample_with: Optional[str] = None,
    show_progress_bars: bool = True,
) -> Tensor:
    r"""Return samples from posterior distribution $p(\theta|x)$.

    Args:
        sample_shape: Desired shape of samples that are drawn from posterior. If
            sample_shape is multidimensional we simply draw `sample_shape.numel()`
            samples and then reshape into the desired shape.
        sample_with: This argument only exists to keep backward-compatibility with
            `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
        show_progress_bars: Whether to show sampling progress monitor.
    """
    num_samples = torch.Size(sample_shape).numel()
    x = self._x_else_default_x(x)
    x = reshape_to_batch_event(
        x, event_shape=self.posterior_estimator.condition_shape
    )
    if x.shape[0] > 1:
        raise ValueError(
            ".sample() supports only `batchsize == 1`. If you intend "
            "to sample multiple observations, use `.sample_batched()`. "
            "If you intend to sample i.i.d. observations, set up the "
            "posterior density estimator with an appropriate permutation "
            "invariant embedding net."
        )

    max_sampling_batch_size = (
        self.max_sampling_batch_size
        if max_sampling_batch_size is None
        else max_sampling_batch_size
    )

    if sample_with is not None:
        raise ValueError(
            f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
            f"`sample_with` is no longer supported. You have to rerun "
            f"`.build_posterior(sample_with={sample_with}).`"
        )

    samples = rejection.accept_reject_sample(
        proposal=self.posterior_estimator,
        accept_reject_fn=lambda theta: within_support(self.prior, theta),
        num_samples=num_samples,
        show_progress_bars=show_progress_bars,
        max_sampling_batch_size=max_sampling_batch_size,
        proposal_sampling_kwargs={"condition": x},
        alternative_method="build_posterior(..., sample_with='mcmc')",
    )[0]  # [0] to return only samples, not acceptance probabilities.

    return samples[:, 0]  # Remove batch dimension.

sample_batched(sample_shape, x, max_sampling_batch_size=10000, show_progress_bars=True)

Given a batch of observations [x_1, …, x_B] this function samples from posteriors \(p(\theta|x_1)\), … ,\(p(\theta|x_B)\), in a batched (i.e. vectorized) manner.

Parameters:

Name Type Description Default
sample_shape Shape

Desired shape of samples that are drawn from the posterior given every observation.

required
x Tensor

A batch of observations, of shape (batch_dim, event_shape_x). batch_dim corresponds to the number of observations to be drawn.

required
max_sampling_batch_size int

Maximum batch size for rejection sampling.

10000
show_progress_bars bool

Whether to show sampling progress monitor.

True

Returns:

Type Description
Tensor

Samples from the posteriors of shape (*sample_shape, B, *input_shape)

Source code in sbi/inference/posteriors/direct_posterior.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
def sample_batched(
    self,
    sample_shape: Shape,
    x: Tensor,
    max_sampling_batch_size: int = 10_000,
    show_progress_bars: bool = True,
) -> Tensor:
    r"""Given a batch of observations [x_1, ..., x_B] this function samples from
    posteriors $p(\theta|x_1)$, ... ,$p(\theta|x_B)$, in a batched (i.e. vectorized)
    manner.

    Args:
        sample_shape: Desired shape of samples that are drawn from the posterior
            given every observation.
        x: A batch of observations, of shape `(batch_dim, event_shape_x)`.
            `batch_dim` corresponds to the number of observations to be drawn.
        max_sampling_batch_size: Maximum batch size for rejection sampling.
        show_progress_bars: Whether to show sampling progress monitor.

    Returns:
        Samples from the posteriors of shape (*sample_shape, B, *input_shape)
    """
    num_samples = torch.Size(sample_shape).numel()
    condition_shape = self.posterior_estimator.condition_shape
    x = reshape_to_batch_event(x, event_shape=condition_shape)

    max_sampling_batch_size = (
        self.max_sampling_batch_size
        if max_sampling_batch_size is None
        else max_sampling_batch_size
    )

    samples = rejection.accept_reject_sample(
        proposal=self.posterior_estimator,
        accept_reject_fn=lambda theta: within_support(self.prior, theta),
        num_samples=num_samples,
        show_progress_bars=show_progress_bars,
        max_sampling_batch_size=max_sampling_batch_size,
        proposal_sampling_kwargs={"condition": x},
        alternative_method="build_posterior(..., sample_with='mcmc')",
    )[0]

    return samples

ImportanceSamplingPosterior

Bases: NeuralPosterior

Provides importance sampling to sample from the posterior.

SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample()) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample())

Source code in sbi/inference/posteriors/importance_posterior.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
class ImportanceSamplingPosterior(NeuralPosterior):
    r"""Provides importance sampling to sample from the posterior.<br/><br/>
    SNLE or SNRE train neural networks to approximate the likelihood(-ratios).
    `ImportanceSamplingPosterior` allows to estimate the posterior log-probability by
    estimating the normlalization constant with importance sampling. It also allows to
    perform importance sampling (with `.sample()`) and to draw approximate samples with
    sampling-importance-resampling (SIR) (with `.sir_sample()`)
    """

    def __init__(
        self,
        potential_fn: Union[Callable, BasePotential],
        proposal: Any,
        theta_transform: Optional[TorchTransform] = None,
        method: str = "sir",
        oversampling_factor: int = 32,
        max_sampling_batch_size: int = 10_000,
        device: Optional[str] = None,
        x_shape: Optional[torch.Size] = None,
    ):
        """
        Args:
            potential_fn: The potential function from which to draw samples. Must be a
                `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
            proposal: The proposal distribution.
            theta_transform: Transformation that is applied to parameters. Is not used
                during but only when calling `.map()`.
            method: Either of [`sir`|`importance`]. This sets the behavior of the
                `.sample()` method. With `sir`, approximate posterior samples are
                generated with sampling importance resampling (SIR). With
                `importance`, the `.sample()` method returns a tuple of samples and
                corresponding importance weights.
            oversampling_factor: Number of proposed samples from which only one is
                selected based on its importance weight.
            max_sampling_batch_size: The batch size of samples being drawn from the
                proposal at every iteration.
            device: Device on which to sample, e.g., "cpu", "cuda" or "cuda:0". If
                None, `potential_fn.device` is used.
            x_shape: Deprecated, should not be passed.
        """
        super().__init__(
            potential_fn,
            theta_transform=theta_transform,
            device=device,
            x_shape=x_shape,
        )

        self.proposal = proposal
        self._normalization_constant = None
        self.method = method

        self.oversampling_factor = oversampling_factor
        self.max_sampling_batch_size = max_sampling_batch_size

        self._purpose = (
            "It provides sampling-importance resampling (SIR) to .sample() from the "
            "posterior and can evaluate the _unnormalized_ posterior density with "
            ".log_prob()."
        )

    def log_prob(
        self,
        theta: Tensor,
        x: Optional[Tensor] = None,
        track_gradients: bool = False,
        normalization_constant_params: Optional[dict] = None,
    ) -> Tensor:
        r"""Returns the log-probability of theta under the posterior.

        The normalization constant is estimated with importance sampling.

        Args:
            theta: Parameters $\theta$.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.
            normalization_constant_params: Parameters passed on to
                `estimate_normalization_constant()`.

        Returns:
            `len($\theta$)`-shaped log-probability.
        """
        x = self._x_else_default_x(x)
        self.potential_fn.set_x(x)

        theta = ensure_theta_batched(torch.as_tensor(theta))

        with torch.set_grad_enabled(track_gradients):
            potential_values = self.potential_fn(
                theta.to(self._device), track_gradients=track_gradients
            )

            if normalization_constant_params is None:
                normalization_constant_params = dict()  # use defaults
            normalization_constant = self.estimate_normalization_constant(
                x, **normalization_constant_params
            )

            return (potential_values - torch.log(normalization_constant)).to(
                self._device
            )

    @torch.no_grad()
    def estimate_normalization_constant(
        self, x: Tensor, num_samples: int = 10_000, force_update: bool = False
    ) -> Tensor:
        """Returns the normalization constant via importance sampling.

        Args:
            num_samples: Number of importance samples used for the estimate.
            force_update: Whether to re-calculate the normlization constant when x is
                unchanged and have a cached value.
        """
        # Check if the provided x matches the default x (short-circuit on identity).
        is_new_x = self.default_x is None or (
            x is not self.default_x and (x != self.default_x).any()
        )

        not_saved_at_default_x = self._normalization_constant is None

        if is_new_x:  # Calculate at x; don't save.
            _, log_importance_weights = importance_sample(
                self.potential_fn,
                proposal=self.proposal,
                num_samples=num_samples,
            )
            return torch.mean(torch.exp(log_importance_weights))
        elif not_saved_at_default_x or force_update:  # Calculate at default_x; save.
            assert self.default_x is not None
            _, log_importance_weights = importance_sample(
                self.potential_fn,
                proposal=self.proposal,
                num_samples=num_samples,
            )
            self._normalization_constant = torch.mean(torch.exp(log_importance_weights))

        return self._normalization_constant.to(self._device)  # type: ignore

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        method: Optional[str] = None,
        oversampling_factor: int = 32,
        max_sampling_batch_size: int = 10_000,
        sample_with: Optional[str] = None,
        show_progress_bars: bool = False,
    ) -> Union[Tensor, Tuple[Tensor, Tensor]]:
        """Return samples from the approximate posterior distribution.

        Args:
            sample_shape: Shape of samples that are drawn from posterior.
            x: Observed data.
            method: Either of [`sir`|`importance`]. This sets the behavior of the
                `.sample()` method. With `sir`, approximate posterior samples are
                generated with sampling importance resampling (SIR). With
                `importance`, the `.sample()` method returns a tuple of samples and
                corresponding importance weights.
            oversampling_factor: Number of proposed samples from which only one is
                selected based on its importance weight.
            max_sampling_batch_size: The batch size of samples being drawn from the
                proposal at every iteration.
            show_progress_bars: Whether to show a progressbar during sampling.
        """

        method = self.method if method is None else method

        if sample_with is not None:
            raise ValueError(
                f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
                f"`sample_with` is no longer supported. You have to rerun "
                f"`.build_posterior(sample_with={sample_with}).`"
            )

        self.potential_fn.set_x(self._x_else_default_x(x))

        if method == "sir":
            return self._sir_sample(
                sample_shape,
                oversampling_factor=oversampling_factor,
                max_sampling_batch_size=max_sampling_batch_size,
                show_progress_bars=show_progress_bars,
            )
        elif method == "importance":
            return self._importance_sample(sample_shape)
        else:
            raise NameError

    def sample_batched(
        self,
        sample_shape: Shape,
        x: Tensor,
        max_sampling_batch_size: int = 10000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        raise NotImplementedError(
            "Batched sampling is not implemented for ImportanceSamplingPosterior. \
            Alternatively you can use `sample` in a loop \
            [posterior.sample(theta, x_o) for x_o in x]."
        )

    def _importance_sample(
        self,
        sample_shape: Shape = torch.Size(),
        show_progress_bars: bool = False,
    ) -> Tuple[Tensor, Tensor]:
        """Returns samples from the proposal and log of their importance weights.

        Args:
            sample_shape: Desired shape of samples that are drawn from posterior.
            sample_with: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples and logarithm of corresponding importance weights.
        """
        num_samples = torch.Size(sample_shape).numel()
        samples, log_importance_weights = importance_sample(
            self.potential_fn,
            proposal=self.proposal,
            num_samples=num_samples,
            show_progress_bars=show_progress_bars,
        )

        samples = samples.reshape((*sample_shape, -1)).to(self._device)
        return samples, log_importance_weights.to(self._device)

    def _sir_sample(
        self,
        sample_shape: Shape = torch.Size(),
        oversampling_factor: int = 32,
        max_sampling_batch_size: int = 10_000,
        show_progress_bars: bool = False,
    ):
        r"""Returns approximate samples from posterior $p(\theta|x)$ via SIR.

        Args:
            sample_shape: Desired shape of samples that are drawn from posterior. If
                sample_shape is multidimensional we simply draw `sample_shape.numel()`
                samples and then reshape into the desired shape.
            x: Observed data.
            sample_with: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
            oversampling_factor: Number of proposed samples form which only one is
                selected based on its importance weight.
            max_sampling_batch_size: The batchsize of samples being drawn from
                the proposal at every iteration. Used only in `sir_sample()`.
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples from posterior.
        """
        # Replace arguments that were not passed with their default.
        oversampling_factor = (
            self.oversampling_factor
            if oversampling_factor is None
            else oversampling_factor
        )
        max_sampling_batch_size = (
            self.max_sampling_batch_size
            if max_sampling_batch_size is None
            else max_sampling_batch_size
        )

        num_samples = torch.Size(sample_shape).numel()
        samples = sampling_importance_resampling(
            self.potential_fn,
            proposal=self.proposal,
            num_samples=num_samples,
            num_candidate_samples=oversampling_factor,
            show_progress_bars=show_progress_bars,
            max_sampling_batch_size=max_sampling_batch_size,
            device=self._device,
        )

        return samples.reshape((*sample_shape, -1)).to(self._device)

    def map(
        self,
        x: Optional[Tensor] = None,
        num_iter: int = 1_000,
        num_to_optimize: int = 100,
        learning_rate: float = 0.01,
        init_method: Union[str, Tensor] = "proposal",
        num_init_samples: int = 1_000,
        save_best_every: int = 10,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from the
                posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.
            log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
                {'norm_posterior': True} for SNPE.

        Returns:
            The MAP estimate.
        """
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

__init__(potential_fn, proposal, theta_transform=None, method='sir', oversampling_factor=32, max_sampling_batch_size=10000, device=None, x_shape=None)

Parameters:

Name Type Description Default
potential_fn Union[Callable, BasePotential]

The potential function from which to draw samples. Must be a BasePotential or a Callable which takes theta and x_o as inputs.

required
proposal Any

The proposal distribution.

required
theta_transform Optional[TorchTransform]

Transformation that is applied to parameters. Is not used during but only when calling .map().

None
method str

Either of [sir|importance]. This sets the behavior of the .sample() method. With sir, approximate posterior samples are generated with sampling importance resampling (SIR). With importance, the .sample() method returns a tuple of samples and corresponding importance weights.

'sir'
oversampling_factor int

Number of proposed samples from which only one is selected based on its importance weight.

32
max_sampling_batch_size int

The batch size of samples being drawn from the proposal at every iteration.

10000
device Optional[str]

Device on which to sample, e.g., “cpu”, “cuda” or “cuda:0”. If None, potential_fn.device is used.

None
x_shape Optional[Size]

Deprecated, should not be passed.

None
Source code in sbi/inference/posteriors/importance_posterior.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def __init__(
    self,
    potential_fn: Union[Callable, BasePotential],
    proposal: Any,
    theta_transform: Optional[TorchTransform] = None,
    method: str = "sir",
    oversampling_factor: int = 32,
    max_sampling_batch_size: int = 10_000,
    device: Optional[str] = None,
    x_shape: Optional[torch.Size] = None,
):
    """
    Args:
        potential_fn: The potential function from which to draw samples. Must be a
            `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
        proposal: The proposal distribution.
        theta_transform: Transformation that is applied to parameters. Is not used
            during but only when calling `.map()`.
        method: Either of [`sir`|`importance`]. This sets the behavior of the
            `.sample()` method. With `sir`, approximate posterior samples are
            generated with sampling importance resampling (SIR). With
            `importance`, the `.sample()` method returns a tuple of samples and
            corresponding importance weights.
        oversampling_factor: Number of proposed samples from which only one is
            selected based on its importance weight.
        max_sampling_batch_size: The batch size of samples being drawn from the
            proposal at every iteration.
        device: Device on which to sample, e.g., "cpu", "cuda" or "cuda:0". If
            None, `potential_fn.device` is used.
        x_shape: Deprecated, should not be passed.
    """
    super().__init__(
        potential_fn,
        theta_transform=theta_transform,
        device=device,
        x_shape=x_shape,
    )

    self.proposal = proposal
    self._normalization_constant = None
    self.method = method

    self.oversampling_factor = oversampling_factor
    self.max_sampling_batch_size = max_sampling_batch_size

    self._purpose = (
        "It provides sampling-importance resampling (SIR) to .sample() from the "
        "posterior and can evaluate the _unnormalized_ posterior density with "
        ".log_prob()."
    )

estimate_normalization_constant(x, num_samples=10000, force_update=False)

Returns the normalization constant via importance sampling.

Parameters:

Name Type Description Default
num_samples int

Number of importance samples used for the estimate.

10000
force_update bool

Whether to re-calculate the normlization constant when x is unchanged and have a cached value.

False
Source code in sbi/inference/posteriors/importance_posterior.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
@torch.no_grad()
def estimate_normalization_constant(
    self, x: Tensor, num_samples: int = 10_000, force_update: bool = False
) -> Tensor:
    """Returns the normalization constant via importance sampling.

    Args:
        num_samples: Number of importance samples used for the estimate.
        force_update: Whether to re-calculate the normlization constant when x is
            unchanged and have a cached value.
    """
    # Check if the provided x matches the default x (short-circuit on identity).
    is_new_x = self.default_x is None or (
        x is not self.default_x and (x != self.default_x).any()
    )

    not_saved_at_default_x = self._normalization_constant is None

    if is_new_x:  # Calculate at x; don't save.
        _, log_importance_weights = importance_sample(
            self.potential_fn,
            proposal=self.proposal,
            num_samples=num_samples,
        )
        return torch.mean(torch.exp(log_importance_weights))
    elif not_saved_at_default_x or force_update:  # Calculate at default_x; save.
        assert self.default_x is not None
        _, log_importance_weights = importance_sample(
            self.potential_fn,
            proposal=self.proposal,
            num_samples=num_samples,
        )
        self._normalization_constant = torch.mean(torch.exp(log_importance_weights))

    return self._normalization_constant.to(self._device)  # type: ignore

log_prob(theta, x=None, track_gradients=False, normalization_constant_params=None)

Returns the log-probability of theta under the posterior.

The normalization constant is estimated with importance sampling.

Parameters:

Name Type Description Default
theta Tensor

Parameters \(\theta\).

required
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False
normalization_constant_params Optional[dict]

Parameters passed on to estimate_normalization_constant().

None

Returns:

Type Description
Tensor

len($\theta$)-shaped log-probability.

Source code in sbi/inference/posteriors/importance_posterior.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def log_prob(
    self,
    theta: Tensor,
    x: Optional[Tensor] = None,
    track_gradients: bool = False,
    normalization_constant_params: Optional[dict] = None,
) -> Tensor:
    r"""Returns the log-probability of theta under the posterior.

    The normalization constant is estimated with importance sampling.

    Args:
        theta: Parameters $\theta$.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.
        normalization_constant_params: Parameters passed on to
            `estimate_normalization_constant()`.

    Returns:
        `len($\theta$)`-shaped log-probability.
    """
    x = self._x_else_default_x(x)
    self.potential_fn.set_x(x)

    theta = ensure_theta_batched(torch.as_tensor(theta))

    with torch.set_grad_enabled(track_gradients):
        potential_values = self.potential_fn(
            theta.to(self._device), track_gradients=track_gradients
        )

        if normalization_constant_params is None:
            normalization_constant_params = dict()  # use defaults
        normalization_constant = self.estimate_normalization_constant(
            x, **normalization_constant_params
        )

        return (potential_values - torch.log(normalization_constant)).to(
            self._device
        )

map(x=None, num_iter=1000, num_to_optimize=100, learning_rate=0.01, init_method='proposal', num_init_samples=1000, save_best_every=10, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
learning_rate float

Learning rate of the optimizer.

0.01
init_method Union[str, Tensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'proposal'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

1000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

100
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

10
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False
log_prob_kwargs

Will be empty for SNLE and SNRE. Will contain {‘norm_posterior’: True} for SNPE.

required

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/importance_posterior.py
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
def map(
    self,
    x: Optional[Tensor] = None,
    num_iter: int = 1_000,
    num_to_optimize: int = 100,
    learning_rate: float = 0.01,
    init_method: Union[str, Tensor] = "proposal",
    num_init_samples: int = 1_000,
    save_best_every: int = 10,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from the
            posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.
        log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
            {'norm_posterior': True} for SNPE.

    Returns:
        The MAP estimate.
    """
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, method=None, oversampling_factor=32, max_sampling_batch_size=10000, sample_with=None, show_progress_bars=False)

Return samples from the approximate posterior distribution.

Parameters:

Name Type Description Default
sample_shape Shape

Shape of samples that are drawn from posterior.

Size()
x Optional[Tensor]

Observed data.

None
method Optional[str]

Either of [sir|importance]. This sets the behavior of the .sample() method. With sir, approximate posterior samples are generated with sampling importance resampling (SIR). With importance, the .sample() method returns a tuple of samples and corresponding importance weights.

None
oversampling_factor int

Number of proposed samples from which only one is selected based on its importance weight.

32
max_sampling_batch_size int

The batch size of samples being drawn from the proposal at every iteration.

10000
show_progress_bars bool

Whether to show a progressbar during sampling.

False
Source code in sbi/inference/posteriors/importance_posterior.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    method: Optional[str] = None,
    oversampling_factor: int = 32,
    max_sampling_batch_size: int = 10_000,
    sample_with: Optional[str] = None,
    show_progress_bars: bool = False,
) -> Union[Tensor, Tuple[Tensor, Tensor]]:
    """Return samples from the approximate posterior distribution.

    Args:
        sample_shape: Shape of samples that are drawn from posterior.
        x: Observed data.
        method: Either of [`sir`|`importance`]. This sets the behavior of the
            `.sample()` method. With `sir`, approximate posterior samples are
            generated with sampling importance resampling (SIR). With
            `importance`, the `.sample()` method returns a tuple of samples and
            corresponding importance weights.
        oversampling_factor: Number of proposed samples from which only one is
            selected based on its importance weight.
        max_sampling_batch_size: The batch size of samples being drawn from the
            proposal at every iteration.
        show_progress_bars: Whether to show a progressbar during sampling.
    """

    method = self.method if method is None else method

    if sample_with is not None:
        raise ValueError(
            f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
            f"`sample_with` is no longer supported. You have to rerun "
            f"`.build_posterior(sample_with={sample_with}).`"
        )

    self.potential_fn.set_x(self._x_else_default_x(x))

    if method == "sir":
        return self._sir_sample(
            sample_shape,
            oversampling_factor=oversampling_factor,
            max_sampling_batch_size=max_sampling_batch_size,
            show_progress_bars=show_progress_bars,
        )
    elif method == "importance":
        return self._importance_sample(sample_shape)
    else:
        raise NameError

MCMCPosterior

Bases: NeuralPosterior

Provides MCMC to sample from the posterior.

SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC.

Source code in sbi/inference/posteriors/mcmc_posterior.py
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
class MCMCPosterior(NeuralPosterior):
    r"""Provides MCMC to sample from the posterior.<br/><br/>
    SNLE or SNRE train neural networks to approximate the likelihood(-ratios).
    `MCMCPosterior` allows to sample from the posterior with MCMC.
    """

    def __init__(
        self,
        potential_fn: Union[Callable, BasePotential],
        proposal: Any,
        theta_transform: Optional[TorchTransform] = None,
        method: str = "slice_np_vectorized",
        thin: int = -1,
        warmup_steps: int = 200,
        num_chains: int = 20,
        init_strategy: str = "resample",
        init_strategy_parameters: Optional[Dict[str, Any]] = None,
        init_strategy_num_candidates: Optional[int] = None,
        num_workers: int = 1,
        mp_context: str = "spawn",
        device: Optional[str] = None,
        x_shape: Optional[torch.Size] = None,
    ):
        """
        Args:
            potential_fn: The potential function from which to draw samples. Must be a
                `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
            proposal: Proposal distribution that is used to initialize the MCMC chain.
            theta_transform: Transformation that will be applied during sampling.
                Allows to perform MCMC in unconstrained space.
            method: Method used for MCMC sampling, one of `slice_np`,
                `slice_np_vectorized`, `hmc_pyro`, `nuts_pyro`, `slice_pymc`,
                `hmc_pymc`, `nuts_pymc`. `slice_np` is a custom
                numpy implementation of slice sampling. `slice_np_vectorized` is
                identical to `slice_np`, but if `num_chains>1`, the chains are
                vectorized for `slice_np_vectorized` whereas they are run sequentially
                for `slice_np`. The samplers ending on `_pyro` are using Pyro, and
                likewise the samplers ending on `_pymc` are using PyMC.
            thin: The thinning factor for the chain, default 1 (no thinning).
            warmup_steps: The initial number of samples to discard.
            num_chains: The number of chains. Should generally be at most
                `num_workers - 1`.
            init_strategy: The initialisation strategy for chains; `proposal` will draw
                init locations from `proposal`, whereas `sir` will use Sequential-
                Importance-Resampling (SIR). SIR initially samples
                `init_strategy_num_candidates` from the `proposal`, evaluates all of
                them under the `potential_fn` and `proposal`, and then resamples the
                initial locations with weights proportional to `exp(potential_fn -
                proposal.log_prob`. `resample` is the same as `sir` but
                uses `exp(potential_fn)` as weights.
            init_strategy_parameters: Dictionary of keyword arguments passed to the
                init strategy, e.g., for `init_strategy=sir` this could be
                `num_candidate_samples`, i.e., the number of candidates to find init
                locations (internal default is `1000`), or `device`.
            init_strategy_num_candidates: Number of candidates to find init
                 locations in `init_strategy=sir` (deprecated, use
                 init_strategy_parameters instead).
            num_workers: number of cpu cores used to parallelize mcmc
            mp_context: Multiprocessing start method, either `"fork"` or `"spawn"`
                (default), used by Pyro and PyMC samplers. `"fork"` can be significantly
                faster than `"spawn"` but is only supported on POSIX-based systems
                (e.g. Linux and macOS, not Windows).
            device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
                `potential_fn.device` is used.
            x_shape: Deprecated, should not be passed.
        """
        if method == "slice":
            warn(
                "The Pyro-based slice sampler is deprecated, and the method `slice` "
                "has been changed to `slice_np`, i.e., the custom "
                "numpy-based slice sampler.",
                DeprecationWarning,
                stacklevel=2,
            )
            method = "slice_np"

        thin = _process_thin_default(thin)

        super().__init__(
            potential_fn,
            theta_transform=theta_transform,
            device=device,
            x_shape=x_shape,
        )

        self.proposal = proposal
        self.method = method
        self.thin = thin
        self.warmup_steps = warmup_steps
        self.num_chains = num_chains
        self.init_strategy = init_strategy
        self.init_strategy_parameters = init_strategy_parameters or {}
        self.num_workers = num_workers
        self.mp_context = mp_context
        self._posterior_sampler = None
        # Hardcode parameter name to reduce clutter kwargs.
        self.param_name = "theta"

        if init_strategy_num_candidates is not None:
            warn(
                "Passing `init_strategy_num_candidates` is deprecated as of sbi "
                "v0.19.0. Instead, use e.g., `init_strategy_parameters "
                f"={'num_candidate_samples': 1000}`",
                stacklevel=2,
            )
            self.init_strategy_parameters["num_candidate_samples"] = (
                init_strategy_num_candidates
            )

        self.potential_ = self._prepare_potential(method)

        self._purpose = (
            "It provides MCMC to .sample() from the posterior and "
            "can evaluate the _unnormalized_ posterior density with .log_prob()."
        )

    @property
    def mcmc_method(self) -> str:
        """Returns MCMC method."""
        return self._mcmc_method

    @mcmc_method.setter
    def mcmc_method(self, method: str) -> None:
        """See `set_mcmc_method`."""
        self.set_mcmc_method(method)

    @property
    def posterior_sampler(self):
        """Returns sampler created by `sample`."""
        return self._posterior_sampler

    def set_mcmc_method(self, method: str) -> "NeuralPosterior":
        """Sets sampling method to for MCMC and returns `NeuralPosterior`.

        Args:
            method: Method to use.

        Returns:
            `NeuralPosterior` for chainable calls.
        """
        self._mcmc_method = method
        return self

    def log_prob(
        self, theta: Tensor, x: Optional[Tensor] = None, track_gradients: bool = False
    ) -> Tensor:
        r"""Returns the log-probability of theta under the posterior.

        Args:
            theta: Parameters $\theta$.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.

        Returns:
            `len($\theta$)`-shaped log-probability.
        """
        warn(
            "`.log_prob()` is deprecated for methods that can only evaluate the "
            "log-probability up to a normalizing constant. Use `.potential()` instead.",
            stacklevel=2,
        )
        warn("The log-probability is unnormalized!", stacklevel=2)

        self.potential_fn.set_x(self._x_else_default_x(x))

        theta = ensure_theta_batched(torch.as_tensor(theta))
        return self.potential_fn(
            theta.to(self._device), track_gradients=track_gradients
        )

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        method: Optional[str] = None,
        thin: Optional[int] = None,
        warmup_steps: Optional[int] = None,
        num_chains: Optional[int] = None,
        init_strategy: Optional[str] = None,
        init_strategy_parameters: Optional[Dict[str, Any]] = None,
        init_strategy_num_candidates: Optional[int] = None,
        mcmc_parameters: Optional[Dict] = None,
        mcmc_method: Optional[str] = None,
        sample_with: Optional[str] = None,
        num_workers: Optional[int] = None,
        mp_context: Optional[str] = None,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Return samples from posterior distribution $p(\theta|x)$ with MCMC.

        Check the `__init__()` method for a description of all arguments as well as
        their default values.

        Args:
            sample_shape: Desired shape of samples that are drawn from posterior. If
                sample_shape is multidimensional we simply draw `sample_shape.numel()`
                samples and then reshape into the desired shape.
            mcmc_parameters: Dictionary that is passed only to support the API of
                `sbi` v0.17.2 or older.
            mcmc_method: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. Please use `method` instead.
            sample_with: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples from posterior.
        """

        self.potential_fn.set_x(self._x_else_default_x(x))

        # Replace arguments that were not passed with their default.
        method = self.method if method is None else method
        thin = self.thin if thin is None else thin
        warmup_steps = self.warmup_steps if warmup_steps is None else warmup_steps
        num_chains = self.num_chains if num_chains is None else num_chains
        init_strategy = self.init_strategy if init_strategy is None else init_strategy
        num_workers = self.num_workers if num_workers is None else num_workers
        mp_context = self.mp_context if mp_context is None else mp_context
        init_strategy_parameters = (
            self.init_strategy_parameters
            if init_strategy_parameters is None
            else init_strategy_parameters
        )
        if init_strategy_num_candidates is not None:
            warn(
                "Passing `init_strategy_num_candidates` is deprecated as of sbi "
                "v0.19.0. Instead, use e.g., "
                f"`init_strategy_parameters={'num_candidate_samples': 1000}`",
                stacklevel=2,
            )
            self.init_strategy_parameters["num_candidate_samples"] = (
                init_strategy_num_candidates
            )
        if sample_with is not None:
            raise ValueError(
                f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
                "`sample_with` is no longer supported. You have to rerun "
                f"`.build_posterior(sample_with={sample_with}).`"
            )
        if mcmc_method is not None:
            warn(
                "You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this "
                "is deprecated and will be removed in a future release. Use `method` "
                "instead of `mcmc_method`.",
                stacklevel=2,
            )
            method = mcmc_method
        if mcmc_parameters:
            warn(
                "You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this "
                "is deprecated and will be removed in a future release. Instead, pass "
                "the variable to `.sample()` directly, e.g. "
                "`posterior.sample((1,), num_chains=5)`.",
                stacklevel=2,
            )
        # The following lines are only for backwards compatibility with sbi v0.17.2 or
        # older.
        m_p = mcmc_parameters or {}  # define to shorten the variable name
        method = _maybe_use_dict_entry(method, "mcmc_method", m_p)
        thin = _maybe_use_dict_entry(thin, "thin", m_p)
        warmup_steps = _maybe_use_dict_entry(warmup_steps, "warmup_steps", m_p)
        num_chains = _maybe_use_dict_entry(num_chains, "num_chains", m_p)
        init_strategy = _maybe_use_dict_entry(init_strategy, "init_strategy", m_p)
        self.potential_ = self._prepare_potential(method)  # type: ignore

        initial_params = self._get_initial_params(
            init_strategy,  # type: ignore
            num_chains,  # type: ignore
            num_workers,
            show_progress_bars,
            **init_strategy_parameters,
        )
        num_samples = torch.Size(sample_shape).numel()

        track_gradients = method in ("hmc_pyro", "nuts_pyro", "hmc_pymc", "nuts_pymc")
        with torch.set_grad_enabled(track_gradients):
            if method in ("slice_np", "slice_np_vectorized"):
                transformed_samples = self._slice_np_mcmc(
                    num_samples=num_samples,
                    potential_function=self.potential_,
                    initial_params=initial_params,
                    thin=thin,  # type: ignore
                    warmup_steps=warmup_steps,  # type: ignore
                    vectorized=(method == "slice_np_vectorized"),
                    interchangeable_chains=True,
                    num_workers=num_workers,
                    show_progress_bars=show_progress_bars,
                )
            elif method in ("hmc_pyro", "nuts_pyro"):
                transformed_samples = self._pyro_mcmc(
                    num_samples=num_samples,
                    potential_function=self.potential_,
                    initial_params=initial_params,
                    mcmc_method=method,  # type: ignore
                    thin=thin,  # type: ignore
                    warmup_steps=warmup_steps,  # type: ignore
                    num_chains=num_chains,
                    show_progress_bars=show_progress_bars,
                    mp_context=mp_context,
                )
            elif method in ("hmc_pymc", "nuts_pymc", "slice_pymc"):
                transformed_samples = self._pymc_mcmc(
                    num_samples=num_samples,
                    potential_function=self.potential_,
                    initial_params=initial_params,
                    mcmc_method=method,  # type: ignore
                    thin=thin,  # type: ignore
                    warmup_steps=warmup_steps,  # type: ignore
                    num_chains=num_chains,
                    show_progress_bars=show_progress_bars,
                    mp_context=mp_context,
                )
            else:
                raise NameError(f"The sampling method {method} is not implemented!")

        samples = self.theta_transform.inv(transformed_samples)
        # NOTE: Currently MCMCPosteriors will require a single dimension for the
        # parameter dimension. With recent ConditionalDensity(Ratio) estimators, we
        # can have multiple dimensions for the parameter dimension.
        samples = samples.reshape((*sample_shape, -1))  # type: ignore

        return samples

    def sample_batched(
        self,
        sample_shape: Shape,
        x: Tensor,
        method: Optional[str] = None,
        thin: Optional[int] = None,
        warmup_steps: Optional[int] = None,
        num_chains: Optional[int] = None,
        init_strategy: Optional[str] = None,
        init_strategy_parameters: Optional[Dict[str, Any]] = None,
        num_workers: Optional[int] = None,
        mp_context: Optional[str] = None,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Given a batch of observations [x_1, ..., x_B] this function samples from
        posteriors $p(\theta|x_1)$, ... ,$p(\theta|x_B)$, in a batched (i.e. vectorized)
        manner.

        Check the `__init__()` method for a description of all arguments as well as
        their default values.

        Args:
            sample_shape: Desired shape of samples that are drawn from the posterior
                given every observation.
            x: A batch of observations, of shape `(batch_dim, event_shape_x)`.
                `batch_dim` corresponds to the number of observations to be
                drawn.
            method: Method used for MCMC sampling, e.g., "slice_np_vectorized".
            thin: The thinning factor for the chain, default 1 (no thinning).
            warmup_steps: The initial number of samples to discard.
            num_chains: The number of chains used for each `x` passed in the batch.
            init_strategy: The initialisation strategy for chains.
            init_strategy_parameters: Dictionary of keyword arguments passed to
                the init strategy.
            num_workers: number of cpu cores used to parallelize initial
                parameter generation and mcmc sampling.
            mp_context: Multiprocessing start method, either `"fork"` or `"spawn"`
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples from the posteriors of shape (*sample_shape, B, *input_shape)
        """

        # Replace arguments that were not passed with their default.
        method = self.method if method is None else method
        thin = self.thin if thin is None else thin
        warmup_steps = self.warmup_steps if warmup_steps is None else warmup_steps
        num_chains = self.num_chains if num_chains is None else num_chains
        init_strategy = self.init_strategy if init_strategy is None else init_strategy
        num_workers = self.num_workers if num_workers is None else num_workers
        mp_context = self.mp_context if mp_context is None else mp_context
        init_strategy_parameters = (
            self.init_strategy_parameters
            if init_strategy_parameters is None
            else init_strategy_parameters
        )

        assert (
            method == "slice_np_vectorized"
        ), "Batched sampling only supported for vectorized samplers!"

        # warn if num_chains is larger than num requested samples
        if num_chains > torch.Size(sample_shape).numel():
            warnings.warn(
                "The passed number of MCMC chains is larger than the number of "
                f"requested samples: {num_chains} > {torch.Size(sample_shape).numel()},"
                f" resetting it to {torch.Size(sample_shape).numel()}.",
                stacklevel=2,
            )
            num_chains = torch.Size(sample_shape).numel()

        # custom shape handling to make sure to match the batch size of x and theta
        # without unnecessary combinations.
        if len(x.shape) == 1:
            x = x.unsqueeze(0)
        batch_size = x.shape[0]

        x = reshape_to_batch_event(x, event_shape=x.shape[1:])

        # For batched sampling, we want `num_chains` for each observation in the batch.
        # Here we repeat the observations ABC -> AAABBBCCC, so that the chains are
        # in the order of the observations.
        x_ = x.repeat_interleave(num_chains, dim=0)

        self.potential_fn.set_x(x_, x_is_iid=False)
        self.potential_ = self._prepare_potential(method)  # type: ignore

        # For each observation in the batch, we have num_chains independent chains.
        num_chains_extended = batch_size * num_chains
        if num_chains_extended > 100:
            warnings.warn(
                "Note that for batched sampling, we use num_chains many chains for each"
                " x in the batch. With the given settings, this results in a large "
                f"number large number of chains ({num_chains_extended}), which can be "
                "slow and memory-intensive for vectorized MCMC. Consider reducing the "
                "number of chains.",
                stacklevel=2,
            )
        init_strategy_parameters["num_return_samples"] = num_chains_extended
        initial_params = self._get_initial_params_batched(
            x,
            init_strategy,  # type: ignore
            num_chains,  # type: ignore
            num_workers,
            show_progress_bars,
            **init_strategy_parameters,
        )
        # We need num_samples from each posterior in the batch
        num_samples = torch.Size(sample_shape).numel() * batch_size

        with torch.set_grad_enabled(False):
            transformed_samples = self._slice_np_mcmc(
                num_samples=num_samples,
                potential_function=self.potential_,
                initial_params=initial_params,
                thin=thin,  # type: ignore
                warmup_steps=warmup_steps,  # type: ignore
                vectorized=(method == "slice_np_vectorized"),
                interchangeable_chains=False,
                num_workers=num_workers,
                show_progress_bars=show_progress_bars,
            )

        # (num_chains_extended, samples_per_chain, *input_shape)
        samples_per_chain: Tensor = self.theta_transform.inv(transformed_samples)  # type: ignore
        dim_theta = samples_per_chain.shape[-1]
        # We need to collect samples for each x from the respective chains.
        # However, using samples.reshape(*sample_shape, batch_size, dim_theta)
        # does not combine the samples in the right order, since this mixes
        # samples that belong to different `x`. The following permute is a
        # workaround to reshape the samples in the right order.
        samples_per_x = samples_per_chain.reshape((
            batch_size,
            # We are flattening the sample shape here using -1 because we might have
            # generated more samples than requested (more chains, or multiple of
            # chains not matching sample_shape)
            -1,
            dim_theta,
        )).permute(1, 0, -1)

        # Shape is now (-1, batch_size, dim_theta)
        # We can now select the number of requested samples
        samples = samples_per_x[: torch.Size(sample_shape).numel()]
        # and reshape into (*sample_shape, batch_size, dim_theta)
        samples = samples.reshape((*sample_shape, batch_size, dim_theta))
        return samples

    def _build_mcmc_init_fn(
        self,
        proposal: Any,
        potential_fn: Callable,
        transform: torch_tf.Transform,
        init_strategy: str,
        **kwargs,
    ) -> Callable:
        """Return function that, when called, creates an initial parameter set for MCMC.

        Args:
            proposal: Proposal distribution.
            potential_fn: Potential function that the candidate samples are weighted
                with.
            init_strategy: Specifies the initialization method. Either of
                [`proposal`|`sir`|`resample`|`latest_sample`].
            kwargs: Passed on to init function. This way, init specific keywords can
                be set through `mcmc_parameters`. Unused arguments will be absorbed by
                the intitialization method.

        Returns: Initialization function.
        """
        if init_strategy == "proposal" or init_strategy == "prior":
            if init_strategy == "prior":
                warn(
                    "You set `init_strategy=prior`. As of sbi v0.18.0, this is "
                    "deprecated and it will be removed in a future release. Use "
                    "`init_strategy=proposal` instead.",
                    stacklevel=2,
                )
            return lambda: proposal_init(proposal, transform=transform, **kwargs)
        elif init_strategy == "sir":
            warn(
                "As of sbi v0.19.0, the behavior of the SIR initialization for MCMC "
                "has changed. If you wish to restore the behavior of sbi v0.18.0, set "
                "`init_strategy='resample'.`",
                stacklevel=2,
            )
            return lambda: sir_init(
                proposal, potential_fn, transform=transform, **kwargs
            )
        elif init_strategy == "resample":
            return lambda: resample_given_potential_fn(
                proposal, potential_fn, transform=transform, **kwargs
            )
        elif init_strategy == "latest_sample":
            latest_sample = IterateParameters(self._mcmc_init_params, **kwargs)
            return latest_sample
        else:
            raise NotImplementedError

    def _get_initial_params(
        self,
        init_strategy: str,
        num_chains: int,
        num_workers: int,
        show_progress_bars: bool,
        **kwargs,
    ) -> Tensor:
        """Return initial parameters for MCMC obtained with given init strategy.

        Parallelizes across CPU cores only for resample and SIR.

        Args:
            init_strategy: Specifies the initialization method. Either of
                [`proposal`|`sir`|`resample`|`latest_sample`].
            num_chains: number of MCMC chains, generates initial params for each
            num_workers: number of CPU cores for parallization
            show_progress_bars: whether to show progress bars for SIR init
            kwargs: Passed on to `_build_mcmc_init_fn`.

        Returns:
            Tensor: initial parameters, one for each chain
        """
        # Build init function
        init_fn = self._build_mcmc_init_fn(
            self.proposal,
            self.potential_fn,
            transform=self.theta_transform,
            init_strategy=init_strategy,  # type: ignore
            **kwargs,
        )

        # Parallelize inits for resampling only.
        if num_workers > 1 and (init_strategy == "resample" or init_strategy == "sir"):

            def seeded_init_fn(seed):
                torch.manual_seed(seed)
                return init_fn()

            seeds = torch.randint(high=2**31, size=(num_chains,))

            # Generate initial params parallelized over num_workers.
            initial_params = list(
                tqdm(
                    Parallel(return_as="generator", n_jobs=num_workers)(
                        delayed(seeded_init_fn)(seed) for seed in seeds
                    ),
                    total=len(seeds),
                    desc=f"""Generating {num_chains} MCMC inits with
                            {num_workers} workers.""",
                    disable=not show_progress_bars,
                )
            )
            initial_params = torch.cat(initial_params)  # type: ignore
        else:
            initial_params = torch.cat(
                [init_fn() for _ in range(num_chains)]  # type: ignore
            )
        return initial_params

    def _get_initial_params_batched(
        self,
        x: torch.Tensor,
        init_strategy: str,
        num_chains_per_x: int,
        num_workers: int,
        show_progress_bars: bool,
        **kwargs,
    ) -> Tensor:
        """Return initial parameters for MCMC for a batch of `x`, obtained with given
           init strategy.

        Parallelizes across CPU cores only for resample and SIR.

        Args:
            x: Batch of observations to create different initial parameters for.
            init_strategy: Specifies the initialization method. Either of
                [`proposal`|`sir`|`resample`|`latest_sample`].
            num_chains_per_x: number of MCMC chains for each x, generates initial params
                for each x
            num_workers: number of CPU cores for parallization
            show_progress_bars: whether to show progress bars for SIR init
            kwargs: Passed on to `_build_mcmc_init_fn`.

        Returns:
            Tensor: initial parameters, one for each chain
        """

        potential_ = deepcopy(self.potential_fn)
        initial_params = []
        init_fn = self._build_mcmc_init_fn(
            self.proposal,
            potential_fn=potential_,
            transform=self.theta_transform,
            init_strategy=init_strategy,  # type: ignore
            **kwargs,
        )
        for xi in x:
            # Build init function
            potential_.set_x(xi)

            # Parallelize inits for resampling or sir.
            if num_workers > 1 and (
                init_strategy == "resample" or init_strategy == "sir"
            ):

                def seeded_init_fn(seed):
                    torch.manual_seed(seed)
                    return init_fn()

                seeds = torch.randint(high=2**31, size=(num_chains_per_x,))

                # Generate initial params parallelized over num_workers.
                initial_params = initial_params + list(
                    tqdm(
                        Parallel(return_as="generator", n_jobs=num_workers)(
                            delayed(seeded_init_fn)(seed) for seed in seeds
                        ),
                        total=len(seeds),
                        desc=f"""Generating {num_chains_per_x} MCMC inits with
                                {num_workers} workers.""",
                        disable=not show_progress_bars,
                    )
                )

            else:
                initial_params = initial_params + [
                    init_fn() for _ in range(num_chains_per_x)
                ]  # type: ignore

        initial_params = torch.cat(initial_params)
        return initial_params

    def _slice_np_mcmc(
        self,
        num_samples: int,
        potential_function: Callable,
        initial_params: Tensor,
        thin: int,
        warmup_steps: int,
        vectorized: bool = False,
        interchangeable_chains=True,
        num_workers: int = 1,
        init_width: Union[float, ndarray] = 0.01,
        show_progress_bars: bool = True,
    ) -> Tensor:
        """Custom implementation of slice sampling using Numpy.

        Args:
            num_samples: Desired number of samples.
            potential_function: A callable **class**.
            initial_params: Initial parameters for MCMC chain.
            thin: Thinning (subsampling) factor, default 1 (no thinning).
            warmup_steps: Initial number of samples to discard.
            vectorized: Whether to use a vectorized implementation of the
                `SliceSampler`.
            interchangeable_chains: Whether chains are interchangeable, i.e., whether
                we can mix samples between chains.
            num_workers: Number of CPU cores to use.
            init_width: Inital width of brackets.
            show_progress_bars: Whether to show a progressbar during sampling;
                can only be turned off for vectorized sampler.

        Returns:
            Tensor of shape (num_samples, shape_of_single_theta).
        """

        num_chains, dim_samples = initial_params.shape

        if not vectorized:
            SliceSamplerMultiChain = SliceSamplerSerial
        else:
            SliceSamplerMultiChain = SliceSamplerVectorized

        def multi_obs_potential(params):
            # Params are of shape (num_chains * num_obs, event).
            all_potentials = potential_function(params)  # Shape: (num_chains, num_obs)
            return all_potentials.flatten()

        posterior_sampler = SliceSamplerMultiChain(
            init_params=tensor2numpy(initial_params),
            log_prob_fn=multi_obs_potential,
            num_chains=num_chains,
            thin=thin,
            verbose=show_progress_bars,
            num_workers=num_workers,
            init_width=init_width,
        )
        warmup_ = warmup_steps * thin
        num_samples_ = ceil((num_samples * thin) / num_chains)
        # Run mcmc including warmup
        samples = posterior_sampler.run(warmup_ + num_samples_)
        samples = samples[:, warmup_steps:, :]  # discard warmup steps
        samples = torch.from_numpy(samples)  # chains x samples x dim

        # Save posterior sampler.
        self._posterior_sampler = posterior_sampler

        # Save sample as potential next init (if init_strategy == 'latest_sample').
        self._mcmc_init_params = samples[:, -1, :].reshape(num_chains, dim_samples)

        # Update: If chains are interchangeable, return concatenated samples. Otherwise
        # return samples per chain.
        if interchangeable_chains:
            # Collect samples from all chains.
            samples = samples.reshape(-1, dim_samples)[:num_samples]

        return samples.type(torch.float32).to(self._device)

    def _pyro_mcmc(
        self,
        num_samples: int,
        potential_function: Callable,
        initial_params: Tensor,
        mcmc_method: str = "nuts_pyro",
        thin: int = -1,
        warmup_steps: int = 200,
        num_chains: Optional[int] = 1,
        show_progress_bars: bool = True,
        mp_context: str = "spawn",
    ) -> Tensor:
        r"""Return samples obtained using Pyro's HMC or NUTS sampler.

        Args:
            num_samples: Desired number of samples.
            potential_function: A callable **class**. A class, but not a function,
                is picklable for Pyro MCMC to use it across chains in parallel,
                even when the potential function requires evaluating a neural network.
            initial_params: Initial parameters for MCMC chain.
            mcmc_method: Pyro MCMC method to use, either `"hmc_pyro"` or
                `"nuts_pyro"` (default).
            thin: Thinning (subsampling) factor, default 1 (no thinning).
            warmup_steps: Initial number of samples to discard.
            num_chains: Whether to sample in parallel. If None, use all but one CPU.
            show_progress_bars: Whether to show a progressbar during sampling.

        Returns:
            Tensor of shape (num_samples, shape_of_single_theta).
        """
        thin = _process_thin_default(thin)
        num_chains = mp.cpu_count() - 1 if num_chains is None else num_chains
        kernels = dict(hmc_pyro=HMC, nuts_pyro=NUTS)

        sampler = MCMC(
            kernel=kernels[mcmc_method](potential_fn=potential_function),
            num_samples=ceil((thin * num_samples) / num_chains),
            warmup_steps=warmup_steps,
            initial_params={self.param_name: initial_params},
            num_chains=num_chains,
            mp_context=mp_context,
            disable_progbar=not show_progress_bars,
            transforms={},
        )
        sampler.run()
        samples = next(iter(sampler.get_samples().values())).reshape(
            -1,
            initial_params.shape[1],  # .shape[1] = dim of theta
        )

        # Save posterior sampler.
        self._posterior_sampler = sampler

        samples = samples[::thin][:num_samples]

        return samples.detach()

    def _pymc_mcmc(
        self,
        num_samples: int,
        potential_function: Callable,
        initial_params: Tensor,
        mcmc_method: str = "nuts_pymc",
        thin: int = -1,
        warmup_steps: int = 200,
        num_chains: Optional[int] = 1,
        show_progress_bars: bool = True,
        mp_context: str = "spawn",
    ) -> Tensor:
        r"""Return samples obtained using PyMC's HMC, NUTS or slice samplers.

        Args:
            num_samples: Desired number of samples.
            potential_function: A callable **class**. A class, but not a function,
                is picklable for PyMC MCMC to use it across chains in parallel,
                even when the potential function requires evaluating a neural network.
            initial_params: Initial parameters for MCMC chain.
            mcmc_method: mcmc_method: Pyro MCMC method to use, either `"hmc_pymc"` or
                `"slice_pymc"`, or `"nuts_pymc"` (default).
            thin: Thinning (subsampling) factor, default 1 (no thinning).
            warmup_steps: Initial number of samples to discard.
            num_chains: Whether to sample in parallel. If None, use all but one CPU.
            show_progress_bars: Whether to show a progressbar during sampling.

        Returns:
            Tensor of shape (num_samples, shape_of_single_theta).
        """
        thin = _process_thin_default(thin)
        num_chains = mp.cpu_count() - 1 if num_chains is None else num_chains
        steps = dict(slice_pymc="slice", hmc_pymc="hmc", nuts_pymc="nuts")

        sampler = PyMCSampler(
            potential_fn=potential_function,
            step=steps[mcmc_method],
            initvals=tensor2numpy(initial_params),
            draws=ceil((thin * num_samples) / num_chains),
            tune=warmup_steps,
            chains=num_chains,
            mp_ctx=mp_context,
            progressbar=show_progress_bars,
            param_name=self.param_name,
            device=self._device,
        )
        samples = sampler.run()
        samples = torch.from_numpy(samples).to(dtype=torch.float32, device=self._device)
        samples = samples.reshape(-1, initial_params.shape[1])

        # Save posterior sampler.
        self._posterior_sampler = sampler

        samples = samples[::thin][:num_samples]

        return samples

    def _prepare_potential(self, method: str) -> Callable:
        """Combines potential and transform and takes care of gradients and pyro.

        Args:
            method: Which MCMC method to use.

        Returns:
            A potential function that is ready to be used in MCMC.
        """
        if method in ("hmc_pyro", "nuts_pyro"):
            track_gradients = True
            pyro = True
        elif method in ("hmc_pymc", "nuts_pymc"):
            track_gradients = True
            pyro = False
        elif method in ("slice_np", "slice_np_vectorized", "slice_pymc"):
            track_gradients = False
            pyro = False
        else:
            if "hmc" in method or "nuts" in method:
                warn(
                    "The kwargs 'hmc' and 'nuts' are deprecated. Use 'hmc_pyro', "
                    "'nuts_pyro', 'hmc_pymc', or 'nuts_pymc' instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
            raise NotImplementedError(f"MCMC method {method} is not implemented.")

        prepared_potential = partial(
            transformed_potential,
            potential_fn=self.potential_fn,
            theta_transform=self.theta_transform,
            device=self._device,
            track_gradients=track_gradients,
        )
        if pyro:
            prepared_potential = partial(
                pyro_potential_wrapper, potential=prepared_potential
            )

        return prepared_potential

    def map(
        self,
        x: Optional[Tensor] = None,
        num_iter: int = 1_000,
        num_to_optimize: int = 100,
        learning_rate: float = 0.01,
        init_method: Union[str, Tensor] = "proposal",
        num_init_samples: int = 1_000,
        save_best_every: int = 10,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from
                the posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.
            log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
                {'norm_posterior': True} for SNPE.

        Returns:
            The MAP estimate.
        """
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

    def get_arviz_inference_data(self) -> InferenceData:
        """Returns arviz InferenceData object constructed most recent samples.

        Note: the InferenceData is constructed using the posterior samples generated in
        most recent call to `.sample(...)`.

        For Pyro and PyMC samplers, InferenceData will contain diagnostics, but for
        sbi slice samplers, only the samples are added.

        Returns:
            inference_data: Arviz InferenceData object.
        """
        assert (
            self._posterior_sampler is not None
        ), """No samples have been generated, call .sample() first."""

        sampler: Union[
            MCMC, SliceSamplerSerial, SliceSamplerVectorized, PyMCSampler
        ] = self._posterior_sampler

        # If Pyro sampler and samples not transformed, use arviz' from_pyro.
        if isinstance(sampler, (HMC, NUTS)) and isinstance(
            self.theta_transform, torch_tf.IndependentTransform
        ):
            inference_data = az.from_pyro(sampler)
        # If PyMC sampler and samples not transformed, get cached InferenceData.
        elif isinstance(sampler, PyMCSampler) and isinstance(
            self.theta_transform, torch_tf.IndependentTransform
        ):
            inference_data = sampler.get_inference_data()

        # otherwise get samples from sampler and transform to original space.
        else:
            transformed_samples = sampler.get_samples(group_by_chain=True)
            # Pyro samplers returns dicts, get values.
            if isinstance(transformed_samples, Dict):
                # popitem gets last items, [1] get the values as tensor.
                transformed_samples = transformed_samples.popitem()[1]
            # Our slice samplers return numpy arrays.
            elif isinstance(transformed_samples, ndarray):
                transformed_samples = torch.from_numpy(transformed_samples).type(
                    torch.float32
                )
            # For MultipleIndependent priors transforms first dim must be batch dim.
            # thus, reshape back and forth to have batch dim in front.
            samples_shape = transformed_samples.shape
            samples = self.theta_transform.inv(  # type: ignore
                transformed_samples.reshape(-1, samples_shape[-1])
            ).reshape(  # type: ignore
                *samples_shape
            )

            inference_data = az.convert_to_inference_data({
                f"{self.param_name}": samples
            })

        return inference_data

mcmc_method: str property writable

Returns MCMC method.

posterior_sampler property

Returns sampler created by sample.

__init__(potential_fn, proposal, theta_transform=None, method='slice_np_vectorized', thin=-1, warmup_steps=200, num_chains=20, init_strategy='resample', init_strategy_parameters=None, init_strategy_num_candidates=None, num_workers=1, mp_context='spawn', device=None, x_shape=None)

Parameters:

Name Type Description Default
potential_fn Union[Callable, BasePotential]

The potential function from which to draw samples. Must be a BasePotential or a Callable which takes theta and x_o as inputs.

required
proposal Any

Proposal distribution that is used to initialize the MCMC chain.

required
theta_transform Optional[TorchTransform]

Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space.

None
method str

Method used for MCMC sampling, one of slice_np, slice_np_vectorized, hmc_pyro, nuts_pyro, slice_pymc, hmc_pymc, nuts_pymc. slice_np is a custom numpy implementation of slice sampling. slice_np_vectorized is identical to slice_np, but if num_chains>1, the chains are vectorized for slice_np_vectorized whereas they are run sequentially for slice_np. The samplers ending on _pyro are using Pyro, and likewise the samplers ending on _pymc are using PyMC.

'slice_np_vectorized'
thin int

The thinning factor for the chain, default 1 (no thinning).

-1
warmup_steps int

The initial number of samples to discard.

200
num_chains int

The number of chains. Should generally be at most num_workers - 1.

20
init_strategy str

The initialisation strategy for chains; proposal will draw init locations from proposal, whereas sir will use Sequential- Importance-Resampling (SIR). SIR initially samples init_strategy_num_candidates from the proposal, evaluates all of them under the potential_fn and proposal, and then resamples the initial locations with weights proportional to exp(potential_fn - proposal.log_prob. resample is the same as sir but uses exp(potential_fn) as weights.

'resample'
init_strategy_parameters Optional[Dict[str, Any]]

Dictionary of keyword arguments passed to the init strategy, e.g., for init_strategy=sir this could be num_candidate_samples, i.e., the number of candidates to find init locations (internal default is 1000), or device.

None
init_strategy_num_candidates Optional[int]

Number of candidates to find init locations in init_strategy=sir (deprecated, use init_strategy_parameters instead).

None
num_workers int

number of cpu cores used to parallelize mcmc

1
mp_context str

Multiprocessing start method, either "fork" or "spawn" (default), used by Pyro and PyMC samplers. "fork" can be significantly faster than "spawn" but is only supported on POSIX-based systems (e.g. Linux and macOS, not Windows).

'spawn'
device Optional[str]

Training device, e.g., “cpu”, “cuda” or “cuda:0”. If None, potential_fn.device is used.

None
x_shape Optional[Size]

Deprecated, should not be passed.

None
Source code in sbi/inference/posteriors/mcmc_posterior.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def __init__(
    self,
    potential_fn: Union[Callable, BasePotential],
    proposal: Any,
    theta_transform: Optional[TorchTransform] = None,
    method: str = "slice_np_vectorized",
    thin: int = -1,
    warmup_steps: int = 200,
    num_chains: int = 20,
    init_strategy: str = "resample",
    init_strategy_parameters: Optional[Dict[str, Any]] = None,
    init_strategy_num_candidates: Optional[int] = None,
    num_workers: int = 1,
    mp_context: str = "spawn",
    device: Optional[str] = None,
    x_shape: Optional[torch.Size] = None,
):
    """
    Args:
        potential_fn: The potential function from which to draw samples. Must be a
            `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
        proposal: Proposal distribution that is used to initialize the MCMC chain.
        theta_transform: Transformation that will be applied during sampling.
            Allows to perform MCMC in unconstrained space.
        method: Method used for MCMC sampling, one of `slice_np`,
            `slice_np_vectorized`, `hmc_pyro`, `nuts_pyro`, `slice_pymc`,
            `hmc_pymc`, `nuts_pymc`. `slice_np` is a custom
            numpy implementation of slice sampling. `slice_np_vectorized` is
            identical to `slice_np`, but if `num_chains>1`, the chains are
            vectorized for `slice_np_vectorized` whereas they are run sequentially
            for `slice_np`. The samplers ending on `_pyro` are using Pyro, and
            likewise the samplers ending on `_pymc` are using PyMC.
        thin: The thinning factor for the chain, default 1 (no thinning).
        warmup_steps: The initial number of samples to discard.
        num_chains: The number of chains. Should generally be at most
            `num_workers - 1`.
        init_strategy: The initialisation strategy for chains; `proposal` will draw
            init locations from `proposal`, whereas `sir` will use Sequential-
            Importance-Resampling (SIR). SIR initially samples
            `init_strategy_num_candidates` from the `proposal`, evaluates all of
            them under the `potential_fn` and `proposal`, and then resamples the
            initial locations with weights proportional to `exp(potential_fn -
            proposal.log_prob`. `resample` is the same as `sir` but
            uses `exp(potential_fn)` as weights.
        init_strategy_parameters: Dictionary of keyword arguments passed to the
            init strategy, e.g., for `init_strategy=sir` this could be
            `num_candidate_samples`, i.e., the number of candidates to find init
            locations (internal default is `1000`), or `device`.
        init_strategy_num_candidates: Number of candidates to find init
             locations in `init_strategy=sir` (deprecated, use
             init_strategy_parameters instead).
        num_workers: number of cpu cores used to parallelize mcmc
        mp_context: Multiprocessing start method, either `"fork"` or `"spawn"`
            (default), used by Pyro and PyMC samplers. `"fork"` can be significantly
            faster than `"spawn"` but is only supported on POSIX-based systems
            (e.g. Linux and macOS, not Windows).
        device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
            `potential_fn.device` is used.
        x_shape: Deprecated, should not be passed.
    """
    if method == "slice":
        warn(
            "The Pyro-based slice sampler is deprecated, and the method `slice` "
            "has been changed to `slice_np`, i.e., the custom "
            "numpy-based slice sampler.",
            DeprecationWarning,
            stacklevel=2,
        )
        method = "slice_np"

    thin = _process_thin_default(thin)

    super().__init__(
        potential_fn,
        theta_transform=theta_transform,
        device=device,
        x_shape=x_shape,
    )

    self.proposal = proposal
    self.method = method
    self.thin = thin
    self.warmup_steps = warmup_steps
    self.num_chains = num_chains
    self.init_strategy = init_strategy
    self.init_strategy_parameters = init_strategy_parameters or {}
    self.num_workers = num_workers
    self.mp_context = mp_context
    self._posterior_sampler = None
    # Hardcode parameter name to reduce clutter kwargs.
    self.param_name = "theta"

    if init_strategy_num_candidates is not None:
        warn(
            "Passing `init_strategy_num_candidates` is deprecated as of sbi "
            "v0.19.0. Instead, use e.g., `init_strategy_parameters "
            f"={'num_candidate_samples': 1000}`",
            stacklevel=2,
        )
        self.init_strategy_parameters["num_candidate_samples"] = (
            init_strategy_num_candidates
        )

    self.potential_ = self._prepare_potential(method)

    self._purpose = (
        "It provides MCMC to .sample() from the posterior and "
        "can evaluate the _unnormalized_ posterior density with .log_prob()."
    )

get_arviz_inference_data()

Returns arviz InferenceData object constructed most recent samples.

Note: the InferenceData is constructed using the posterior samples generated in most recent call to .sample(...).

For Pyro and PyMC samplers, InferenceData will contain diagnostics, but for sbi slice samplers, only the samples are added.

Returns:

Name Type Description
inference_data InferenceData

Arviz InferenceData object.

Source code in sbi/inference/posteriors/mcmc_posterior.py
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
def get_arviz_inference_data(self) -> InferenceData:
    """Returns arviz InferenceData object constructed most recent samples.

    Note: the InferenceData is constructed using the posterior samples generated in
    most recent call to `.sample(...)`.

    For Pyro and PyMC samplers, InferenceData will contain diagnostics, but for
    sbi slice samplers, only the samples are added.

    Returns:
        inference_data: Arviz InferenceData object.
    """
    assert (
        self._posterior_sampler is not None
    ), """No samples have been generated, call .sample() first."""

    sampler: Union[
        MCMC, SliceSamplerSerial, SliceSamplerVectorized, PyMCSampler
    ] = self._posterior_sampler

    # If Pyro sampler and samples not transformed, use arviz' from_pyro.
    if isinstance(sampler, (HMC, NUTS)) and isinstance(
        self.theta_transform, torch_tf.IndependentTransform
    ):
        inference_data = az.from_pyro(sampler)
    # If PyMC sampler and samples not transformed, get cached InferenceData.
    elif isinstance(sampler, PyMCSampler) and isinstance(
        self.theta_transform, torch_tf.IndependentTransform
    ):
        inference_data = sampler.get_inference_data()

    # otherwise get samples from sampler and transform to original space.
    else:
        transformed_samples = sampler.get_samples(group_by_chain=True)
        # Pyro samplers returns dicts, get values.
        if isinstance(transformed_samples, Dict):
            # popitem gets last items, [1] get the values as tensor.
            transformed_samples = transformed_samples.popitem()[1]
        # Our slice samplers return numpy arrays.
        elif isinstance(transformed_samples, ndarray):
            transformed_samples = torch.from_numpy(transformed_samples).type(
                torch.float32
            )
        # For MultipleIndependent priors transforms first dim must be batch dim.
        # thus, reshape back and forth to have batch dim in front.
        samples_shape = transformed_samples.shape
        samples = self.theta_transform.inv(  # type: ignore
            transformed_samples.reshape(-1, samples_shape[-1])
        ).reshape(  # type: ignore
            *samples_shape
        )

        inference_data = az.convert_to_inference_data({
            f"{self.param_name}": samples
        })

    return inference_data

log_prob(theta, x=None, track_gradients=False)

Returns the log-probability of theta under the posterior.

Parameters:

Name Type Description Default
theta Tensor

Parameters \(\theta\).

required
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False

Returns:

Type Description
Tensor

len($\theta$)-shaped log-probability.

Source code in sbi/inference/posteriors/mcmc_posterior.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def log_prob(
    self, theta: Tensor, x: Optional[Tensor] = None, track_gradients: bool = False
) -> Tensor:
    r"""Returns the log-probability of theta under the posterior.

    Args:
        theta: Parameters $\theta$.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.

    Returns:
        `len($\theta$)`-shaped log-probability.
    """
    warn(
        "`.log_prob()` is deprecated for methods that can only evaluate the "
        "log-probability up to a normalizing constant. Use `.potential()` instead.",
        stacklevel=2,
    )
    warn("The log-probability is unnormalized!", stacklevel=2)

    self.potential_fn.set_x(self._x_else_default_x(x))

    theta = ensure_theta_batched(torch.as_tensor(theta))
    return self.potential_fn(
        theta.to(self._device), track_gradients=track_gradients
    )

map(x=None, num_iter=1000, num_to_optimize=100, learning_rate=0.01, init_method='proposal', num_init_samples=1000, save_best_every=10, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
learning_rate float

Learning rate of the optimizer.

0.01
init_method Union[str, Tensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'proposal'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

1000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

100
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

10
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False
log_prob_kwargs

Will be empty for SNLE and SNRE. Will contain {‘norm_posterior’: True} for SNPE.

required

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/mcmc_posterior.py
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
def map(
    self,
    x: Optional[Tensor] = None,
    num_iter: int = 1_000,
    num_to_optimize: int = 100,
    learning_rate: float = 0.01,
    init_method: Union[str, Tensor] = "proposal",
    num_init_samples: int = 1_000,
    save_best_every: int = 10,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from
            the posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.
        log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
            {'norm_posterior': True} for SNPE.

    Returns:
        The MAP estimate.
    """
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, method=None, thin=None, warmup_steps=None, num_chains=None, init_strategy=None, init_strategy_parameters=None, init_strategy_num_candidates=None, mcmc_parameters=None, mcmc_method=None, sample_with=None, num_workers=None, mp_context=None, show_progress_bars=True)

Return samples from posterior distribution \(p(\theta|x)\) with MCMC.

Check the __init__() method for a description of all arguments as well as their default values.

Parameters:

Name Type Description Default
sample_shape Shape

Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape.

Size()
mcmc_parameters Optional[Dict]

Dictionary that is passed only to support the API of sbi v0.17.2 or older.

None
mcmc_method Optional[str]

This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. Please use method instead.

None
sample_with Optional[str]

This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error.

None
show_progress_bars bool

Whether to show sampling progress monitor.

True

Returns:

Type Description
Tensor

Samples from posterior.

Source code in sbi/inference/posteriors/mcmc_posterior.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    method: Optional[str] = None,
    thin: Optional[int] = None,
    warmup_steps: Optional[int] = None,
    num_chains: Optional[int] = None,
    init_strategy: Optional[str] = None,
    init_strategy_parameters: Optional[Dict[str, Any]] = None,
    init_strategy_num_candidates: Optional[int] = None,
    mcmc_parameters: Optional[Dict] = None,
    mcmc_method: Optional[str] = None,
    sample_with: Optional[str] = None,
    num_workers: Optional[int] = None,
    mp_context: Optional[str] = None,
    show_progress_bars: bool = True,
) -> Tensor:
    r"""Return samples from posterior distribution $p(\theta|x)$ with MCMC.

    Check the `__init__()` method for a description of all arguments as well as
    their default values.

    Args:
        sample_shape: Desired shape of samples that are drawn from posterior. If
            sample_shape is multidimensional we simply draw `sample_shape.numel()`
            samples and then reshape into the desired shape.
        mcmc_parameters: Dictionary that is passed only to support the API of
            `sbi` v0.17.2 or older.
        mcmc_method: This argument only exists to keep backward-compatibility with
            `sbi` v0.17.2 or older. Please use `method` instead.
        sample_with: This argument only exists to keep backward-compatibility with
            `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
        show_progress_bars: Whether to show sampling progress monitor.

    Returns:
        Samples from posterior.
    """

    self.potential_fn.set_x(self._x_else_default_x(x))

    # Replace arguments that were not passed with their default.
    method = self.method if method is None else method
    thin = self.thin if thin is None else thin
    warmup_steps = self.warmup_steps if warmup_steps is None else warmup_steps
    num_chains = self.num_chains if num_chains is None else num_chains
    init_strategy = self.init_strategy if init_strategy is None else init_strategy
    num_workers = self.num_workers if num_workers is None else num_workers
    mp_context = self.mp_context if mp_context is None else mp_context
    init_strategy_parameters = (
        self.init_strategy_parameters
        if init_strategy_parameters is None
        else init_strategy_parameters
    )
    if init_strategy_num_candidates is not None:
        warn(
            "Passing `init_strategy_num_candidates` is deprecated as of sbi "
            "v0.19.0. Instead, use e.g., "
            f"`init_strategy_parameters={'num_candidate_samples': 1000}`",
            stacklevel=2,
        )
        self.init_strategy_parameters["num_candidate_samples"] = (
            init_strategy_num_candidates
        )
    if sample_with is not None:
        raise ValueError(
            f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
            "`sample_with` is no longer supported. You have to rerun "
            f"`.build_posterior(sample_with={sample_with}).`"
        )
    if mcmc_method is not None:
        warn(
            "You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this "
            "is deprecated and will be removed in a future release. Use `method` "
            "instead of `mcmc_method`.",
            stacklevel=2,
        )
        method = mcmc_method
    if mcmc_parameters:
        warn(
            "You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this "
            "is deprecated and will be removed in a future release. Instead, pass "
            "the variable to `.sample()` directly, e.g. "
            "`posterior.sample((1,), num_chains=5)`.",
            stacklevel=2,
        )
    # The following lines are only for backwards compatibility with sbi v0.17.2 or
    # older.
    m_p = mcmc_parameters or {}  # define to shorten the variable name
    method = _maybe_use_dict_entry(method, "mcmc_method", m_p)
    thin = _maybe_use_dict_entry(thin, "thin", m_p)
    warmup_steps = _maybe_use_dict_entry(warmup_steps, "warmup_steps", m_p)
    num_chains = _maybe_use_dict_entry(num_chains, "num_chains", m_p)
    init_strategy = _maybe_use_dict_entry(init_strategy, "init_strategy", m_p)
    self.potential_ = self._prepare_potential(method)  # type: ignore

    initial_params = self._get_initial_params(
        init_strategy,  # type: ignore
        num_chains,  # type: ignore
        num_workers,
        show_progress_bars,
        **init_strategy_parameters,
    )
    num_samples = torch.Size(sample_shape).numel()

    track_gradients = method in ("hmc_pyro", "nuts_pyro", "hmc_pymc", "nuts_pymc")
    with torch.set_grad_enabled(track_gradients):
        if method in ("slice_np", "slice_np_vectorized"):
            transformed_samples = self._slice_np_mcmc(
                num_samples=num_samples,
                potential_function=self.potential_,
                initial_params=initial_params,
                thin=thin,  # type: ignore
                warmup_steps=warmup_steps,  # type: ignore
                vectorized=(method == "slice_np_vectorized"),
                interchangeable_chains=True,
                num_workers=num_workers,
                show_progress_bars=show_progress_bars,
            )
        elif method in ("hmc_pyro", "nuts_pyro"):
            transformed_samples = self._pyro_mcmc(
                num_samples=num_samples,
                potential_function=self.potential_,
                initial_params=initial_params,
                mcmc_method=method,  # type: ignore
                thin=thin,  # type: ignore
                warmup_steps=warmup_steps,  # type: ignore
                num_chains=num_chains,
                show_progress_bars=show_progress_bars,
                mp_context=mp_context,
            )
        elif method in ("hmc_pymc", "nuts_pymc", "slice_pymc"):
            transformed_samples = self._pymc_mcmc(
                num_samples=num_samples,
                potential_function=self.potential_,
                initial_params=initial_params,
                mcmc_method=method,  # type: ignore
                thin=thin,  # type: ignore
                warmup_steps=warmup_steps,  # type: ignore
                num_chains=num_chains,
                show_progress_bars=show_progress_bars,
                mp_context=mp_context,
            )
        else:
            raise NameError(f"The sampling method {method} is not implemented!")

    samples = self.theta_transform.inv(transformed_samples)
    # NOTE: Currently MCMCPosteriors will require a single dimension for the
    # parameter dimension. With recent ConditionalDensity(Ratio) estimators, we
    # can have multiple dimensions for the parameter dimension.
    samples = samples.reshape((*sample_shape, -1))  # type: ignore

    return samples

sample_batched(sample_shape, x, method=None, thin=None, warmup_steps=None, num_chains=None, init_strategy=None, init_strategy_parameters=None, num_workers=None, mp_context=None, show_progress_bars=True)

Given a batch of observations [x_1, …, x_B] this function samples from posteriors \(p(\theta|x_1)\), … ,\(p(\theta|x_B)\), in a batched (i.e. vectorized) manner.

Check the __init__() method for a description of all arguments as well as their default values.

Parameters:

Name Type Description Default
sample_shape Shape

Desired shape of samples that are drawn from the posterior given every observation.

required
x Tensor

A batch of observations, of shape (batch_dim, event_shape_x). batch_dim corresponds to the number of observations to be drawn.

required
method Optional[str]

Method used for MCMC sampling, e.g., “slice_np_vectorized”.

None
thin Optional[int]

The thinning factor for the chain, default 1 (no thinning).

None
warmup_steps Optional[int]

The initial number of samples to discard.

None
num_chains Optional[int]

The number of chains used for each x passed in the batch.

None
init_strategy Optional[str]

The initialisation strategy for chains.

None
init_strategy_parameters Optional[Dict[str, Any]]

Dictionary of keyword arguments passed to the init strategy.

None
num_workers Optional[int]

number of cpu cores used to parallelize initial parameter generation and mcmc sampling.

None
mp_context Optional[str]

Multiprocessing start method, either "fork" or "spawn"

None
show_progress_bars bool

Whether to show sampling progress monitor.

True

Returns:

Type Description
Tensor

Samples from the posteriors of shape (*sample_shape, B, *input_shape)

Source code in sbi/inference/posteriors/mcmc_posterior.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
def sample_batched(
    self,
    sample_shape: Shape,
    x: Tensor,
    method: Optional[str] = None,
    thin: Optional[int] = None,
    warmup_steps: Optional[int] = None,
    num_chains: Optional[int] = None,
    init_strategy: Optional[str] = None,
    init_strategy_parameters: Optional[Dict[str, Any]] = None,
    num_workers: Optional[int] = None,
    mp_context: Optional[str] = None,
    show_progress_bars: bool = True,
) -> Tensor:
    r"""Given a batch of observations [x_1, ..., x_B] this function samples from
    posteriors $p(\theta|x_1)$, ... ,$p(\theta|x_B)$, in a batched (i.e. vectorized)
    manner.

    Check the `__init__()` method for a description of all arguments as well as
    their default values.

    Args:
        sample_shape: Desired shape of samples that are drawn from the posterior
            given every observation.
        x: A batch of observations, of shape `(batch_dim, event_shape_x)`.
            `batch_dim` corresponds to the number of observations to be
            drawn.
        method: Method used for MCMC sampling, e.g., "slice_np_vectorized".
        thin: The thinning factor for the chain, default 1 (no thinning).
        warmup_steps: The initial number of samples to discard.
        num_chains: The number of chains used for each `x` passed in the batch.
        init_strategy: The initialisation strategy for chains.
        init_strategy_parameters: Dictionary of keyword arguments passed to
            the init strategy.
        num_workers: number of cpu cores used to parallelize initial
            parameter generation and mcmc sampling.
        mp_context: Multiprocessing start method, either `"fork"` or `"spawn"`
        show_progress_bars: Whether to show sampling progress monitor.

    Returns:
        Samples from the posteriors of shape (*sample_shape, B, *input_shape)
    """

    # Replace arguments that were not passed with their default.
    method = self.method if method is None else method
    thin = self.thin if thin is None else thin
    warmup_steps = self.warmup_steps if warmup_steps is None else warmup_steps
    num_chains = self.num_chains if num_chains is None else num_chains
    init_strategy = self.init_strategy if init_strategy is None else init_strategy
    num_workers = self.num_workers if num_workers is None else num_workers
    mp_context = self.mp_context if mp_context is None else mp_context
    init_strategy_parameters = (
        self.init_strategy_parameters
        if init_strategy_parameters is None
        else init_strategy_parameters
    )

    assert (
        method == "slice_np_vectorized"
    ), "Batched sampling only supported for vectorized samplers!"

    # warn if num_chains is larger than num requested samples
    if num_chains > torch.Size(sample_shape).numel():
        warnings.warn(
            "The passed number of MCMC chains is larger than the number of "
            f"requested samples: {num_chains} > {torch.Size(sample_shape).numel()},"
            f" resetting it to {torch.Size(sample_shape).numel()}.",
            stacklevel=2,
        )
        num_chains = torch.Size(sample_shape).numel()

    # custom shape handling to make sure to match the batch size of x and theta
    # without unnecessary combinations.
    if len(x.shape) == 1:
        x = x.unsqueeze(0)
    batch_size = x.shape[0]

    x = reshape_to_batch_event(x, event_shape=x.shape[1:])

    # For batched sampling, we want `num_chains` for each observation in the batch.
    # Here we repeat the observations ABC -> AAABBBCCC, so that the chains are
    # in the order of the observations.
    x_ = x.repeat_interleave(num_chains, dim=0)

    self.potential_fn.set_x(x_, x_is_iid=False)
    self.potential_ = self._prepare_potential(method)  # type: ignore

    # For each observation in the batch, we have num_chains independent chains.
    num_chains_extended = batch_size * num_chains
    if num_chains_extended > 100:
        warnings.warn(
            "Note that for batched sampling, we use num_chains many chains for each"
            " x in the batch. With the given settings, this results in a large "
            f"number large number of chains ({num_chains_extended}), which can be "
            "slow and memory-intensive for vectorized MCMC. Consider reducing the "
            "number of chains.",
            stacklevel=2,
        )
    init_strategy_parameters["num_return_samples"] = num_chains_extended
    initial_params = self._get_initial_params_batched(
        x,
        init_strategy,  # type: ignore
        num_chains,  # type: ignore
        num_workers,
        show_progress_bars,
        **init_strategy_parameters,
    )
    # We need num_samples from each posterior in the batch
    num_samples = torch.Size(sample_shape).numel() * batch_size

    with torch.set_grad_enabled(False):
        transformed_samples = self._slice_np_mcmc(
            num_samples=num_samples,
            potential_function=self.potential_,
            initial_params=initial_params,
            thin=thin,  # type: ignore
            warmup_steps=warmup_steps,  # type: ignore
            vectorized=(method == "slice_np_vectorized"),
            interchangeable_chains=False,
            num_workers=num_workers,
            show_progress_bars=show_progress_bars,
        )

    # (num_chains_extended, samples_per_chain, *input_shape)
    samples_per_chain: Tensor = self.theta_transform.inv(transformed_samples)  # type: ignore
    dim_theta = samples_per_chain.shape[-1]
    # We need to collect samples for each x from the respective chains.
    # However, using samples.reshape(*sample_shape, batch_size, dim_theta)
    # does not combine the samples in the right order, since this mixes
    # samples that belong to different `x`. The following permute is a
    # workaround to reshape the samples in the right order.
    samples_per_x = samples_per_chain.reshape((
        batch_size,
        # We are flattening the sample shape here using -1 because we might have
        # generated more samples than requested (more chains, or multiple of
        # chains not matching sample_shape)
        -1,
        dim_theta,
    )).permute(1, 0, -1)

    # Shape is now (-1, batch_size, dim_theta)
    # We can now select the number of requested samples
    samples = samples_per_x[: torch.Size(sample_shape).numel()]
    # and reshape into (*sample_shape, batch_size, dim_theta)
    samples = samples.reshape((*sample_shape, batch_size, dim_theta))
    return samples

set_mcmc_method(method)

Sets sampling method to for MCMC and returns NeuralPosterior.

Parameters:

Name Type Description Default
method str

Method to use.

required

Returns:

Type Description
NeuralPosterior

NeuralPosterior for chainable calls.

Source code in sbi/inference/posteriors/mcmc_posterior.py
170
171
172
173
174
175
176
177
178
179
180
def set_mcmc_method(self, method: str) -> "NeuralPosterior":
    """Sets sampling method to for MCMC and returns `NeuralPosterior`.

    Args:
        method: Method to use.

    Returns:
        `NeuralPosterior` for chainable calls.
    """
    self._mcmc_method = method
    return self

RejectionPosterior

Bases: NeuralPosterior

Provides rejection sampling to sample from the posterior.

SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling.

Source code in sbi/inference/posteriors/rejection_posterior.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
class RejectionPosterior(NeuralPosterior):
    r"""Provides rejection sampling to sample from the posterior.<br/><br/>
    SNLE or SNRE train neural networks to approximate the likelihood(-ratios).
    `RejectionPosterior` allows to sample from the posterior with rejection sampling.
    """

    def __init__(
        self,
        potential_fn: Union[Callable, BasePotential],
        proposal: Any,
        theta_transform: Optional[TorchTransform] = None,
        max_sampling_batch_size: int = 10_000,
        num_samples_to_find_max: int = 10_000,
        num_iter_to_find_max: int = 100,
        m: float = 1.2,
        device: Optional[str] = None,
        x_shape: Optional[torch.Size] = None,
    ):
        """
        Args:
            potential_fn: The potential function from which to draw samples. Must be a
                `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
            proposal: The proposal distribution.
            theta_transform: Transformation that is applied to parameters. Is not used
                during but only when calling `.map()`.
            max_sampling_batch_size: The batchsize of samples being drawn from
                the proposal at every iteration.
            num_samples_to_find_max: The number of samples that are used to find the
                maximum of the `potential_fn / proposal` ratio.
            num_iter_to_find_max: The number of gradient ascent iterations to find the
                maximum of the `potential_fn / proposal` ratio.
            m: Multiplier to the `potential_fn / proposal` ratio.
            device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
                `potential_fn.device` is used.
            x_shape: Deprecated, should not be passed.
        """
        super().__init__(
            potential_fn,
            theta_transform=theta_transform,
            device=device,
            x_shape=x_shape,
        )

        self.proposal = proposal
        self.max_sampling_batch_size = max_sampling_batch_size
        self.num_samples_to_find_max = num_samples_to_find_max
        self.num_iter_to_find_max = num_iter_to_find_max
        self.m = m

        self._purpose = (
            "It provides rejection sampling to .sample() from the posterior and "
            "can evaluate the _unnormalized_ posterior density with .log_prob()."
        )

    def log_prob(
        self, theta: Tensor, x: Optional[Tensor] = None, track_gradients: bool = False
    ) -> Tensor:
        r"""Returns the log-probability of theta under the posterior.

        Args:
            theta: Parameters $\theta$.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.

        Returns:
            `len($\theta$)`-shaped log-probability.
        """
        warn(
            "`.log_prob()` is deprecated for methods that can only evaluate the "
            "log-probability up to a normalizing constant. Use `.potential()` instead.",
            stacklevel=2,
        )
        warn("The log-probability is unnormalized!", stacklevel=2)

        self.potential_fn.set_x(self._x_else_default_x(x))

        theta = ensure_theta_batched(torch.as_tensor(theta))
        return self.potential_fn(
            theta.to(self._device), track_gradients=track_gradients
        )

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        max_sampling_batch_size: Optional[int] = None,
        num_samples_to_find_max: Optional[int] = None,
        num_iter_to_find_max: Optional[int] = None,
        m: Optional[float] = None,
        sample_with: Optional[str] = None,
        show_progress_bars: bool = True,
    ):
        r"""Return samples from posterior $p(\theta|x)$ via rejection sampling.

        Args:
            sample_shape: Desired shape of samples that are drawn from posterior. If
                sample_shape is multidimensional we simply draw `sample_shape.numel()`
                samples and then reshape into the desired shape.
            sample_with: This argument only exists to keep backward-compatibility with
                `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
            show_progress_bars: Whether to show sampling progress monitor.

        Returns:
            Samples from posterior.
        """
        num_samples = torch.Size(sample_shape).numel()
        self.potential_fn.set_x(self._x_else_default_x(x))

        potential = partial(self.potential_fn, track_gradients=True)

        if sample_with is not None:
            raise ValueError(
                f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
                f"`sample_with` is no longer supported. You have to rerun "
                f"`.build_posterior(sample_with={sample_with}).`"
            )
        # Replace arguments that were not passed with their default.
        max_sampling_batch_size = (
            self.max_sampling_batch_size
            if max_sampling_batch_size is None
            else max_sampling_batch_size
        )
        num_samples_to_find_max = (
            self.num_samples_to_find_max
            if num_samples_to_find_max is None
            else num_samples_to_find_max
        )
        num_iter_to_find_max = (
            self.num_iter_to_find_max
            if num_iter_to_find_max is None
            else num_iter_to_find_max
        )
        m = self.m if m is None else m

        samples, _ = rejection_sample(
            potential,
            proposal=self.proposal,
            num_samples=num_samples,
            show_progress_bars=show_progress_bars,
            warn_acceptance=0.01,
            max_sampling_batch_size=max_sampling_batch_size,
            num_samples_to_find_max=num_samples_to_find_max,
            num_iter_to_find_max=num_iter_to_find_max,
            m=m,
            device=self._device,
        )

        return samples.reshape((*sample_shape, -1))

    def sample_batched(
        self,
        sample_shape: Shape,
        x: Tensor,
        max_sampling_batch_size: int = 10000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        raise NotImplementedError(
            "Batched sampling is not implemented for RejectionPosterior. \
            Alternatively you can use `sample` in a loop \
            [posterior.sample(theta, x_o) for x_o in x]."
        )

    def map(
        self,
        x: Optional[Tensor] = None,
        num_iter: int = 1_000,
        num_to_optimize: int = 100,
        learning_rate: float = 0.01,
        init_method: Union[str, Tensor] = "proposal",
        num_init_samples: int = 1_000,
        save_best_every: int = 10,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from
                the posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.
            log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
                {'norm_posterior': True} for SNPE.

        Returns:
            The MAP estimate.
        """
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

__init__(potential_fn, proposal, theta_transform=None, max_sampling_batch_size=10000, num_samples_to_find_max=10000, num_iter_to_find_max=100, m=1.2, device=None, x_shape=None)

Parameters:

Name Type Description Default
potential_fn Union[Callable, BasePotential]

The potential function from which to draw samples. Must be a BasePotential or a Callable which takes theta and x_o as inputs.

required
proposal Any

The proposal distribution.

required
theta_transform Optional[TorchTransform]

Transformation that is applied to parameters. Is not used during but only when calling .map().

None
max_sampling_batch_size int

The batchsize of samples being drawn from the proposal at every iteration.

10000
num_samples_to_find_max int

The number of samples that are used to find the maximum of the potential_fn / proposal ratio.

10000
num_iter_to_find_max int

The number of gradient ascent iterations to find the maximum of the potential_fn / proposal ratio.

100
m float

Multiplier to the potential_fn / proposal ratio.

1.2
device Optional[str]

Training device, e.g., “cpu”, “cuda” or “cuda:0”. If None, potential_fn.device is used.

None
x_shape Optional[Size]

Deprecated, should not be passed.

None
Source code in sbi/inference/posteriors/rejection_posterior.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    potential_fn: Union[Callable, BasePotential],
    proposal: Any,
    theta_transform: Optional[TorchTransform] = None,
    max_sampling_batch_size: int = 10_000,
    num_samples_to_find_max: int = 10_000,
    num_iter_to_find_max: int = 100,
    m: float = 1.2,
    device: Optional[str] = None,
    x_shape: Optional[torch.Size] = None,
):
    """
    Args:
        potential_fn: The potential function from which to draw samples. Must be a
            `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
        proposal: The proposal distribution.
        theta_transform: Transformation that is applied to parameters. Is not used
            during but only when calling `.map()`.
        max_sampling_batch_size: The batchsize of samples being drawn from
            the proposal at every iteration.
        num_samples_to_find_max: The number of samples that are used to find the
            maximum of the `potential_fn / proposal` ratio.
        num_iter_to_find_max: The number of gradient ascent iterations to find the
            maximum of the `potential_fn / proposal` ratio.
        m: Multiplier to the `potential_fn / proposal` ratio.
        device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
            `potential_fn.device` is used.
        x_shape: Deprecated, should not be passed.
    """
    super().__init__(
        potential_fn,
        theta_transform=theta_transform,
        device=device,
        x_shape=x_shape,
    )

    self.proposal = proposal
    self.max_sampling_batch_size = max_sampling_batch_size
    self.num_samples_to_find_max = num_samples_to_find_max
    self.num_iter_to_find_max = num_iter_to_find_max
    self.m = m

    self._purpose = (
        "It provides rejection sampling to .sample() from the posterior and "
        "can evaluate the _unnormalized_ posterior density with .log_prob()."
    )

log_prob(theta, x=None, track_gradients=False)

Returns the log-probability of theta under the posterior.

Parameters:

Name Type Description Default
theta Tensor

Parameters \(\theta\).

required
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False

Returns:

Type Description
Tensor

len($\theta$)-shaped log-probability.

Source code in sbi/inference/posteriors/rejection_posterior.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def log_prob(
    self, theta: Tensor, x: Optional[Tensor] = None, track_gradients: bool = False
) -> Tensor:
    r"""Returns the log-probability of theta under the posterior.

    Args:
        theta: Parameters $\theta$.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.

    Returns:
        `len($\theta$)`-shaped log-probability.
    """
    warn(
        "`.log_prob()` is deprecated for methods that can only evaluate the "
        "log-probability up to a normalizing constant. Use `.potential()` instead.",
        stacklevel=2,
    )
    warn("The log-probability is unnormalized!", stacklevel=2)

    self.potential_fn.set_x(self._x_else_default_x(x))

    theta = ensure_theta_batched(torch.as_tensor(theta))
    return self.potential_fn(
        theta.to(self._device), track_gradients=track_gradients
    )

map(x=None, num_iter=1000, num_to_optimize=100, learning_rate=0.01, init_method='proposal', num_init_samples=1000, save_best_every=10, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
learning_rate float

Learning rate of the optimizer.

0.01
init_method Union[str, Tensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'proposal'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

1000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

100
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

10
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False
log_prob_kwargs

Will be empty for SNLE and SNRE. Will contain {‘norm_posterior’: True} for SNPE.

required

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/rejection_posterior.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def map(
    self,
    x: Optional[Tensor] = None,
    num_iter: int = 1_000,
    num_to_optimize: int = 100,
    learning_rate: float = 0.01,
    init_method: Union[str, Tensor] = "proposal",
    num_init_samples: int = 1_000,
    save_best_every: int = 10,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from
            the posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.
        log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
            {'norm_posterior': True} for SNPE.

    Returns:
        The MAP estimate.
    """
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, max_sampling_batch_size=None, num_samples_to_find_max=None, num_iter_to_find_max=None, m=None, sample_with=None, show_progress_bars=True)

Return samples from posterior \(p(\theta|x)\) via rejection sampling.

Parameters:

Name Type Description Default
sample_shape Shape

Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape.

Size()
sample_with Optional[str]

This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error.

None
show_progress_bars bool

Whether to show sampling progress monitor.

True

Returns:

Type Description

Samples from posterior.

Source code in sbi/inference/posteriors/rejection_posterior.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    max_sampling_batch_size: Optional[int] = None,
    num_samples_to_find_max: Optional[int] = None,
    num_iter_to_find_max: Optional[int] = None,
    m: Optional[float] = None,
    sample_with: Optional[str] = None,
    show_progress_bars: bool = True,
):
    r"""Return samples from posterior $p(\theta|x)$ via rejection sampling.

    Args:
        sample_shape: Desired shape of samples that are drawn from posterior. If
            sample_shape is multidimensional we simply draw `sample_shape.numel()`
            samples and then reshape into the desired shape.
        sample_with: This argument only exists to keep backward-compatibility with
            `sbi` v0.17.2 or older. If it is set, we instantly raise an error.
        show_progress_bars: Whether to show sampling progress monitor.

    Returns:
        Samples from posterior.
    """
    num_samples = torch.Size(sample_shape).numel()
    self.potential_fn.set_x(self._x_else_default_x(x))

    potential = partial(self.potential_fn, track_gradients=True)

    if sample_with is not None:
        raise ValueError(
            f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
            f"`sample_with` is no longer supported. You have to rerun "
            f"`.build_posterior(sample_with={sample_with}).`"
        )
    # Replace arguments that were not passed with their default.
    max_sampling_batch_size = (
        self.max_sampling_batch_size
        if max_sampling_batch_size is None
        else max_sampling_batch_size
    )
    num_samples_to_find_max = (
        self.num_samples_to_find_max
        if num_samples_to_find_max is None
        else num_samples_to_find_max
    )
    num_iter_to_find_max = (
        self.num_iter_to_find_max
        if num_iter_to_find_max is None
        else num_iter_to_find_max
    )
    m = self.m if m is None else m

    samples, _ = rejection_sample(
        potential,
        proposal=self.proposal,
        num_samples=num_samples,
        show_progress_bars=show_progress_bars,
        warn_acceptance=0.01,
        max_sampling_batch_size=max_sampling_batch_size,
        num_samples_to_find_max=num_samples_to_find_max,
        num_iter_to_find_max=num_iter_to_find_max,
        m=m,
        device=self._device,
    )

    return samples.reshape((*sample_shape, -1))

ScorePosterior

Bases: NeuralPosterior

Posterior \(p(\theta|x_o)\) with log_prob() and sample() methods. It samples from the diffusion model given the score_estimator and rejects samples that lie outside of the prior bounds.

The posterior is defined by a score estimator and a prior. The score estimator provides the gradient of the log-posterior with respect to the parameters. The prior is used to reject samples that lie outside of the prior bounds.

Sampling is done by running a diffusion process with a predictor and optionally a corrector.

Log probabilities are obtained by calling the potential function, which in turn uses zuko probabilistic ODEs to compute the log-probability.

Source code in sbi/inference/posteriors/score_posterior.py
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
class ScorePosterior(NeuralPosterior):
    r"""Posterior $p(\theta|x_o)$ with `log_prob()` and `sample()` methods. It samples
    from the diffusion model given the score_estimator and rejects samples that lie
    outside of the prior bounds.

    The posterior is defined by a score estimator and a prior. The score estimator
    provides the gradient of the log-posterior with respect to the parameters. The prior
    is used to reject samples that lie outside of the prior bounds.

    Sampling is done by running a diffusion process with a predictor and optionally a
    corrector.

    Log probabilities are obtained by calling the potential function, which in turn uses
    zuko probabilistic ODEs to compute the log-probability.
    """

    def __init__(
        self,
        score_estimator: ConditionalScoreEstimator,
        prior: Distribution,
        max_sampling_batch_size: int = 10_000,
        device: Optional[str] = None,
        enable_transform: bool = False,
        sample_with: str = "sde",
    ):
        """
        Args:
            prior: Prior distribution with `.log_prob()` and `.sample()`.
            score_estimator: The trained neural score estimator.
            max_sampling_batch_size: Batchsize of samples being drawn from
                the proposal at every iteration.
            device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
                `potential_fn.device` is used.
            enable_transform: Whether to transform parameters to unconstrained space
                during MAP optimization. When False, an identity transform will be
                returned for `theta_transform`. True is not supported yet.
            sample_with: Whether to sample from the posterior using the ODE-based
                sampler or the SDE-based sampler.
        """

        check_prior(prior)
        potential_fn, theta_transform = score_estimator_based_potential(
            score_estimator,
            prior,
            x_o=None,
            enable_transform=enable_transform,
        )
        super().__init__(
            potential_fn=potential_fn,
            theta_transform=theta_transform,
            device=device,
        )
        # Set the potential function type.
        self.potential_fn: PosteriorScoreBasedPotential = potential_fn

        self.prior = prior
        self.score_estimator = score_estimator

        self.sample_with = sample_with
        assert self.sample_with in [
            "ode",
            "sde",
        ], f"sample_with must be 'ode' or 'sde', but is {self.sample_with}."
        self.max_sampling_batch_size = max_sampling_batch_size

        self._purpose = """It samples from the diffusion model given the \
            score_estimator."""

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        predictor: Union[str, Predictor] = "euler_maruyama",
        corrector: Optional[Union[str, Corrector]] = None,
        predictor_params: Optional[Dict] = None,
        corrector_params: Optional[Dict] = None,
        steps: int = 500,
        ts: Optional[Tensor] = None,
        max_sampling_batch_size: int = 10_000,
        sample_with: Optional[str] = None,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Return samples from posterior distribution $p(\theta|x)$.

        Args:
            sample_shape: Shape of the samples to be drawn.
            x: Deprecated - use `.set_default_x()` prior to `.sample()`.
            predictor: The predictor for the diffusion-based sampler. Can be a string or
                a custom predictor following the API in `sbi.samplers.score.predictors`.
                Currently, only `euler_maruyama` is implemented.
            corrector: The corrector for the diffusion-based sampler. Either of
                [None].
            predictor_params: Additional parameters passed to predictor.
            corrector_params: Additional parameters passed to corrector.
            steps: Number of steps to take for the Euler-Maruyama method.
            ts: Time points at which to evaluate the diffusion process. If None, a
                linear grid between t_max and t_min is used.
            max_sampling_batch_size: Maximum batch size for sampling.
            sample_with: Deprecated - use `.build_posterior(sample_with=...)` prior to
                `.sample()`.
            show_progress_bars: Whether to show a progress bar during sampling.
        """

        if sample_with is not None:
            raise ValueError(
                f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
                f"`sample_with` is no longer supported. You have to rerun "
                f"`.build_posterior(sample_with={sample_with}).`"
            )

        x = self._x_else_default_x(x)
        x = reshape_to_batch_event(x, self.score_estimator.condition_shape)
        self.potential_fn.set_x(x)

        if self.sample_with == "ode":
            samples = self.sample_via_zuko(sample_shape=sample_shape, x=x)
        elif self.sample_with == "sde":
            samples = self._sample_via_diffusion(
                sample_shape=sample_shape,
                predictor=predictor,
                corrector=corrector,
                predictor_params=predictor_params,
                corrector_params=corrector_params,
                steps=steps,
                ts=ts,
                max_sampling_batch_size=max_sampling_batch_size,
                show_progress_bars=show_progress_bars,
            )

        return samples

    def _sample_via_diffusion(
        self,
        sample_shape: Shape = torch.Size(),
        predictor: Union[str, Predictor] = "euler_maruyama",
        corrector: Optional[Union[str, Corrector]] = None,
        predictor_params: Optional[Dict] = None,
        corrector_params: Optional[Dict] = None,
        steps: int = 500,
        ts: Optional[Tensor] = None,
        max_sampling_batch_size: int = 10_000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        r"""Return samples from posterior distribution $p(\theta|x)$.

        Args:
            sample_shape: Shape of the samples to be drawn.
            x: Deprecated - use `.set_default_x()` prior to `.sample()`.
            predictor: The predictor for the diffusion-based sampler. Can be a string or
                a custom predictor following the API in `sbi.samplers.score.predictors`.
                Currently, only `euler_maruyama` is implemented.
            corrector: The corrector for the diffusion-based sampler. Either of
                [None].
            steps: Number of steps to take for the Euler-Maruyama method.
            ts: Time points at which to evaluate the diffusion process. If None, a
                linear grid between t_max and t_min is used.
            max_sampling_batch_size: Maximum batch size for sampling.
            sample_with: Deprecated - use `.build_posterior(sample_with=...)` prior to
                `.sample()`.
            show_progress_bars: Whether to show a progress bar during sampling.
        """

        num_samples = torch.Size(sample_shape).numel()

        max_sampling_batch_size = (
            self.max_sampling_batch_size
            if max_sampling_batch_size is None
            else max_sampling_batch_size
        )

        if ts is None:
            t_max = self.score_estimator.t_max
            t_min = self.score_estimator.t_min
            ts = torch.linspace(t_max, t_min, steps)

        diffuser = Diffuser(
            self.potential_fn,
            predictor=predictor,
            corrector=corrector,
            predictor_params=predictor_params,
            corrector_params=corrector_params,
        )
        max_sampling_batch_size = min(max_sampling_batch_size, num_samples)
        samples = []
        num_iter = num_samples // max_sampling_batch_size
        num_iter = (
            num_iter + 1 if (num_samples % max_sampling_batch_size) != 0 else num_iter
        )
        for _ in range(num_iter):
            samples.append(
                diffuser.run(
                    num_samples=max_sampling_batch_size,
                    ts=ts,
                    show_progress_bars=show_progress_bars,
                )
            )
        samples = torch.cat(samples, dim=0)[:num_samples]

        return samples.reshape(sample_shape + self.score_estimator.input_shape)

    def sample_via_zuko(
        self,
        x: Tensor,
        sample_shape: Shape = torch.Size(),
    ) -> Tensor:
        r"""Return samples from posterior distribution with probability flow ODE.

        This build the probability flow ODE and then samples from the corresponding
        flow. This is implemented via the zuko library.

        Args:
            x: Condition.
            sample_shape: The shape of the samples to be returned.

        Returns:
            Samples.
        """
        num_samples = torch.Size(sample_shape).numel()

        flow = self.potential_fn.get_continuous_normalizing_flow(condition=x)
        samples = flow.sample(torch.Size((num_samples,)))

        return samples.reshape(sample_shape + self.score_estimator.input_shape)

    def log_prob(
        self,
        theta: Tensor,
        x: Optional[Tensor] = None,
        track_gradients: bool = False,
        atol: float = 1e-5,
        rtol: float = 1e-6,
        exact: bool = True,
    ) -> Tensor:
        r"""Returns the log-probability of the posterior $p(\theta|x)$.

        This requires building and evaluating the probability flow ODE.

        Args:
            theta: Parameters $\theta$.
            x: Observed data $x_o$. If None, the default $x_o$ is used.
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis, but increases memory
                consumption.
            atol: Absolute tolerance for the ODE solver.
            rtol: Relative tolerance for the ODE solver.
            exact: Whether to use the exact Jacobian of the transformation or an
                stochastic approximation, which is faster but less accurate.

        Returns:
            `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the
            support of the prior, -∞ (corresponding to 0 probability) outside.
        """
        self.potential_fn.set_x(self._x_else_default_x(x))

        theta = ensure_theta_batched(torch.as_tensor(theta))
        return self.potential_fn(
            theta.to(self._device),
            track_gradients=track_gradients,
            atol=atol,
            rtol=rtol,
            exact=exact,
        )

    def sample_batched(
        self,
        sample_shape: torch.Size,
        x: Tensor,
        max_sampling_batch_size: int = 10000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        raise NotImplementedError(
            "Batched sampling is not implemented for ScorePosterior."
        )

    def map(
        self,
        x: Optional[Tensor] = None,
        num_iter: int = 1000,
        num_to_optimize: int = 1000,
        learning_rate: float = 1e-5,
        init_method: Union[str, Tensor] = "posterior",
        num_init_samples: int = 1000,
        save_best_every: int = 1000,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from
                the posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.

        Returns:
            The MAP estimate.
        """
        raise NotImplementedError(
            "MAP estimation is currently not working accurately for ScorePosterior."
        )
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

__init__(score_estimator, prior, max_sampling_batch_size=10000, device=None, enable_transform=False, sample_with='sde')

Parameters:

Name Type Description Default
prior Distribution

Prior distribution with .log_prob() and .sample().

required
score_estimator ConditionalScoreEstimator

The trained neural score estimator.

required
max_sampling_batch_size int

Batchsize of samples being drawn from the proposal at every iteration.

10000
device Optional[str]

Training device, e.g., “cpu”, “cuda” or “cuda:0”. If None, potential_fn.device is used.

None
enable_transform bool

Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform. True is not supported yet.

False
sample_with str

Whether to sample from the posterior using the ODE-based sampler or the SDE-based sampler.

'sde'
Source code in sbi/inference/posteriors/score_posterior.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def __init__(
    self,
    score_estimator: ConditionalScoreEstimator,
    prior: Distribution,
    max_sampling_batch_size: int = 10_000,
    device: Optional[str] = None,
    enable_transform: bool = False,
    sample_with: str = "sde",
):
    """
    Args:
        prior: Prior distribution with `.log_prob()` and `.sample()`.
        score_estimator: The trained neural score estimator.
        max_sampling_batch_size: Batchsize of samples being drawn from
            the proposal at every iteration.
        device: Training device, e.g., "cpu", "cuda" or "cuda:0". If None,
            `potential_fn.device` is used.
        enable_transform: Whether to transform parameters to unconstrained space
            during MAP optimization. When False, an identity transform will be
            returned for `theta_transform`. True is not supported yet.
        sample_with: Whether to sample from the posterior using the ODE-based
            sampler or the SDE-based sampler.
    """

    check_prior(prior)
    potential_fn, theta_transform = score_estimator_based_potential(
        score_estimator,
        prior,
        x_o=None,
        enable_transform=enable_transform,
    )
    super().__init__(
        potential_fn=potential_fn,
        theta_transform=theta_transform,
        device=device,
    )
    # Set the potential function type.
    self.potential_fn: PosteriorScoreBasedPotential = potential_fn

    self.prior = prior
    self.score_estimator = score_estimator

    self.sample_with = sample_with
    assert self.sample_with in [
        "ode",
        "sde",
    ], f"sample_with must be 'ode' or 'sde', but is {self.sample_with}."
    self.max_sampling_batch_size = max_sampling_batch_size

    self._purpose = """It samples from the diffusion model given the \
        score_estimator."""

log_prob(theta, x=None, track_gradients=False, atol=1e-05, rtol=1e-06, exact=True)

Returns the log-probability of the posterior \(p(\theta|x)\).

This requires building and evaluating the probability flow ODE.

Parameters:

Name Type Description Default
theta Tensor

Parameters \(\theta\).

required
x Optional[Tensor]

Observed data \(x_o\). If None, the default \(x_o\) is used.

None
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption.

False
atol float

Absolute tolerance for the ODE solver.

1e-05
rtol float

Relative tolerance for the ODE solver.

1e-06
exact bool

Whether to use the exact Jacobian of the transformation or an stochastic approximation, which is faster but less accurate.

True

Returns:

Type Description
Tensor

(len(θ),)-shaped log posterior probability \(\log p(\theta|x)\) for θ in the

Tensor

support of the prior, -∞ (corresponding to 0 probability) outside.

Source code in sbi/inference/posteriors/score_posterior.py
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def log_prob(
    self,
    theta: Tensor,
    x: Optional[Tensor] = None,
    track_gradients: bool = False,
    atol: float = 1e-5,
    rtol: float = 1e-6,
    exact: bool = True,
) -> Tensor:
    r"""Returns the log-probability of the posterior $p(\theta|x)$.

    This requires building and evaluating the probability flow ODE.

    Args:
        theta: Parameters $\theta$.
        x: Observed data $x_o$. If None, the default $x_o$ is used.
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis, but increases memory
            consumption.
        atol: Absolute tolerance for the ODE solver.
        rtol: Relative tolerance for the ODE solver.
        exact: Whether to use the exact Jacobian of the transformation or an
            stochastic approximation, which is faster but less accurate.

    Returns:
        `(len(θ),)`-shaped log posterior probability $\log p(\theta|x)$ for θ in the
        support of the prior, -∞ (corresponding to 0 probability) outside.
    """
    self.potential_fn.set_x(self._x_else_default_x(x))

    theta = ensure_theta_batched(torch.as_tensor(theta))
    return self.potential_fn(
        theta.to(self._device),
        track_gradients=track_gradients,
        atol=atol,
        rtol=rtol,
        exact=exact,
    )

map(x=None, num_iter=1000, num_to_optimize=1000, learning_rate=1e-05, init_method='posterior', num_init_samples=1000, save_best_every=1000, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

1000
learning_rate float

Learning rate of the optimizer.

1e-05
init_method Union[str, Tensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'posterior'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

1000
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

1000
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/score_posterior.py
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
def map(
    self,
    x: Optional[Tensor] = None,
    num_iter: int = 1000,
    num_to_optimize: int = 1000,
    learning_rate: float = 1e-5,
    init_method: Union[str, Tensor] = "posterior",
    num_init_samples: int = 1000,
    save_best_every: int = 1000,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from
            the posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.

    Returns:
        The MAP estimate.
    """
    raise NotImplementedError(
        "MAP estimation is currently not working accurately for ScorePosterior."
    )
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, predictor='euler_maruyama', corrector=None, predictor_params=None, corrector_params=None, steps=500, ts=None, max_sampling_batch_size=10000, sample_with=None, show_progress_bars=True)

Return samples from posterior distribution \(p(\theta|x)\).

Parameters:

Name Type Description Default
sample_shape Shape

Shape of the samples to be drawn.

Size()
x Optional[Tensor]

Deprecated - use .set_default_x() prior to .sample().

None
predictor Union[str, Predictor]

The predictor for the diffusion-based sampler. Can be a string or a custom predictor following the API in sbi.samplers.score.predictors. Currently, only euler_maruyama is implemented.

'euler_maruyama'
corrector Optional[Union[str, Corrector]]

The corrector for the diffusion-based sampler. Either of [None].

None
predictor_params Optional[Dict]

Additional parameters passed to predictor.

None
corrector_params Optional[Dict]

Additional parameters passed to corrector.

None
steps int

Number of steps to take for the Euler-Maruyama method.

500
ts Optional[Tensor]

Time points at which to evaluate the diffusion process. If None, a linear grid between t_max and t_min is used.

None
max_sampling_batch_size int

Maximum batch size for sampling.

10000
sample_with Optional[str]

Deprecated - use .build_posterior(sample_with=...) prior to .sample().

None
show_progress_bars bool

Whether to show a progress bar during sampling.

True
Source code in sbi/inference/posteriors/score_posterior.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    predictor: Union[str, Predictor] = "euler_maruyama",
    corrector: Optional[Union[str, Corrector]] = None,
    predictor_params: Optional[Dict] = None,
    corrector_params: Optional[Dict] = None,
    steps: int = 500,
    ts: Optional[Tensor] = None,
    max_sampling_batch_size: int = 10_000,
    sample_with: Optional[str] = None,
    show_progress_bars: bool = True,
) -> Tensor:
    r"""Return samples from posterior distribution $p(\theta|x)$.

    Args:
        sample_shape: Shape of the samples to be drawn.
        x: Deprecated - use `.set_default_x()` prior to `.sample()`.
        predictor: The predictor for the diffusion-based sampler. Can be a string or
            a custom predictor following the API in `sbi.samplers.score.predictors`.
            Currently, only `euler_maruyama` is implemented.
        corrector: The corrector for the diffusion-based sampler. Either of
            [None].
        predictor_params: Additional parameters passed to predictor.
        corrector_params: Additional parameters passed to corrector.
        steps: Number of steps to take for the Euler-Maruyama method.
        ts: Time points at which to evaluate the diffusion process. If None, a
            linear grid between t_max and t_min is used.
        max_sampling_batch_size: Maximum batch size for sampling.
        sample_with: Deprecated - use `.build_posterior(sample_with=...)` prior to
            `.sample()`.
        show_progress_bars: Whether to show a progress bar during sampling.
    """

    if sample_with is not None:
        raise ValueError(
            f"You set `sample_with={sample_with}`. As of sbi v0.18.0, setting "
            f"`sample_with` is no longer supported. You have to rerun "
            f"`.build_posterior(sample_with={sample_with}).`"
        )

    x = self._x_else_default_x(x)
    x = reshape_to_batch_event(x, self.score_estimator.condition_shape)
    self.potential_fn.set_x(x)

    if self.sample_with == "ode":
        samples = self.sample_via_zuko(sample_shape=sample_shape, x=x)
    elif self.sample_with == "sde":
        samples = self._sample_via_diffusion(
            sample_shape=sample_shape,
            predictor=predictor,
            corrector=corrector,
            predictor_params=predictor_params,
            corrector_params=corrector_params,
            steps=steps,
            ts=ts,
            max_sampling_batch_size=max_sampling_batch_size,
            show_progress_bars=show_progress_bars,
        )

    return samples

sample_via_zuko(x, sample_shape=torch.Size())

Return samples from posterior distribution with probability flow ODE.

This build the probability flow ODE and then samples from the corresponding flow. This is implemented via the zuko library.

Parameters:

Name Type Description Default
x Tensor

Condition.

required
sample_shape Shape

The shape of the samples to be returned.

Size()

Returns:

Type Description
Tensor

Samples.

Source code in sbi/inference/posteriors/score_posterior.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
def sample_via_zuko(
    self,
    x: Tensor,
    sample_shape: Shape = torch.Size(),
) -> Tensor:
    r"""Return samples from posterior distribution with probability flow ODE.

    This build the probability flow ODE and then samples from the corresponding
    flow. This is implemented via the zuko library.

    Args:
        x: Condition.
        sample_shape: The shape of the samples to be returned.

    Returns:
        Samples.
    """
    num_samples = torch.Size(sample_shape).numel()

    flow = self.potential_fn.get_continuous_normalizing_flow(condition=x)
    samples = flow.sample(torch.Size((num_samples,)))

    return samples.reshape(sample_shape + self.score_estimator.input_shape)

VIPosterior

Bases: NeuralPosterior

Provides VI (Variational Inference) to sample from the posterior.

SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \(q(\theta)\) which approximates the true posterior \(p(\theta|x_o)\). After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2].

References:
[1] Variational methods for simulation-based inference, Manuel Glöckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY
[2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522

Source code in sbi/inference/posteriors/vi_posterior.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
class VIPosterior(NeuralPosterior):
    r"""Provides VI (Variational Inference) to sample from the posterior.<br/><br/>
    SNLE or SNRE train neural networks to approximate the likelihood(-ratios).
    `VIPosterior` allows to learn a tractable variational posterior $q(\theta)$ which
    approximates the true posterior $p(\theta|x_o)$. After this second training stage,
    we can produce approximate posterior samples, by just sampling from q with no
    additional cost. For additional information see [1] and [2].<br/><br/>
    References:<br/>
    [1] Variational methods for simulation-based inference, Manuel Glöckler, Michael
    Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY<br/>
    [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes
    Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522
    """

    def __init__(
        self,
        potential_fn: Union[Callable, BasePotential],
        prior: Optional[TorchDistribution] = None,
        q: Union[str, PyroTransformedDistribution, "VIPosterior", Callable] = "maf",
        theta_transform: Optional[TorchTransform] = None,
        vi_method: str = "rKL",
        device: str = "cpu",
        x_shape: Optional[torch.Size] = None,
        parameters: Iterable = [],
        modules: Iterable = [],
    ):
        """
        Args:
            potential_fn: The potential function from which to draw samples. Must be a
                `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
            prior: This is the prior distribution. Note that this is only
                used to check/construct the variational distribution or within some
                quality metrics. Please make sure that this matches with the prior
                within the potential_fn. If `None` is given, we will try to infer it
                from potential_fn or q, if this fails we raise an Error.
            q: Variational distribution, either string, `TransformedDistribution`, or a
                `VIPosterior` object. This specifies a parametric class of distribution
                over which the best possible posterior approximation is searched. For
                string input, we currently support [nsf, scf, maf, mcf, gaussian,
                gaussian_diag]. You can also specify your own variational family by
                passing a pyro `TransformedDistribution`.
                Additionally, we allow a `Callable`, which allows you the pass a
                `builder` function, which if called returns a distribution. This may be
                useful for setting the hyperparameters e.g. `num_transfroms` within the
                `get_flow_builder` method specifying the number of transformations
                within a normalizing flow. If q is already a `VIPosterior`, then the
                arguments will be copied from it (relevant for multi-round training).
            theta_transform: Maps form prior support to unconstrained space. The
                inverse is used here to ensure that the posterior support is equal to
                that of the prior.
            vi_method: This specifies the variational methods which are used to fit q to
                the posterior. We currently support [rKL, fKL, IW, alpha]. Note that
                some of the divergences are `mode seeking` i.e. they underestimate
                variance and collapse on multimodal targets (`rKL`, `alpha` for alpha >
                1) and some are `mass covering` i.e. they overestimate variance but
                typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1).
            device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure
                that all other objects are also on this device.
            x_shape: Deprecated, should not be passed.
            parameters: List of parameters of the variational posterior. This is only
                required for user-defined q i.e. if q does not have a `parameters`
                attribute.
            modules: List of modules of the variational posterior. This is only
                required for user-defined q i.e. if q does not have a `modules`
                attribute.
        """
        super().__init__(potential_fn, theta_transform, device, x_shape=x_shape)

        # Especially the prior may be on another device -> move it...
        self._device = device
        self.potential_fn.device = device
        move_all_tensor_to_device(self.potential_fn, device)

        # Get prior and previous builds
        if prior is not None:
            self._prior = prior
        elif hasattr(self.potential_fn, "prior") and isinstance(
            self.potential_fn.prior, Distribution
        ):
            self._prior = self.potential_fn.prior
        elif isinstance(q, VIPosterior) and isinstance(q._prior, Distribution):
            self._prior = q._prior
        else:
            raise ValueError(
                "We could not find a suitable prior distribution within `potential_fn` "
                "or `q` (if a VIPosterior is given). Please explicitly specify a prior."
            )
        move_all_tensor_to_device(self._prior, device)
        self._optimizer = None

        # In contrast to MCMC we want to project into constrained space.
        if theta_transform is None:
            self.link_transform = mcmc_transform(self._prior).inv
        else:
            self.link_transform = theta_transform.inv

        # This will set the variational distribution and VI method
        self.set_q(q, parameters=parameters, modules=modules)
        self.set_vi_method(vi_method)

        self._purpose = (
            "It provides Variational inference to .sample() from the posterior and "
            "can evaluate the _normalized_ posterior density with .log_prob()."
        )

    @property
    def q(self) -> Distribution:
        """Returns the variational posterior."""
        return self._q

    @q.setter
    def q(
        self,
        q: Union[str, Distribution, "VIPosterior", Callable],
    ) -> None:
        """Sets the variational distribution. If the distribution does not admit access
        through `parameters` and `modules` function, please use `set_q` if you want to
        explicitly specify the parameters and modules.


        Args:
            q: Variational distribution, either string, distribution, or a VIPosterior
                object. This specifies a parametric class of distribution over which
                the best possible posterior approximation is searched. For string input,
                we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of
                course, you can also specify your own variational family by passing a
                `parameterized` distribution object i.e. a torch.distributions
                Distribution with methods `parameters` returning an iterable of all
                parameters (you can pass them within the paramters/modules attribute).
                Additionally, we allow a `Callable`, which allows you the pass a
                `builder` function, which if called returns an distribution. This may be
                useful for setting the hyperparameters e.g. `num_transfroms:int` by
                using the `get_flow_builder` method specifying the hyperparameters. If q
                is already a `VIPosterior`, then the arguments will be copied from it
                (relevant for multi-round training).


        """
        self.set_q(q)

    def set_q(
        self,
        q: Union[str, PyroTransformedDistribution, "VIPosterior", Callable],
        parameters: Iterable = [],
        modules: Iterable = [],
    ) -> None:
        """Defines the variational family.

        You can specify over which parameters/modules we optimize. This is required for
        custom distributions which e.g. do not inherit nn.Modules or has the function
        `parameters` or `modules` to give direct access to trainable parameters.
        Further, you can pass a function, which constructs a variational distribution
        if called.

        Args:
            q: Variational distribution, either string, distribution, or a VIPosterior
                object. This specifies a parametric class of distribution over which
                the best possible posterior approximation is searched. For string input,
                we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of
                course, you can also specify your own variational family by passing a
                `parameterized` distribution object i.e. a torch.distributions
                Distribution with methods `parameters` returning an iterable of all
                parameters (you can pass them within the paramters/modules attribute).
                Additionally, we allow a `Callable`, which allows you the pass a
                `builder` function, which if called returns an distribution. This may be
                useful for setting the hyperparameters e.g. `num_transfroms:int` by
                using the `get_flow_builder` method specifying the hyperparameters. If q
                is already a `VIPosterior`, then the arguments will be copied from it
                (relevant for multi-round training).
            parameters: List of parameters associated with the distribution object.
            modules: List of modules associated with the distribution object.

        """
        self._q_arg = (q, parameters, modules)
        if isinstance(q, Distribution):
            q = adapt_variational_distribution(
                q,
                self._prior,
                self.link_transform,
                parameters=parameters,
                modules=modules,
            )
            make_object_deepcopy_compatible(q)
            self_custom_q_init_cache = deepcopy(q)
            self._q_build_fn = lambda *args, **kwargs: self_custom_q_init_cache
            self._trained_on = None
        elif isinstance(q, (str, Callable)):
            if isinstance(q, str):
                self._q_build_fn = get_flow_builder(q)
            else:
                self._q_build_fn = q

            q = self._q_build_fn(
                self._prior.event_shape,
                self.link_transform,
                device=self._device,
            )
            make_object_deepcopy_compatible(q)
            self._trained_on = None
        elif isinstance(q, VIPosterior):
            self._q_build_fn = q._q_build_fn
            self._trained_on = q._trained_on
            self.vi_method = q.vi_method  # type: ignore
            self._device = q._device
            self._prior = q._prior
            self._x = q._x
            self._q_arg = q._q_arg
            make_object_deepcopy_compatible(q.q)
            q = deepcopy(q.q)
        move_all_tensor_to_device(q, self._device)
        assert isinstance(
            q, Distribution
        ), """Something went wrong when initializing the variational distribution.
            Please create an issue on github https://github.com/mackelab/sbi/issues"""
        check_variational_distribution(q, self._prior)
        self._q = q

    @property
    def vi_method(self) -> str:
        """Variational inference method e.g. one of [rKL, fKL, IW, alpha]."""
        return self._vi_method

    @vi_method.setter
    def vi_method(self, method: str) -> None:
        """See `set_vi_method`."""
        self.set_vi_method(method)

    def set_vi_method(self, method: str) -> "VIPosterior":
        """Sets variational inference method.

        Args:
            method: One of [rKL, fKL, IW, alpha].

        Returns:
            `VIPosterior` for chainable calls.
        """
        self._vi_method = method
        self._optimizer_builder = get_VI_method(method)
        return self

    def sample(
        self,
        sample_shape: Shape = torch.Size(),
        x: Optional[Tensor] = None,
        **kwargs,
    ) -> Tensor:
        """Samples from the variational posterior distribution.

        Args:
            sample_shape: Shape of samples

        Returns:
            Samples from posterior.
        """
        x = self._x_else_default_x(x)
        if self._trained_on is None or (x != self._trained_on).all():
            raise AttributeError(
                f"The variational posterior was not fit on the specified `default_x` "
                f"{x}. Please train using `posterior.train()`."
            )
        samples = self.q.sample(torch.Size(sample_shape))
        return samples.reshape((*sample_shape, samples.shape[-1]))

    def sample_batched(
        self,
        sample_shape: Shape,
        x: Tensor,
        max_sampling_batch_size: int = 10000,
        show_progress_bars: bool = True,
    ) -> Tensor:
        raise NotImplementedError(
            "Batched sampling is not implemented for VIPosterior. \
            Alternatively you can use `sample` in a loop \
            [posterior.sample(theta, x_o) for x_o in x]."
        )

    def log_prob(
        self,
        theta: Tensor,
        x: Optional[Tensor] = None,
        track_gradients: bool = False,
    ) -> Tensor:
        r"""Returns the log-probability of theta under the variational posterior.

        Args:
            theta: Parameters
            track_gradients: Whether the returned tensor supports tracking gradients.
                This can be helpful for e.g. sensitivity analysis but increases memory
                consumption.

        Returns:
            `len($\theta$)`-shaped log-probability.
        """
        x = self._x_else_default_x(x)
        if self._trained_on is None or (x != self._trained_on).all():
            raise AttributeError(
                f"The variational posterior was not fit using observation {x}.\
                     Please train."
            )
        with torch.set_grad_enabled(track_gradients):
            theta = ensure_theta_batched(torch.as_tensor(theta))
            return self.q.log_prob(theta)

    def train(
        self,
        x: Optional[TorchTensor] = None,
        n_particles: int = 256,
        learning_rate: float = 1e-3,
        gamma: float = 0.999,
        max_num_iters: int = 2000,
        min_num_iters: int = 10,
        clip_value: float = 10.0,
        warm_up_rounds: int = 100,
        retrain_from_scratch: bool = False,
        reset_optimizer: bool = False,
        show_progress_bar: bool = True,
        check_for_convergence: bool = True,
        quality_control: bool = True,
        quality_control_metric: str = "psis",
        **kwargs,
    ) -> "VIPosterior":
        """This method trains the variational posterior.

        Args:
            x: The observation.
            n_particles: Number of samples to approximate expectations within the
                variational bounds. The larger the more accurate are gradient
                estimates, but the computational cost per iteration increases.
            learning_rate: Learning rate of the optimizer.
            gamma: Learning rate decay per iteration. We use an exponential decay
                scheduler.
            max_num_iters: Maximum number of iterations.
            min_num_iters: Minimum number of iterations.
            clip_value: Gradient clipping value, decreasing may help if you see invalid
                values.
            warm_up_rounds: Initialize the posterior as the prior.
            retrain_from_scratch: Retrain the variational distributions from scratch.
            reset_optimizer: Reset the divergence optimizer
            show_progress_bar: If any progress report should be displayed.
            quality_control: If False quality control is skipped.
            quality_control_metric: Which metric to use for evaluating the quality.
            kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail
                eps: Determines sensitivity of convergence check.
                retain_graph: Boolean which decides whether to retain the computation
                    graph. This may be required for some `exotic` user-specified q's.
                optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See
                    `DivergenceOptimizer` for details.
                scheduler: A PyTorch learning rate scheduler. See
                    `DivergenceOptimizer` for details.
                alpha: Only used if vi_method=`alpha`. Determines the alpha divergence.
                K: Only used if vi_method=`IW`. Determines the number of importance
                    weighted particles.
                stick_the_landing: If one should use the STL estimator (only for rKL,
                    IW, alpha).
                dreg: If one should use the DREG estimator (only for rKL, IW, alpha).
                weight_transform: Callable applied to importance weights (only for fKL)
        Returns:
            VIPosterior: `VIPosterior` (can be used to chain calls).
        """
        # Update optimizer with current arguments.
        if self._optimizer is not None:
            self._optimizer.update({**locals(), **kwargs})

        # Init q and the optimizer if necessary
        if retrain_from_scratch:
            self.q = self._q_build_fn()  # type: ignore
            self._optimizer = self._optimizer_builder(
                self.potential_fn,
                self.q,
                lr=learning_rate,
                clip_value=clip_value,
                gamma=gamma,
                n_particles=n_particles,
                prior=self._prior,
                **kwargs,
            )

        if (
            reset_optimizer
            or self._optimizer is None
            or not isinstance(self._optimizer, self._optimizer_builder)
        ):
            self._optimizer = self._optimizer_builder(
                self.potential_fn,
                self.q,
                lr=learning_rate,
                clip_value=clip_value,
                gamma=gamma,
                n_particles=n_particles,
                prior=self._prior,
                **kwargs,
            )

        # Check context
        x = atleast_2d_float32_tensor(self._x_else_default_x(x)).to(  # type: ignore
            self._device
        )

        already_trained = self._trained_on is not None and (x == self._trained_on).all()

        # Optimize
        optimizer = self._optimizer
        optimizer.to(self._device)
        optimizer.reset_loss_stats()

        if show_progress_bar:
            iters = tqdm(range(max_num_iters))
        else:
            iters = range(max_num_iters)

        # Warmup before training
        if reset_optimizer or (not optimizer.warm_up_was_done and not already_trained):
            if show_progress_bar:
                iters.set_description(  # type: ignore
                    "Warmup phase, this may take a few seconds..."
                )
            optimizer.warm_up(warm_up_rounds)

        for i in iters:
            optimizer.step(x)
            mean_loss, std_loss = optimizer.get_loss_stats()
            # Update progress bar
            if show_progress_bar:
                assert isinstance(iters, tqdm)
                iters.set_description(  # type: ignore
                    f"Loss: {np.round(float(mean_loss), 2)}"
                    f"Std: {np.round(float(std_loss), 2)}"
                )
            # Check for convergence
            if check_for_convergence and i > min_num_iters and optimizer.converged():
                if show_progress_bar:
                    print(f"\nConverged with loss: {np.round(float(mean_loss), 2)}")
                break
        # Training finished:
        self._trained_on = x

        # Evaluate quality
        if quality_control:
            try:
                self.evaluate(quality_control_metric=quality_control_metric)
            except Exception as e:
                print(
                    f"Quality control showed a low quality of the variational "
                    f"posterior. We are automatically retraining the variational "
                    f"posterior from scratch with a smaller learning rate. "
                    f"Alternatively, if you want to skip quality control, please "
                    f"retrain with `VIPosterior.train(..., quality_control=False)`. "
                    f"\nThe error that occured is: {e}"
                )
                self.train(
                    learning_rate=learning_rate * 0.1,
                    retrain_from_scratch=True,
                    reset_optimizer=True,
                )

        return self

    def evaluate(self, quality_control_metric: str = "psis", N: int = int(5e4)) -> None:
        """This function will evaluate the quality of the variational posterior
        distribution. We currently support two different metrics of type `psis`, which
        checks the quality based on the tails of importance weights (there should not be
        much with a large one), or `prop` which checks the proportionality between q
        and potential_fn.

        NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok``
        whereas `psis` is more sensitive in distinguishing `very bad` from `ok`.

        Args:
            quality_control_metric: The metric of choice, we currently support [psis,
                prop, prop_prior].
            N: Number of samples which is used to evaluate the metric.
        """
        quality_control_fn, quality_control_msg = get_quality_metric(
            quality_control_metric
        )
        metric = round(float(quality_control_fn(self, N=N)), 3)
        print(f"Quality Score: {metric} " + quality_control_msg)

    def map(
        self,
        x: Optional[TorchTensor] = None,
        num_iter: int = 1_000,
        num_to_optimize: int = 100,
        learning_rate: float = 0.01,
        init_method: Union[str, TorchTensor] = "proposal",
        num_init_samples: int = 10_000,
        save_best_every: int = 10,
        show_progress_bars: bool = False,
        force_update: bool = False,
    ) -> Tensor:
        r"""Returns the maximum-a-posteriori estimate (MAP).

        The method can be interrupted (Ctrl-C) when the user sees that the
        log-probability converges. The best estimate will be saved in `self._map` and
        can be accessed with `self.map()`. The MAP is obtained by running gradient
        ascent from a given number of starting positions (samples from the posterior
        with the highest log-probability). After the optimization is done, we select the
        parameter set that has the highest log-probability after the optimization.

        Warning: The default values used by this function are not well-tested. They
        might require hand-tuning for the problem at hand.

        For developers: if the prior is a `BoxUniform`, we carry out the optimization
        in unbounded space and transform the result back into bounded space.

        Args:
            x: Deprecated - use `.set_default_x()` prior to `.map()`.
            num_iter: Number of optimization steps that the algorithm takes
                to find the MAP.
            learning_rate: Learning rate of the optimizer.
            init_method: How to select the starting parameters for the optimization. If
                it is a string, it can be either [`posterior`, `prior`], which samples
                the respective distribution `num_init_samples` times. If it is a
                tensor, the tensor will be used as init locations.
            num_init_samples: Draw this number of samples from the posterior and
                evaluate the log-probability of all of them.
            num_to_optimize: From the drawn `num_init_samples`, use the
                `num_to_optimize` with highest log-probability as the initial points
                for the optimization.
            save_best_every: The best log-probability is computed, saved in the
                `map`-attribute, and printed every `save_best_every`-th iteration.
                Computing the best log-probability creates a significant overhead
                (thus, the default is `10`.)
            show_progress_bars: Whether to show a progressbar during sampling from
                the posterior.
            force_update: Whether to re-calculate the MAP when x is unchanged and
                have a cached value.
            log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
                {'norm_posterior': True} for SNPE.

        Returns:
            The MAP estimate.
        """
        self.proposal = self.q
        return super().map(
            x=x,
            num_iter=num_iter,
            num_to_optimize=num_to_optimize,
            learning_rate=learning_rate,
            init_method=init_method,
            num_init_samples=num_init_samples,
            save_best_every=save_best_every,
            show_progress_bars=show_progress_bars,
            force_update=force_update,
        )

    def __deepcopy__(self, memo: Optional[Dict] = None) -> "VIPosterior":
        """This method is called when using `copy.deepcopy` on the object.

        It defines how the object is copied. We need to overwrite this method, since the
        default implementation does use __getstate__ and __setstate__ which we overwrite
        to enable pickling (and in particular the necessary modifications are
        incompatible deep copying).

        Args:
            memo (Optional[Dict], optional): Deep copy internal memo. Defaults to None.

        Returns:
            VIPosterior: Deep copy of the VIPosterior.
        """
        if memo is None:
            memo = {}
        # Create a new instance of the class
        cls = self.__class__
        result = cls.__new__(cls)
        # Add to memo
        memo[id(self)] = result
        # Copy attributes
        for k, v in self.__dict__.items():
            setattr(result, k, copy.deepcopy(v, memo))
        return result

    def __getstate__(self) -> Dict:
        """This method is called when pickling the object.

        It defines what is pickled. We need to overwrite this method, since some parts
        due not support pickle protocols (e.g. due to local functions, etc.).

        Returns:
            Dict: All attributes of the VIPosterior.
        """
        self._optimizer = None
        self.__deepcopy__ = None  # type: ignore
        self._q_build_fn = None
        self._q.__deepcopy__ = None  # type: ignore
        return self.__dict__

    def __setstate__(self, state_dict: Dict):
        """This method is called when unpickling the object.

        Especially, we need to restore the removed attributes and ensure that the object
        e.g. remains deep copy compatible.

        Args:
            state_dict: Given state dictionary, we will restore the object from it.
        """
        self.__dict__ = state_dict
        q = deepcopy(self._q)
        # Restore removed attributes
        self.set_q(*self._q_arg)
        self._q = q
        make_object_deepcopy_compatible(self)
        make_object_deepcopy_compatible(self.q)

q: Distribution property writable

Returns the variational posterior.

vi_method: str property writable

Variational inference method e.g. one of [rKL, fKL, IW, alpha].

__deepcopy__(memo=None)

This method is called when using copy.deepcopy on the object.

It defines how the object is copied. We need to overwrite this method, since the default implementation does use getstate and setstate which we overwrite to enable pickling (and in particular the necessary modifications are incompatible deep copying).

Parameters:

Name Type Description Default
memo Optional[Dict]

Deep copy internal memo. Defaults to None.

None

Returns:

Name Type Description
VIPosterior VIPosterior

Deep copy of the VIPosterior.

Source code in sbi/inference/posteriors/vi_posterior.py
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
def __deepcopy__(self, memo: Optional[Dict] = None) -> "VIPosterior":
    """This method is called when using `copy.deepcopy` on the object.

    It defines how the object is copied. We need to overwrite this method, since the
    default implementation does use __getstate__ and __setstate__ which we overwrite
    to enable pickling (and in particular the necessary modifications are
    incompatible deep copying).

    Args:
        memo (Optional[Dict], optional): Deep copy internal memo. Defaults to None.

    Returns:
        VIPosterior: Deep copy of the VIPosterior.
    """
    if memo is None:
        memo = {}
    # Create a new instance of the class
    cls = self.__class__
    result = cls.__new__(cls)
    # Add to memo
    memo[id(self)] = result
    # Copy attributes
    for k, v in self.__dict__.items():
        setattr(result, k, copy.deepcopy(v, memo))
    return result

__getstate__()

This method is called when pickling the object.

It defines what is pickled. We need to overwrite this method, since some parts due not support pickle protocols (e.g. due to local functions, etc.).

Returns:

Name Type Description
Dict Dict

All attributes of the VIPosterior.

Source code in sbi/inference/posteriors/vi_posterior.py
608
609
610
611
612
613
614
615
616
617
618
619
620
621
def __getstate__(self) -> Dict:
    """This method is called when pickling the object.

    It defines what is pickled. We need to overwrite this method, since some parts
    due not support pickle protocols (e.g. due to local functions, etc.).

    Returns:
        Dict: All attributes of the VIPosterior.
    """
    self._optimizer = None
    self.__deepcopy__ = None  # type: ignore
    self._q_build_fn = None
    self._q.__deepcopy__ = None  # type: ignore
    return self.__dict__

__init__(potential_fn, prior=None, q='maf', theta_transform=None, vi_method='rKL', device='cpu', x_shape=None, parameters=[], modules=[])

Parameters:

Name Type Description Default
potential_fn Union[Callable, BasePotential]

The potential function from which to draw samples. Must be a BasePotential or a Callable which takes theta and x_o as inputs.

required
prior Optional[TorchDistribution]

This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If None is given, we will try to infer it from potential_fn or q, if this fails we raise an Error.

None
q Union[str, PyroTransformedDistribution, VIPosterior, Callable]

Variational distribution, either string, TransformedDistribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro TransformedDistribution. Additionally, we allow a Callable, which allows you the pass a builder function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. num_transfroms within the get_flow_builder method specifying the number of transformations within a normalizing flow. If q is already a VIPosterior, then the arguments will be copied from it (relevant for multi-round training).

'maf'
theta_transform Optional[TorchTransform]

Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior.

None
vi_method str

This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are mode seeking i.e. they underestimate variance and collapse on multimodal targets (rKL, alpha for alpha > 1) and some are mass covering i.e. they overestimate variance but typically cover all modes (fKL, IW, alpha for alpha < 1).

'rKL'
device str

Training device, e.g., cpu, cuda or cuda:0. We will ensure that all other objects are also on this device.

'cpu'
x_shape Optional[Size]

Deprecated, should not be passed.

None
parameters Iterable

List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a parameters attribute.

[]
modules Iterable

List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a modules attribute.

[]
Source code in sbi/inference/posteriors/vi_posterior.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
def __init__(
    self,
    potential_fn: Union[Callable, BasePotential],
    prior: Optional[TorchDistribution] = None,
    q: Union[str, PyroTransformedDistribution, "VIPosterior", Callable] = "maf",
    theta_transform: Optional[TorchTransform] = None,
    vi_method: str = "rKL",
    device: str = "cpu",
    x_shape: Optional[torch.Size] = None,
    parameters: Iterable = [],
    modules: Iterable = [],
):
    """
    Args:
        potential_fn: The potential function from which to draw samples. Must be a
            `BasePotential` or a `Callable` which takes `theta` and `x_o` as inputs.
        prior: This is the prior distribution. Note that this is only
            used to check/construct the variational distribution or within some
            quality metrics. Please make sure that this matches with the prior
            within the potential_fn. If `None` is given, we will try to infer it
            from potential_fn or q, if this fails we raise an Error.
        q: Variational distribution, either string, `TransformedDistribution`, or a
            `VIPosterior` object. This specifies a parametric class of distribution
            over which the best possible posterior approximation is searched. For
            string input, we currently support [nsf, scf, maf, mcf, gaussian,
            gaussian_diag]. You can also specify your own variational family by
            passing a pyro `TransformedDistribution`.
            Additionally, we allow a `Callable`, which allows you the pass a
            `builder` function, which if called returns a distribution. This may be
            useful for setting the hyperparameters e.g. `num_transfroms` within the
            `get_flow_builder` method specifying the number of transformations
            within a normalizing flow. If q is already a `VIPosterior`, then the
            arguments will be copied from it (relevant for multi-round training).
        theta_transform: Maps form prior support to unconstrained space. The
            inverse is used here to ensure that the posterior support is equal to
            that of the prior.
        vi_method: This specifies the variational methods which are used to fit q to
            the posterior. We currently support [rKL, fKL, IW, alpha]. Note that
            some of the divergences are `mode seeking` i.e. they underestimate
            variance and collapse on multimodal targets (`rKL`, `alpha` for alpha >
            1) and some are `mass covering` i.e. they overestimate variance but
            typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1).
        device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure
            that all other objects are also on this device.
        x_shape: Deprecated, should not be passed.
        parameters: List of parameters of the variational posterior. This is only
            required for user-defined q i.e. if q does not have a `parameters`
            attribute.
        modules: List of modules of the variational posterior. This is only
            required for user-defined q i.e. if q does not have a `modules`
            attribute.
    """
    super().__init__(potential_fn, theta_transform, device, x_shape=x_shape)

    # Especially the prior may be on another device -> move it...
    self._device = device
    self.potential_fn.device = device
    move_all_tensor_to_device(self.potential_fn, device)

    # Get prior and previous builds
    if prior is not None:
        self._prior = prior
    elif hasattr(self.potential_fn, "prior") and isinstance(
        self.potential_fn.prior, Distribution
    ):
        self._prior = self.potential_fn.prior
    elif isinstance(q, VIPosterior) and isinstance(q._prior, Distribution):
        self._prior = q._prior
    else:
        raise ValueError(
            "We could not find a suitable prior distribution within `potential_fn` "
            "or `q` (if a VIPosterior is given). Please explicitly specify a prior."
        )
    move_all_tensor_to_device(self._prior, device)
    self._optimizer = None

    # In contrast to MCMC we want to project into constrained space.
    if theta_transform is None:
        self.link_transform = mcmc_transform(self._prior).inv
    else:
        self.link_transform = theta_transform.inv

    # This will set the variational distribution and VI method
    self.set_q(q, parameters=parameters, modules=modules)
    self.set_vi_method(vi_method)

    self._purpose = (
        "It provides Variational inference to .sample() from the posterior and "
        "can evaluate the _normalized_ posterior density with .log_prob()."
    )

__setstate__(state_dict)

This method is called when unpickling the object.

Especially, we need to restore the removed attributes and ensure that the object e.g. remains deep copy compatible.

Parameters:

Name Type Description Default
state_dict Dict

Given state dictionary, we will restore the object from it.

required
Source code in sbi/inference/posteriors/vi_posterior.py
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
def __setstate__(self, state_dict: Dict):
    """This method is called when unpickling the object.

    Especially, we need to restore the removed attributes and ensure that the object
    e.g. remains deep copy compatible.

    Args:
        state_dict: Given state dictionary, we will restore the object from it.
    """
    self.__dict__ = state_dict
    q = deepcopy(self._q)
    # Restore removed attributes
    self.set_q(*self._q_arg)
    self._q = q
    make_object_deepcopy_compatible(self)
    make_object_deepcopy_compatible(self.q)

evaluate(quality_control_metric='psis', N=int(50000.0))

This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type psis, which checks the quality based on the tails of importance weights (there should not be much with a large one), or prop which checks the proportionality between q and potential_fn.

NOTE: In our experience prop is sensitive to distinguish good from ok whereas psis is more sensitive in distinguishing very bad from ok.

Parameters:

Name Type Description Default
quality_control_metric str

The metric of choice, we currently support [psis, prop, prop_prior].

'psis'
N int

Number of samples which is used to evaluate the metric.

int(50000.0)
Source code in sbi/inference/posteriors/vi_posterior.py
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
def evaluate(self, quality_control_metric: str = "psis", N: int = int(5e4)) -> None:
    """This function will evaluate the quality of the variational posterior
    distribution. We currently support two different metrics of type `psis`, which
    checks the quality based on the tails of importance weights (there should not be
    much with a large one), or `prop` which checks the proportionality between q
    and potential_fn.

    NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok``
    whereas `psis` is more sensitive in distinguishing `very bad` from `ok`.

    Args:
        quality_control_metric: The metric of choice, we currently support [psis,
            prop, prop_prior].
        N: Number of samples which is used to evaluate the metric.
    """
    quality_control_fn, quality_control_msg = get_quality_metric(
        quality_control_metric
    )
    metric = round(float(quality_control_fn(self, N=N)), 3)
    print(f"Quality Score: {metric} " + quality_control_msg)

log_prob(theta, x=None, track_gradients=False)

Returns the log-probability of theta under the variational posterior.

Parameters:

Name Type Description Default
theta Tensor

Parameters

required
track_gradients bool

Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption.

False

Returns:

Type Description
Tensor

len($\theta$)-shaped log-probability.

Source code in sbi/inference/posteriors/vi_posterior.py
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
def log_prob(
    self,
    theta: Tensor,
    x: Optional[Tensor] = None,
    track_gradients: bool = False,
) -> Tensor:
    r"""Returns the log-probability of theta under the variational posterior.

    Args:
        theta: Parameters
        track_gradients: Whether the returned tensor supports tracking gradients.
            This can be helpful for e.g. sensitivity analysis but increases memory
            consumption.

    Returns:
        `len($\theta$)`-shaped log-probability.
    """
    x = self._x_else_default_x(x)
    if self._trained_on is None or (x != self._trained_on).all():
        raise AttributeError(
            f"The variational posterior was not fit using observation {x}.\
                 Please train."
        )
    with torch.set_grad_enabled(track_gradients):
        theta = ensure_theta_batched(torch.as_tensor(theta))
        return self.q.log_prob(theta)

map(x=None, num_iter=1000, num_to_optimize=100, learning_rate=0.01, init_method='proposal', num_init_samples=10000, save_best_every=10, show_progress_bars=False, force_update=False)

Returns the maximum-a-posteriori estimate (MAP).

The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map(). The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization.

Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand.

For developers: if the prior is a BoxUniform, we carry out the optimization in unbounded space and transform the result back into bounded space.

Parameters:

Name Type Description Default
x Optional[TorchTensor]

Deprecated - use .set_default_x() prior to .map().

None
num_iter int

Number of optimization steps that the algorithm takes to find the MAP.

1000
learning_rate float

Learning rate of the optimizer.

0.01
init_method Union[str, TorchTensor]

How to select the starting parameters for the optimization. If it is a string, it can be either [posterior, prior], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations.

'proposal'
num_init_samples int

Draw this number of samples from the posterior and evaluate the log-probability of all of them.

10000
num_to_optimize int

From the drawn num_init_samples, use the num_to_optimize with highest log-probability as the initial points for the optimization.

100
save_best_every int

The best log-probability is computed, saved in the map-attribute, and printed every save_best_every-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10.)

10
show_progress_bars bool

Whether to show a progressbar during sampling from the posterior.

False
force_update bool

Whether to re-calculate the MAP when x is unchanged and have a cached value.

False
log_prob_kwargs

Will be empty for SNLE and SNRE. Will contain {‘norm_posterior’: True} for SNPE.

required

Returns:

Type Description
Tensor

The MAP estimate.

Source code in sbi/inference/posteriors/vi_posterior.py
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
def map(
    self,
    x: Optional[TorchTensor] = None,
    num_iter: int = 1_000,
    num_to_optimize: int = 100,
    learning_rate: float = 0.01,
    init_method: Union[str, TorchTensor] = "proposal",
    num_init_samples: int = 10_000,
    save_best_every: int = 10,
    show_progress_bars: bool = False,
    force_update: bool = False,
) -> Tensor:
    r"""Returns the maximum-a-posteriori estimate (MAP).

    The method can be interrupted (Ctrl-C) when the user sees that the
    log-probability converges. The best estimate will be saved in `self._map` and
    can be accessed with `self.map()`. The MAP is obtained by running gradient
    ascent from a given number of starting positions (samples from the posterior
    with the highest log-probability). After the optimization is done, we select the
    parameter set that has the highest log-probability after the optimization.

    Warning: The default values used by this function are not well-tested. They
    might require hand-tuning for the problem at hand.

    For developers: if the prior is a `BoxUniform`, we carry out the optimization
    in unbounded space and transform the result back into bounded space.

    Args:
        x: Deprecated - use `.set_default_x()` prior to `.map()`.
        num_iter: Number of optimization steps that the algorithm takes
            to find the MAP.
        learning_rate: Learning rate of the optimizer.
        init_method: How to select the starting parameters for the optimization. If
            it is a string, it can be either [`posterior`, `prior`], which samples
            the respective distribution `num_init_samples` times. If it is a
            tensor, the tensor will be used as init locations.
        num_init_samples: Draw this number of samples from the posterior and
            evaluate the log-probability of all of them.
        num_to_optimize: From the drawn `num_init_samples`, use the
            `num_to_optimize` with highest log-probability as the initial points
            for the optimization.
        save_best_every: The best log-probability is computed, saved in the
            `map`-attribute, and printed every `save_best_every`-th iteration.
            Computing the best log-probability creates a significant overhead
            (thus, the default is `10`.)
        show_progress_bars: Whether to show a progressbar during sampling from
            the posterior.
        force_update: Whether to re-calculate the MAP when x is unchanged and
            have a cached value.
        log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain
            {'norm_posterior': True} for SNPE.

    Returns:
        The MAP estimate.
    """
    self.proposal = self.q
    return super().map(
        x=x,
        num_iter=num_iter,
        num_to_optimize=num_to_optimize,
        learning_rate=learning_rate,
        init_method=init_method,
        num_init_samples=num_init_samples,
        save_best_every=save_best_every,
        show_progress_bars=show_progress_bars,
        force_update=force_update,
    )

sample(sample_shape=torch.Size(), x=None, **kwargs)

Samples from the variational posterior distribution.

Parameters:

Name Type Description Default
sample_shape Shape

Shape of samples

Size()

Returns:

Type Description
Tensor

Samples from posterior.

Source code in sbi/inference/posteriors/vi_posterior.py
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
def sample(
    self,
    sample_shape: Shape = torch.Size(),
    x: Optional[Tensor] = None,
    **kwargs,
) -> Tensor:
    """Samples from the variational posterior distribution.

    Args:
        sample_shape: Shape of samples

    Returns:
        Samples from posterior.
    """
    x = self._x_else_default_x(x)
    if self._trained_on is None or (x != self._trained_on).all():
        raise AttributeError(
            f"The variational posterior was not fit on the specified `default_x` "
            f"{x}. Please train using `posterior.train()`."
        )
    samples = self.q.sample(torch.Size(sample_shape))
    return samples.reshape((*sample_shape, samples.shape[-1]))

set_q(q, parameters=[], modules=[])

Defines the variational family.

You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function parameters or modules to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called.

Parameters:

Name Type Description Default
q Union[str, PyroTransformedDistribution, VIPosterior, Callable]

Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a parameterized distribution object i.e. a torch.distributions Distribution with methods parameters returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a Callable, which allows you the pass a builder function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. num_transfroms:int by using the get_flow_builder method specifying the hyperparameters. If q is already a VIPosterior, then the arguments will be copied from it (relevant for multi-round training).

required
parameters Iterable

List of parameters associated with the distribution object.

[]
modules Iterable

List of modules associated with the distribution object.

[]
Source code in sbi/inference/posteriors/vi_posterior.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
def set_q(
    self,
    q: Union[str, PyroTransformedDistribution, "VIPosterior", Callable],
    parameters: Iterable = [],
    modules: Iterable = [],
) -> None:
    """Defines the variational family.

    You can specify over which parameters/modules we optimize. This is required for
    custom distributions which e.g. do not inherit nn.Modules or has the function
    `parameters` or `modules` to give direct access to trainable parameters.
    Further, you can pass a function, which constructs a variational distribution
    if called.

    Args:
        q: Variational distribution, either string, distribution, or a VIPosterior
            object. This specifies a parametric class of distribution over which
            the best possible posterior approximation is searched. For string input,
            we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of
            course, you can also specify your own variational family by passing a
            `parameterized` distribution object i.e. a torch.distributions
            Distribution with methods `parameters` returning an iterable of all
            parameters (you can pass them within the paramters/modules attribute).
            Additionally, we allow a `Callable`, which allows you the pass a
            `builder` function, which if called returns an distribution. This may be
            useful for setting the hyperparameters e.g. `num_transfroms:int` by
            using the `get_flow_builder` method specifying the hyperparameters. If q
            is already a `VIPosterior`, then the arguments will be copied from it
            (relevant for multi-round training).
        parameters: List of parameters associated with the distribution object.
        modules: List of modules associated with the distribution object.

    """
    self._q_arg = (q, parameters, modules)
    if isinstance(q, Distribution):
        q = adapt_variational_distribution(
            q,
            self._prior,
            self.link_transform,
            parameters=parameters,
            modules=modules,
        )
        make_object_deepcopy_compatible(q)
        self_custom_q_init_cache = deepcopy(q)
        self._q_build_fn = lambda *args, **kwargs: self_custom_q_init_cache
        self._trained_on = None
    elif isinstance(q, (str, Callable)):
        if isinstance(q, str):
            self._q_build_fn = get_flow_builder(q)
        else:
            self._q_build_fn = q

        q = self._q_build_fn(
            self._prior.event_shape,
            self.link_transform,
            device=self._device,
        )
        make_object_deepcopy_compatible(q)
        self._trained_on = None
    elif isinstance(q, VIPosterior):
        self._q_build_fn = q._q_build_fn
        self._trained_on = q._trained_on
        self.vi_method = q.vi_method  # type: ignore
        self._device = q._device
        self._prior = q._prior
        self._x = q._x
        self._q_arg = q._q_arg
        make_object_deepcopy_compatible(q.q)
        q = deepcopy(q.q)
    move_all_tensor_to_device(q, self._device)
    assert isinstance(
        q, Distribution
    ), """Something went wrong when initializing the variational distribution.
        Please create an issue on github https://github.com/mackelab/sbi/issues"""
    check_variational_distribution(q, self._prior)
    self._q = q

set_vi_method(method)

Sets variational inference method.

Parameters:

Name Type Description Default
method str

One of [rKL, fKL, IW, alpha].

required

Returns:

Type Description
VIPosterior

VIPosterior for chainable calls.

Source code in sbi/inference/posteriors/vi_posterior.py
263
264
265
266
267
268
269
270
271
272
273
274
def set_vi_method(self, method: str) -> "VIPosterior":
    """Sets variational inference method.

    Args:
        method: One of [rKL, fKL, IW, alpha].

    Returns:
        `VIPosterior` for chainable calls.
    """
    self._vi_method = method
    self._optimizer_builder = get_VI_method(method)
    return self

train(x=None, n_particles=256, learning_rate=0.001, gamma=0.999, max_num_iters=2000, min_num_iters=10, clip_value=10.0, warm_up_rounds=100, retrain_from_scratch=False, reset_optimizer=False, show_progress_bar=True, check_for_convergence=True, quality_control=True, quality_control_metric='psis', **kwargs)

This method trains the variational posterior.

Parameters:

Name Type Description Default
x Optional[TorchTensor]

The observation.

None
n_particles int

Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases.

256
learning_rate float

Learning rate of the optimizer.

0.001
gamma float

Learning rate decay per iteration. We use an exponential decay scheduler.

0.999
max_num_iters int

Maximum number of iterations.

2000
min_num_iters int

Minimum number of iterations.

10
clip_value float

Gradient clipping value, decreasing may help if you see invalid values.

10.0
warm_up_rounds int

Initialize the posterior as the prior.

100
retrain_from_scratch bool

Retrain the variational distributions from scratch.

False
reset_optimizer bool

Reset the divergence optimizer

False
show_progress_bar bool

If any progress report should be displayed.

True
quality_control bool

If False quality control is skipped.

True
quality_control_metric str

Which metric to use for evaluating the quality.

'psis'
kwargs

Hyperparameters check corresponding DivergenceOptimizer for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some exotic user-specified q’s. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See DivergenceOptimizer for details. scheduler: A PyTorch learning rate scheduler. See DivergenceOptimizer for details. alpha: Only used if vi_method=alpha. Determines the alpha divergence. K: Only used if vi_method=IW. Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL)

{}

Returns: VIPosterior: VIPosterior (can be used to chain calls).

Source code in sbi/inference/posteriors/vi_posterior.py
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
def train(
    self,
    x: Optional[TorchTensor] = None,
    n_particles: int = 256,
    learning_rate: float = 1e-3,
    gamma: float = 0.999,
    max_num_iters: int = 2000,
    min_num_iters: int = 10,
    clip_value: float = 10.0,
    warm_up_rounds: int = 100,
    retrain_from_scratch: bool = False,
    reset_optimizer: bool = False,
    show_progress_bar: bool = True,
    check_for_convergence: bool = True,
    quality_control: bool = True,
    quality_control_metric: str = "psis",
    **kwargs,
) -> "VIPosterior":
    """This method trains the variational posterior.

    Args:
        x: The observation.
        n_particles: Number of samples to approximate expectations within the
            variational bounds. The larger the more accurate are gradient
            estimates, but the computational cost per iteration increases.
        learning_rate: Learning rate of the optimizer.
        gamma: Learning rate decay per iteration. We use an exponential decay
            scheduler.
        max_num_iters: Maximum number of iterations.
        min_num_iters: Minimum number of iterations.
        clip_value: Gradient clipping value, decreasing may help if you see invalid
            values.
        warm_up_rounds: Initialize the posterior as the prior.
        retrain_from_scratch: Retrain the variational distributions from scratch.
        reset_optimizer: Reset the divergence optimizer
        show_progress_bar: If any progress report should be displayed.
        quality_control: If False quality control is skipped.
        quality_control_metric: Which metric to use for evaluating the quality.
        kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail
            eps: Determines sensitivity of convergence check.
            retain_graph: Boolean which decides whether to retain the computation
                graph. This may be required for some `exotic` user-specified q's.
            optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See
                `DivergenceOptimizer` for details.
            scheduler: A PyTorch learning rate scheduler. See
                `DivergenceOptimizer` for details.
            alpha: Only used if vi_method=`alpha`. Determines the alpha divergence.
            K: Only used if vi_method=`IW`. Determines the number of importance
                weighted particles.
            stick_the_landing: If one should use the STL estimator (only for rKL,
                IW, alpha).
            dreg: If one should use the DREG estimator (only for rKL, IW, alpha).
            weight_transform: Callable applied to importance weights (only for fKL)
    Returns:
        VIPosterior: `VIPosterior` (can be used to chain calls).
    """
    # Update optimizer with current arguments.
    if self._optimizer is not None:
        self._optimizer.update({**locals(), **kwargs})

    # Init q and the optimizer if necessary
    if retrain_from_scratch:
        self.q = self._q_build_fn()  # type: ignore
        self._optimizer = self._optimizer_builder(
            self.potential_fn,
            self.q,
            lr=learning_rate,
            clip_value=clip_value,
            gamma=gamma,
            n_particles=n_particles,
            prior=self._prior,
            **kwargs,
        )

    if (
        reset_optimizer
        or self._optimizer is None
        or not isinstance(self._optimizer, self._optimizer_builder)
    ):
        self._optimizer = self._optimizer_builder(
            self.potential_fn,
            self.q,
            lr=learning_rate,
            clip_value=clip_value,
            gamma=gamma,
            n_particles=n_particles,
            prior=self._prior,
            **kwargs,
        )

    # Check context
    x = atleast_2d_float32_tensor(self._x_else_default_x(x)).to(  # type: ignore
        self._device
    )

    already_trained = self._trained_on is not None and (x == self._trained_on).all()

    # Optimize
    optimizer = self._optimizer
    optimizer.to(self._device)
    optimizer.reset_loss_stats()

    if show_progress_bar:
        iters = tqdm(range(max_num_iters))
    else:
        iters = range(max_num_iters)

    # Warmup before training
    if reset_optimizer or (not optimizer.warm_up_was_done and not already_trained):
        if show_progress_bar:
            iters.set_description(  # type: ignore
                "Warmup phase, this may take a few seconds..."
            )
        optimizer.warm_up(warm_up_rounds)

    for i in iters:
        optimizer.step(x)
        mean_loss, std_loss = optimizer.get_loss_stats()
        # Update progress bar
        if show_progress_bar:
            assert isinstance(iters, tqdm)
            iters.set_description(  # type: ignore
                f"Loss: {np.round(float(mean_loss), 2)}"
                f"Std: {np.round(float(std_loss), 2)}"
            )
        # Check for convergence
        if check_for_convergence and i > min_num_iters and optimizer.converged():
            if show_progress_bar:
                print(f"\nConverged with loss: {np.round(float(mean_loss), 2)}")
            break
    # Training finished:
    self._trained_on = x

    # Evaluate quality
    if quality_control:
        try:
            self.evaluate(quality_control_metric=quality_control_metric)
        except Exception as e:
            print(
                f"Quality control showed a low quality of the variational "
                f"posterior. We are automatically retraining the variational "
                f"posterior from scratch with a smaller learning rate. "
                f"Alternatively, if you want to skip quality control, please "
                f"retrain with `VIPosterior.train(..., quality_control=False)`. "
                f"\nThe error that occured is: {e}"
            )
            self.train(
                learning_rate=learning_rate * 0.1,
                retrain_from_scratch=True,
                reset_optimizer=True,
            )

    return self