Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Low-rank ADVI #3022

Open
wants to merge 11 commits into
base: develop
Choose a base branch
from
2 changes: 1 addition & 1 deletion lib/stan_math
Submodule stan_math updated 1445 files
11 changes: 11 additions & 0 deletions src/stan/services/experimental/advi/defaults.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,17 @@ namespace services {
namespace experimental {
namespace advi {

/**
 * Rank of the approximation when using low-rank ADVI.
 */
struct rank {
  /** Default rank: a rank-1 covariance approximation. */
  static int default_value() { return 1; }

  /** Human-readable description of this configuration argument. */
  static std::string description() {
    return "Rank of the covariance approximation when using low-rank ADVI.";
  }
};

/**
* Number of samples for Monte Carlo estimate of gradients.
*/
Expand Down
90 changes: 90 additions & 0 deletions src/stan/services/experimental/advi/lowrank.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#ifndef STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP
#define STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP

#include <stan/callbacks/interrupt.hpp>
#include <stan/callbacks/logger.hpp>
#include <stan/callbacks/writer.hpp>
#include <stan/io/var_context.hpp>
#include <stan/services/error_codes.hpp>
#include <stan/services/util/experimental_message.hpp>
#include <stan/services/util/initialize.hpp>
#include <stan/services/util/create_rng.hpp>
#include <stan/variational/advi.hpp>
#include <boost/random/additive_combine.hpp>
#include <string>
#include <vector>

namespace stan {
namespace services {
namespace experimental {
namespace advi {

/**
 * Runs low-rank ADVI, which approximates the posterior with a
 * multivariate normal whose covariance is a low-rank perturbation of a
 * diagonal matrix.
 *
 * @tparam Model A model implementation
 * @param[in] model Input model (with data already instantiated)
 * @param[in] init var context for initialization
 * @param[in] random_seed random seed for the random number generator
 * @param[in] chain chain id to advance the random number generator
 * @param[in] init_radius radius to initialize
 * @param[in] grad_samples number of samples for Monte Carlo estimate
 *   of gradients
 * @param[in] elbo_samples number of samples for Monte Carlo estimate
 *   of ELBO
 * @param[in] max_iterations maximum number of iterations
 * @param[in] tol_rel_obj convergence tolerance on the relative norm
 *   of the objective
 * @param[in] rank the rank of the covariance approximation
 * @param[in] eta stepsize scaling parameter for variational inference
 * @param[in] adapt_engaged adaptation engaged?
 * @param[in] adapt_iterations number of iterations for eta adaptation
 * @param[in] eval_elbo evaluate ELBO every Nth iteration
 * @param[in] output_samples number of posterior samples to draw and
 *   save
 * @param[in,out] interrupt callback to be called every iteration
 * @param[in,out] logger Logger for messages
 * @param[in,out] init_writer Writer callback for unconstrained inits
 * @param[in,out] parameter_writer output for parameter values
 * @param[in,out] diagnostic_writer output for diagnostic values
 * @return error_codes::OK if successful
 */
template <class Model>
int lowrank(Model& model, const stan::io::var_context& init,
            unsigned int random_seed, unsigned int chain, double init_radius,
            int grad_samples, int elbo_samples, int max_iterations,
            double tol_rel_obj, int rank, double eta, bool adapt_engaged,
            int adapt_iterations, int eval_elbo, int output_samples,
            callbacks::interrupt& interrupt, callbacks::logger& logger,
            callbacks::writer& init_writer, callbacks::writer& parameter_writer,
            callbacks::writer& diagnostic_writer) {
  // Low-rank ADVI is experimental; warn the user up front.
  util::experimental_message(logger);

  boost::ecuyer1988 rng = util::create_rng(random_seed, chain);

  std::vector<int> disc_vector;
  std::vector<double> cont_vector = util::initialize(
      model, init, rng, init_radius, true, logger, init_writer);

  // Header row: diagnostic columns first, then the constrained parameters.
  std::vector<std::string> names;
  names.push_back("lp__");
  names.push_back("log_p__");
  names.push_back("log_g__");
  model.constrained_param_names(names, true, true);
  parameter_writer(names);

  // View the initialization as an Eigen vector without copying.
  // .data() is well-defined even for a zero-parameter model, unlike &v[0].
  Eigen::VectorXd cont_params
      = Eigen::Map<Eigen::VectorXd>(cont_vector.data(), cont_vector.size());

  stan::variational::advi_lowrank<Model, boost::ecuyer1988> cmd_advi(
      model, cont_params, rng, rank, grad_samples, elbo_samples, eval_elbo,
      output_samples);
  cmd_advi.run(eta, adapt_engaged, adapt_iterations, tol_rel_obj,
               max_iterations, logger, parameter_writer, diagnostic_writer);

  // Return the named status code the services API documents, not a bare 0.
  return error_codes::OK;
}
} // namespace advi
} // namespace experimental
} // namespace services
} // namespace stan
#endif
104 changes: 93 additions & 11 deletions src/stan/variational/advi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stan/variational/print_progress.hpp>
#include <stan/variational/families/normal_fullrank.hpp>
#include <stan/variational/families/normal_meanfield.hpp>
#include <stan/variational/families/normal_lowrank.hpp>
#include <boost/circular_buffer.hpp>
#include <boost/lexical_cast.hpp>
#include <algorithm>
Expand All @@ -30,14 +31,15 @@ namespace variational {
*
* Implements "black box" variational inference using stochastic gradient
* ascent to maximize the Evidence Lower Bound for a given model
* and variational family.
* and variational family. This base class encapsulates the mean-field,
* low-rank, and full-rank variational posterior classes.
*
* @tparam Model class of model
* @tparam Q class of variational distribution
* @tparam BaseRNG class of random number generator
*/
template <class Model, class Q, class BaseRNG>
class advi {
class advi_base {
public:
/**
* Constructor
Expand All @@ -54,9 +56,9 @@ class advi {
* @throw std::runtime_error if eval_elbo is not positive
* @throw std::runtime_error if n_posterior_samples is not positive
*/
advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
int n_posterior_samples)
advi_base(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
int n_posterior_samples)
: model_(m),
cont_params_(cont_params),
rng_(rng),
Expand Down Expand Up @@ -192,10 +194,10 @@ class advi {
}

// Variational family to store gradients
Q elbo_grad = Q(model_.num_params_r());
Q elbo_grad = init_variational(model_.num_params_r());

// Adaptive step-size sequence
Q history_grad_squared = Q(model_.num_params_r());
Q history_grad_squared = init_variational(model_.num_params_r());
double tau = 1.0;
double pre_factor = 0.9;
double post_factor = 0.1;
Expand Down Expand Up @@ -287,7 +289,7 @@ class advi {
history_grad_squared.set_to_zero();
}
++eta_sequence_index;
variational = Q(cont_params_);
variational = init_variational(cont_params_);
}
return eta_best;
}
Expand Down Expand Up @@ -317,10 +319,10 @@ class advi {
stan::math::check_positive(function, "Maximum iterations", max_iterations);

// Gradient parameters
Q elbo_grad = Q(model_.num_params_r());
Q elbo_grad = init_variational(model_.num_params_r());

// Stepsize sequence parameters
Q history_grad_squared = Q(model_.num_params_r());
Q history_grad_squared = init_variational(model_.num_params_r());
double tau = 1.0;
double pre_factor = 0.9;
double post_factor = 0.1;
Expand Down Expand Up @@ -462,7 +464,7 @@ class advi {
diagnostic_writer("iter,time_in_seconds,ELBO");

// Initialize variational approximation
Q variational = Q(cont_params_);
Q variational = init_variational(cont_params_);

if (adapt_engaged) {
eta = adapt_eta(variational, adapt_iterations, logger);
Expand Down Expand Up @@ -562,6 +564,86 @@ class advi {
int n_monte_carlo_elbo_;
int eval_elbo_;
int n_posterior_samples_;

private:
virtual Q init_variational(Eigen::VectorXd& cont_params) const = 0;
virtual Q init_variational(size_t dimension) const = 0;
};

/**
 * The ADVI implementation used for the mean-field and full-rank
 * approximations.
 *
 * @tparam Model Class of model
 * @tparam Q Class of variational distribution, either mean-field or
 * full-rank.
 * @tparam BaseRNG Class of random number generator
 */
template <class Model, class Q, class BaseRNG>
class advi : public advi_base<Model, Q, BaseRNG> {
 public:
  /**
   * Constructor; forwards all arguments to advi_base, which performs
   * the argument validation.
   *
   * @param[in] m stan model
   * @param[in] cont_params initialization of continuous parameters
   * @param[in,out] rng random number generator
   * @param[in] n_monte_carlo_grad number of samples for gradient computation
   * @param[in] n_monte_carlo_elbo number of samples for ELBO computation
   * @param[in] eval_elbo evaluate ELBO at every "eval_elbo" iters
   * @param[in] n_posterior_samples number of samples to draw from posterior
   */
  advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
       int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo,
       int n_posterior_samples)
      : advi_base<Model, Q, BaseRNG>(m, cont_params, rng, n_monte_carlo_grad,
                                     n_monte_carlo_elbo, eval_elbo,
                                     n_posterior_samples) {}

 private:
  // Q is constructible directly from an initial mean vector or from a
  // dimension alone; `override` guards against signature drift with the
  // pure virtuals declared in advi_base.
  Q init_variational(Eigen::VectorXd& cont_params) const override {
    return Q(cont_params);
  }

  Q init_variational(size_t dimension) const override { return Q(dimension); }
};

/**
 * The ADVI implementation used only for low-rank approximations.
 *
 * @tparam Model Class of model.
 * @tparam BaseRNG Class of random number generator.
 */
template <class Model, class BaseRNG>
class advi_lowrank
    : public advi_base<Model, stan::variational::normal_lowrank, BaseRNG> {
 public:
  /**
   * Constructor
   *
   * @param[in] m stan model
   * @param[in] cont_params initialization of continuous parameters
   * @param[in,out] rng random number generator
   * @param[in] rank rank of approximation
   * @param[in] n_monte_carlo_grad number of samples for gradient computation
   * @param[in] n_monte_carlo_elbo number of samples for ELBO computation
   * @param[in] eval_elbo evaluate ELBO at every "eval_elbo" iters
   * @param[in] n_posterior_samples number of samples to draw from posterior
   * @throw std::runtime_error if n_monte_carlo_grad is not positive
   * @throw std::runtime_error if n_monte_carlo_elbo is not positive
   * @throw std::runtime_error if eval_elbo is not positive
   * @throw std::runtime_error if n_posterior_samples is not positive
   * @throw std::domain_error if rank is not positive
   */
  advi_lowrank(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng,
               size_t rank, int n_monte_carlo_grad, int n_monte_carlo_elbo,
               int eval_elbo, int n_posterior_samples)
      : advi_base<Model, stan::variational::normal_lowrank, BaseRNG>(
            m, cont_params, rng, n_monte_carlo_grad, n_monte_carlo_elbo,
            eval_elbo, n_posterior_samples),
        rank_(rank) {
    static const char* function = "stan::variational::advi_lowrank";
    math::check_positive(function, "Approximation rank", rank_);
  }

 protected:
  // Rank of the low-rank covariance factor; validated positive in the ctor.
  size_t rank_;

 private:
  // `override` guards against signature drift with the pure virtuals
  // declared in advi_base.
  stan::variational::normal_lowrank init_variational(
      Eigen::VectorXd& cont_params) const override {
    return stan::variational::normal_lowrank(cont_params, rank_);
  }

  stan::variational::normal_lowrank init_variational(
      size_t dimension) const override {
    return stan::variational::normal_lowrank(dimension, rank_);
  }
};
} // namespace variational
} // namespace stan
Expand Down
66 changes: 10 additions & 56 deletions src/stan/variational/base_family.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

namespace stan {
namespace variational {

class base_family {
public:
// Constructors
Expand All @@ -22,63 +21,18 @@ class base_family {
// Distribution-based operations
virtual const Eigen::VectorXd& mean() const = 0;
virtual double entropy() const = 0;
virtual Eigen::VectorXd transform(const Eigen::VectorXd& eta) const = 0;
/**
* Assign a draw from this mean field approximation to the
* specified vector using the specified random number generator.
*
* @tparam BaseRNG Class of random number generator.
* @param[in] rng Base random number generator.
* @param[out] eta Vector to which the draw is assigned; dimension has to be
* the same as the dimension of variational q.
* @throws std::range_error If the index is out of range.
*/

Eigen::VectorXd transform(const Eigen::VectorXd& eta) const;

template <class BaseRNG>
void sample(BaseRNG& rng, Eigen::VectorXd& eta) const {
// Draw from standard normal and transform to real-coordinate space
for (int d = 0; d < dimension(); ++d)
eta(d) = stan::math::normal_rng(0, 1, rng);
eta = transform(eta);
}
/**
* Draw a posterior sample from the normal distribution,
* and return its log normal density. The constants are dropped.
*
* @param[in] rng Base random number generator.
* @param[out] eta Vector to which the draw is assigned; dimension has to be
* the same as the dimension of variational q. eta will be transformed into
* variational posteriors.
* @param[out] log_g The log density in the variational approximation;
* The constant term is dropped.
* @throws std::range_error If the index is out of range.
*/
Eigen::VectorXd sample(BaseRNG& rng, Eigen::VectorXd& eta) const;

double calc_log_g(Eigen::VectorXd& eta) const;

template <class BaseRNG>
void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const {
// Draw from the approximation
for (int d = 0; d < dimension(); ++d) {
eta(d) = stan::math::normal_rng(0, 1, rng);
}
// Compute the log density before transformation
log_g = calc_log_g(eta);
// Transform to real-coordinate space
eta = transform(eta);
}
/**
* Compute the unnormalized log unit normal density wrt eta. All constants are
* dropped.
*
* @param[in] eta Vector; dimension has to be the same as the dimension
* of variational q.
* @return The unnormalized log density in the variational approximation;
* @throws std::range_error If the index is out of range.
*/
double calc_log_g(const Eigen::VectorXd& eta) const {
double log_g = 0;
for (int d = 0; d < dimension(); ++d) {
log_g += -stan::math::square(eta(d)) * 0.5;
}
return log_g;
}
Eigen::VectorXd sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta,
double& log_g) const;

template <class M, class BaseRNG>
void calc_grad(base_family& elbo_grad, M& m, Eigen::VectorXd& cont_params,
int n_monte_carlo_grad, BaseRNG& rng,
Expand Down
Loading