diff --git a/lib/stan_math b/lib/stan_math index 29843fe934f..bd0404db950 160000 --- a/lib/stan_math +++ b/lib/stan_math @@ -1 +1 @@ -Subproject commit 29843fe934fdb25ee1cc76d1f46d70e07a0bbf05 +Subproject commit bd0404db9509819c9bcb7905d38759dc773e010d diff --git a/src/stan/services/experimental/advi/defaults.hpp b/src/stan/services/experimental/advi/defaults.hpp index d41b6b13260..75fb37570ea 100644 --- a/src/stan/services/experimental/advi/defaults.hpp +++ b/src/stan/services/experimental/advi/defaults.hpp @@ -9,6 +9,17 @@ namespace services { namespace experimental { namespace advi { +/** + * Rank of the approximation when using low-rank ADVI. + */ +struct rank { + static std::string description() { + return "Rank of the covariance approximation when using low-rank ADVI."; + } + + static int default_value() { return 1; } +}; + /** * Number of samples for Monte Carlo estimate of gradients. */ diff --git a/src/stan/services/experimental/advi/lowrank.hpp b/src/stan/services/experimental/advi/lowrank.hpp new file mode 100644 index 00000000000..57c8efb79a4 --- /dev/null +++ b/src/stan/services/experimental/advi/lowrank.hpp @@ -0,0 +1,90 @@ +#ifndef STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP +#define STAN_SERVICES_EXPERIMENTAL_ADVI_LOWRANK_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace services { +namespace experimental { +namespace advi { + +/** + * Runs low rank ADVI. + * + * @tparam Model A model implementation + * @param[in] model Input model to test (with data already instantiated) + * @param[in] init var context for initialization + * @param[in] random_seed random seed for the random number generator + * @param[in] chain chain id to advance the random number generator + * @param[in] init_radius radius to initialize + * @param[in] grad_samples number of samples for Monte Carlo estimate + * of gradients + * @param[in] elbo_samples number of samples for Monte Carlo estimate + * of ELBO + * @param[in] max_iterations maximum number of iterations + * @param[in] tol_rel_obj convergence tolerance on the relative norm + * of the objective + * @param[in] rank the rank of the approximation + * @param[in] eta stepsize scaling parameter for variational inference + * @param[in] adapt_engaged adaptation engaged? + * @param[in] adapt_iterations number of iterations for eta adaptation + * @param[in] eval_elbo evaluate ELBO every Nth iteration + * @param[in] output_samples number of posterior samples to draw and + * save + * @param[in,out] interrupt callback to be called every iteration + * @param[in,out] logger Logger for messages + * @param[in,out] init_writer Writer callback for unconstrained inits + * @param[in,out] parameter_writer output for parameter values + * @param[in,out] diagnostic_writer output for diagnostic values + * @return error_codes::OK if successful + */ +template +int lowrank(Model& model, const stan::io::var_context& init, + unsigned int random_seed, unsigned int chain, double init_radius, + int grad_samples, int elbo_samples, int max_iterations, + double tol_rel_obj, int rank, double eta, bool adapt_engaged, + int adapt_iterations, int eval_elbo, int output_samples, + callbacks::interrupt& interrupt, callbacks::logger& logger, + callbacks::writer& init_writer, callbacks::writer& parameter_writer, + callbacks::writer& diagnostic_writer) { + util::experimental_message(logger); + + boost::ecuyer1988 rng = util::create_rng(random_seed, chain); + + std::vector disc_vector; + std::vector cont_vector = util::initialize( + model, init, rng, init_radius, true, logger, init_writer); + + std::vector names; + names.push_back("lp__"); + names.push_back("log_p__"); + names.push_back("log_g__"); + model.constrained_param_names(names, true, true); + parameter_writer(names); + + Eigen::VectorXd cont_params + = Eigen::Map(&cont_vector[0], cont_vector.size(), 1); + + stan::variational::advi_lowrank cmd_advi( + model, cont_params, rng, rank, grad_samples, elbo_samples, eval_elbo, + output_samples); + cmd_advi.run(eta, adapt_engaged, adapt_iterations, tol_rel_obj, + max_iterations, logger, parameter_writer, diagnostic_writer); + + return 0; +} +} // namespace advi +} // namespace experimental +} // namespace services +} // namespace stan +#endif diff --git a/src/stan/variational/advi.hpp b/src/stan/variational/advi.hpp index 4dca19d0335..028521d82c7 100644 --- a/src/stan/variational/advi.hpp +++ b/src/stan/variational/advi.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -30,14 +31,15 @@ namespace variational { * * Implements "black box" variational inference using stochastic gradient * ascent to maximize the Evidence Lower Bound for a given model - * and variational family. + * and variational family. This base class encapsulates the mean-field, + * low-rank, and full-rank variational posterior classes. * * @tparam Model class of model * @tparam Q class of variational distribution * @tparam BaseRNG class of random number generator */ template -class advi { +class advi_base { public: /** * Constructor @@ -54,9 +56,9 @@ class advi { * @throw std::runtime_error if eval_elbo is not positive * @throw std::runtime_error if n_posterior_samples is not positive */ - advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng, - int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo, - int n_posterior_samples) + advi_base(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng, + int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo, + int n_posterior_samples) : model_(m), cont_params_(cont_params), rng_(rng), @@ -192,10 +194,10 @@ class advi { } // Variational family to store gradients - Q elbo_grad = Q(model_.num_params_r()); + Q elbo_grad = init_variational(model_.num_params_r()); // Adaptive step-size sequence - Q history_grad_squared = Q(model_.num_params_r()); + Q history_grad_squared = init_variational(model_.num_params_r()); double tau = 1.0; double pre_factor = 0.9; double post_factor = 0.1; @@ -287,7 +289,7 @@ class advi { history_grad_squared.set_to_zero(); } ++eta_sequence_index; - variational = Q(cont_params_); + variational = init_variational(cont_params_); } return eta_best; } @@ -317,10 +319,10 @@ class advi { stan::math::check_positive(function, "Maximum iterations", max_iterations); // Gradient parameters - Q elbo_grad = Q(model_.num_params_r()); + Q elbo_grad = init_variational(model_.num_params_r()); // Stepsize sequence parameters - Q history_grad_squared = Q(model_.num_params_r()); + Q history_grad_squared = init_variational(model_.num_params_r()); double tau = 1.0; double pre_factor = 0.9; double post_factor = 0.1; @@ -462,7 +464,7 @@ class advi { diagnostic_writer("iter,time_in_seconds,ELBO"); // Initialize variational approximation - Q variational = Q(cont_params_); + Q variational = init_variational(cont_params_); if (adapt_engaged) { eta = adapt_eta(variational, adapt_iterations, logger); @@ -562,6 +564,86 @@ class advi { int n_monte_carlo_elbo_; int eval_elbo_; int n_posterior_samples_; + + private: + virtual Q init_variational(Eigen::VectorXd& cont_params) const = 0; + virtual Q init_variational(size_t dimension) const = 0; +}; + +/** + * The ADVI implementation used for meanfield and full-rank approximations. + * + * @tparam Model Class of model + * @tparam Q Class of variational distribution, either mean-field or low-rank. + * @tparam BaseRNG Class of random number generator + */ +template +class advi : public advi_base { + public: + advi(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng, + int n_monte_carlo_grad, int n_monte_carlo_elbo, int eval_elbo, + int n_posterior_samples) + : advi_base(m, cont_params, rng, n_monte_carlo_grad, + n_monte_carlo_elbo, eval_elbo, + n_posterior_samples) {} + + private: + Q init_variational(Eigen::VectorXd& cont_params) const { + return Q(cont_params); + } + + Q init_variational(size_t dimension) const { return Q(dimension); } +}; + +/** + * The ADVI implementation used only for low-rank approximations. + * + * @tparam Model Class of model. + * @tparam BaseRNG Class of random number generator. + */ +template +class advi_lowrank + : public advi_base { + public: + /** + * Constructor + * + * @param[in] m stan model + * @param[in] cont_params initialization of continuous parameters + * @param[in,out] rng random number generator + * @param[in] rank rank of approximation + * @param[in] n_monte_carlo_grad number of samples for gradient computation + * @param[in] n_monte_carlo_elbo number of samples for ELBO computation + * @param[in] eval_elbo evaluate ELBO at every "eval_elbo" iters + * @param[in] n_posterior_samples number of samples to draw from posterior + * @throw std::runtime_error if n_monte_carlo_grad is not positive + * @throw std::runtime_error if n_monte_carlo_elbo is not positive + * @throw std::runtime_error if eval_elbo is not positive + * @throw std::runtime_error if n_posterior_samples is not positive + */ + advi_lowrank(Model& m, Eigen::VectorXd& cont_params, BaseRNG& rng, + size_t rank, int n_monte_carlo_grad, int n_monte_carlo_elbo, + int eval_elbo, int n_posterior_samples) + : advi_base( + m, cont_params, rng, n_monte_carlo_grad, n_monte_carlo_elbo, + eval_elbo, n_posterior_samples), + rank_(rank) { + static const char* function = "stan::variational::advi_lowrank"; + math::check_positive(function, "Approximation rank", rank_); + } + + protected: + size_t rank_; + + private: + stan::variational::normal_lowrank init_variational( + Eigen::VectorXd& cont_params) const { + return stan::variational::normal_lowrank(cont_params, rank_); + } + + stan::variational::normal_lowrank init_variational(size_t dimension) const { + return stan::variational::normal_lowrank(dimension, rank_); + } }; } // namespace variational } // namespace stan diff --git a/src/stan/variational/base_family.hpp b/src/stan/variational/base_family.hpp index 1235a75a34a..d06a34ce806 100644 --- a/src/stan/variational/base_family.hpp +++ b/src/stan/variational/base_family.hpp @@ -8,7 +8,6 @@ namespace stan { namespace variational { - class base_family { public: // Constructors @@ -22,63 +21,18 @@ class base_family { // Distribution-based operations virtual const Eigen::VectorXd& mean() const = 0; virtual double entropy() const = 0; - virtual Eigen::VectorXd transform(const Eigen::VectorXd& eta) const = 0; - /** - * Assign a draw from this mean field approximation to the - * specified vector using the specified random number generator. - * - * @tparam BaseRNG Class of random number generator. - * @param[in] rng Base random number generator. - * @param[out] eta Vector to which the draw is assigned; dimension has to be - * the same as the dimension of variational q. - * @throws std::range_error If the index is out of range. - */ + + Eigen::VectorXd transform(const Eigen::VectorXd& eta) const; + template - void sample(BaseRNG& rng, Eigen::VectorXd& eta) const { - // Draw from standard normal and transform to real-coordinate space - for (int d = 0; d < dimension(); ++d) - eta(d) = stan::math::normal_rng(0, 1, rng); - eta = transform(eta); - } - /** - * Draw a posterior sample from the normal distribution, - * and return its log normal density. The constants are dropped. - * - * @param[in] rng Base random number generator. - * @param[out] eta Vector to which the draw is assigned; dimension has to be - * the same as the dimension of variational q. eta will be transformed into - * variational posteriors. - * @param[out] log_g The log density in the variational approximation; - * The constant term is dropped. - * @throws std::range_error If the index is out of range. - */ + Eigen::VectorXd sample(BaseRNG& rng, Eigen::VectorXd& eta) const; + + double calc_log_g(Eigen::VectorXd& eta) const; + template - void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const { - // Draw from the approximation - for (int d = 0; d < dimension(); ++d) { - eta(d) = stan::math::normal_rng(0, 1, rng); - } - // Compute the log density before transformation - log_g = calc_log_g(eta); - // Transform to real-coordinate space - eta = transform(eta); - } - /** - * Compute the unnormalized log unit normal density wrt eta. All constants are - * dropped. - * - * @param[in] eta Vector; dimension has to be the same as the dimension - * of variational q. - * @return The unnormalized log density in the variational approximation; - * @throws std::range_error If the index is out of range. - */ - double calc_log_g(const Eigen::VectorXd& eta) const { - double log_g = 0; - for (int d = 0; d < dimension(); ++d) { - log_g += -stan::math::square(eta(d)) * 0.5; - } - return log_g; - } + Eigen::VectorXd sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, + double& log_g) const; + template void calc_grad(base_family& elbo_grad, M& m, Eigen::VectorXd& cont_params, int n_monte_carlo_grad, BaseRNG& rng, diff --git a/src/stan/variational/families/normal_fullrank.hpp b/src/stan/variational/families/normal_fullrank.hpp index 4f421a183be..85175040ed6 100644 --- a/src/stan/variational/families/normal_fullrank.hpp +++ b/src/stan/variational/families/normal_fullrank.hpp @@ -351,28 +351,49 @@ class normal_fullrank : public base_family { return (L_chol_ * eta) + mu_; } + /** + * Sample from the variational distribution. + * + * @tparam BaseRNG Random number generator class. + * @param[in] rng Random number generator. + * @param[in,out] zeta Input standard normals, output variational sample. + */ template - void sample(BaseRNG& rng, Eigen::VectorXd& eta) const { + void sample(BaseRNG& rng, Eigen::VectorXd& zeta) const { // Draw from standard normal and transform to real-coordinate space for (int d = 0; d < dimension(); ++d) - eta(d) = stan::math::normal_rng(0, 1, rng); - eta = transform(eta); + zeta(d) = stan::math::normal_rng(0, 1, rng); + zeta = transform(zeta); } + /** + * Sample from the variational distribution and return its log normal + * density. + * + * @tparam BaseRNG Random number generator class. + * @param[in] rng Random number generator. + * @param[in,out] zeta Input standard normals, output variational sample. + * @param[in] log_g The log-probability of the sample. + */ template - void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const { + void sample_log_g(BaseRNG& rng, Eigen::VectorXd& zeta, double& log_g) const { // Draw from the approximation for (int d = 0; d < dimension(); ++d) { - eta(d) = stan::math::normal_rng(0, 1, rng); + zeta(d) = stan::math::normal_rng(0, 1, rng); } // Compute the log density before transformation - log_g = calc_log_g(eta); + log_g = calc_log_g(zeta); // Transform to real-coordinate space - eta = transform(eta); + zeta = transform(zeta); } + /** + * Calculate the log-density of a vector of independent std normals. + * + * @param[in] eta The random sample. + * @return The log-probability of the random sample. + */ double calc_log_g(const Eigen::VectorXd& eta) const { - // Compute the log density wrt normal distribution dropping constants double log_g = 0; for (int d = 0; d < dimension(); ++d) { log_g += -stan::math::square(eta(d)) * 0.5; diff --git a/src/stan/variational/families/normal_lowrank.hpp b/src/stan/variational/families/normal_lowrank.hpp new file mode 100644 index 00000000000..b76d97be69a --- /dev/null +++ b/src/stan/variational/families/normal_lowrank.hpp @@ -0,0 +1,620 @@ +#ifndef STAN_VARIATIONAL_NORMAL_LOWRANK_HPP +#define STAN_VARIATIONAL_NORMAL_LOWRANK_HPP + +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace variational { +/** + * Variational family approximation with low-rank multivariate normal + * distribution. + */ +class normal_lowrank : public base_family { + private: + /** + * Mean vector. + */ + Eigen::VectorXd mu_; + + /** + * Low-rank covariance factor. + */ + Eigen::MatrixXd B_; + + /** + * Additive white noise for covariance positive definite-ness. + */ + Eigen::VectorXd log_d_; + + /** + * Dimensionality of the distribution. + */ + const int dimension_; + + /** + * Rank of the low-rank covariance factor. + */ + const int rank_; + + /** + * Raise a domain exception if the specified vector contains not-a-number + * values or is not of the correct dimension. + * + * @param[in] mu Mean vector. + * @throw std::domain_error If the mean vector contains NaN values or does + * not match this distribution's dimensionality. + */ + void validate_mean(const char* function, const Eigen::VectorXd& mu) { + stan::math::check_not_nan(function, "Mean vector", mu); + stan::math::check_size_match(function, "Dimension of input vector", + mu.size(), "Dimension of current vector", + dimension()); + } + + /** + * Raise a domain exception if the specified matrix is not of the correct + * dimension or rank, is not lower triangular, or contains not-a-number + * values. + * + * @param[in] B Low-rank factor. + * @throw std::domain_error If the matrix is not of the correct dimension or + * rank, or if it contains not-a-number values. + */ + void validate_factor(const char* function, const Eigen::MatrixXd& B) { + stan::math::check_not_nan(function, "Low rank factor", B); + stan::math::check_lower_triangular(function, "Low rank factor", B); + stan::math::check_size_match(function, "Dimension of mean vector", + dimension(), "Dimension of low-rank factor", + B.rows()); + stan::math::check_size_match(function, "Rank of factor", B.cols(), + "Rank of approximation", rank()); + } + + /** + * Raise a domain exception if the specified matrix is not of the correct + * dimension, or contains not-a-number-values. + * + * @param[in] log_d Log std vector. + * @throw std::domain_error If the matrix is not of the correct dimension or + * rank, or if it contains not-a-number values. + */ + void validate_noise(const char* function, const Eigen::VectorXd& log_d) { + stan::math::check_not_nan(function, "log std vector", log_d); + stan::math::check_size_match(function, "Dimension of mean vector", + dimension(), "Dimension of log std vector", + log_d.size()); + } + + public: + /** + * Construct a variational distribution of the specified dimensionality with + * a zero mean, zero low-rank factor matrix, and a zero log-std vector, + * corresponding to an identity covariance. + * + * @param[in] dimension The dimensionality of the distribution. + * @param[in] rank The rank of the low-rank factor. + */ + explicit normal_lowrank(size_t dimension, size_t rank) + : mu_(Eigen::VectorXd::Zero(dimension)), + B_(Eigen::MatrixXd::Zero(dimension, rank)), + log_d_(Eigen::VectorXd::Zero(dimension)), + dimension_(dimension), + rank_(rank) {} + + /** + * Construct a variational distribution with specified mean vector, and + * identity covariance. + * + * @params[in] mu Mean vector. + * @params[in] rank Rank of the approximation. + */ + explicit normal_lowrank(const Eigen::VectorXd& mu, size_t rank) + : mu_(mu), + B_(Eigen::MatrixXd::Zero(mu.size(), rank)), + log_d_(Eigen::VectorXd::Zero(mu.size())), + dimension_(mu.size()), + rank_(rank) {} + + /** + * Construct a variational distribution with specified mean and covariance + * parameters. + * + * @param[in] mu Mean vector. + * @param[in] B Low-rank covariance factor. + * @param[in] log_d Additive log std vector. + * @throws std::domain_error If the low-rank covariance factor is not not + * lower-triangular, if the parameters have inconsistent dimensionality, + * or if any NaNs are present. + */ + explicit normal_lowrank(const Eigen::VectorXd& mu, const Eigen::MatrixXd& B, + const Eigen::VectorXd& log_d) + : mu_(mu), B_(B), log_d_(log_d), dimension_(mu.size()), rank_(B.cols()) { + static const char* function = "stan::variational::normal_lowrank"; + validate_mean(function, mu); + validate_factor(function, B); + validate_noise(function, log_d); + } + + /** + * Return the dimensionality of the approximation. + */ + int dimension() const { return dimension_; } + + /** + * Return the rank of the approximation. + */ + int rank() const { return rank_; } + + /** + * Return the mean vector. + */ + const Eigen::VectorXd& mean() const { return mu(); } + + /** + * Return the mean vector. + */ + const Eigen::VectorXd& mu() const { return mu_; } + + /** + * Return the low-rank covariance factor. + */ + const Eigen::MatrixXd& B() const { return B_; } + + /** + * Return the additive log-std. + */ + const Eigen::VectorXd& log_d() const { return log_d_; } + + /** + * Set the mean vector. + * + * @param[in] mu Mean vector. + * @throw std::domain_error If the size of the specified mean vector does not + * match the stored dimension of this approximation. + */ + void set_mu(const Eigen::VectorXd& mu) { + static const char* function = "stan::variational::set_mu"; + validate_mean(function, mu); + mu_ = mu; + } + + /** + * Set the low-rank factor to the specified value. + * + * @param[in] B The low-rank factor of the covariance matrix. + * @throw std::domain_error If the specified matrix does not have the correct + * dimension or rank, is not lower triangular, or contains NaNs. + */ + void set_B(const Eigen::MatrixXd& B) { + static const char* function = "stan::variational::set_B"; + validate_factor(function, B); + B_ = B; + } + + /** + * Set the additive log-std component of the covariance. + * + * @param[in] log_d The log-std vector. + * @throw std::domain_error If the size of the log-std vector does not match + * the dimension of the approximation, or contains NaNs. + */ + void set_log_d(const Eigen::VectorXd& log_d) { + static const char* function = "stan::variational::set_log_d"; + validate_noise(function, log_d); + log_d_ = log_d; + } + + /** + * Set the mean, low-rank factor, and log-std of the approximation to zero. + */ + void set_to_zero() { + mu_ = Eigen::VectorXd::Zero(dimension()); + B_ = Eigen::MatrixXd::Zero(dimension(), rank()); + log_d_ = Eigen::VectorXd::Zero(dimension()); + } + + /** + * Return a new low-rank approximation resulting from squaring the entries in + * the mean, low-rank factor, and log-std vector for the covariance matrix. + * The new approximation does not hold any references to this approximation. + */ + normal_lowrank square() const { + return normal_lowrank(Eigen::VectorXd(mu_.array().square()), + Eigen::MatrixXd(B_.array().square()), + Eigen::VectorXd(log_d_.array().square())); + } + + /** + * Return a new low-rank approximation resulting from taking the square root + * of the entries in the mean, low-rank factor, and log-std vector for the + * covariance matrix. The new approximation does not hold any references to + * this approximation. + */ + normal_lowrank sqrt() const { + return normal_lowrank(Eigen::VectorXd(mu_.array().sqrt()), + Eigen::MatrixXd(B_.array().sqrt()), + Eigen::VectorXd(log_d_.array().sqrt())); + } + + /** + * Return this approximation after setting its mean vector, low-rank factor, + * and log-std vector to the values given by the specified approximation. + * + * @param[in] rhf Approximation from which to gather the mean and covariance + * parameters. + * @return This approximation after assignment. + * @throw std::domain_error If the dimensionality or rank of the specified + * approximation does not match this approximation's dimensionality or rank. + */ + normal_lowrank& operator=(const normal_lowrank& rhs) { + static const char* function + = "stan::variational::normal_lowrank::operator="; + stan::math::check_size_match(function, "Dimension of lhs", dimension(), + "Dimension of rhs", rhs.dimension()); + stan::math::check_size_match(function, "Rank of lhs", rank(), "Rank of rhs", + rhs.rank()); + mu_ = rhs.mu(); + B_ = rhs.B(); + log_d_ = rhs.log_d(); + return *this; + } + + /** + * Add the mean, low-rank covariance factor, and log-std of the specified + * approximation to this approximation. + * + * @param[in] rhs Approximation from which to gather the mean and covariance + * parameters. + * @return This approximation after adding the specified approximation. + * @throw std::domain_error If the dimensionality or rank of the specified + * approximation does not match this approximation's dimensionality. + */ + normal_lowrank& operator+=(const normal_lowrank& rhs) { + static const char* function + = "stan::variational::normal_lowrank::operator+="; + stan::math::check_size_match(function, "Dimension of lhs", dimension(), + "Dimension of rhs", rhs.dimension()); + stan::math::check_size_match(function, "Rank of lhs", rank(), "Rank of rhs", + rhs.rank()); + mu_ += rhs.mu(); + B_ += rhs.B(); + log_d_ += rhs.log_d(); + return *this; + } + + /** + * Return this approximation after elementwise division by the specified + * approximation's mean, low-rank factor, and log-std vector. + * + * @param[in] rhs Approximation from which to gather the mean and covariance + * parameters. + * @return This approximation after dividing by the specified approximation. + * @throw std::domain_error If the dimensionality or rank of the specified + * approximation does not match this approximation's dimensionality. + */ + inline normal_lowrank& operator/=(const normal_lowrank& rhs) { + static const char* function + = "stan::variational::normal_lowrank::operator/="; + + stan::math::check_size_match(function, "Dimension of lhs", dimension(), + "Dimension of rhs", rhs.dimension()); + stan::math::check_size_match(function, "Rank of lhs", rank(), "Rank of rhs", + rhs.rank()); + mu_.array() /= rhs.mu().array(); + B_.array() /= rhs.B().array(); + log_d_.array() /= rhs.log_d().array(); + return *this; + } + + /** + * Return this approximation after adding the specified scalar to each entry + * in the mean, low-rank covariance factor, and log-std vector. + * + * Warning: No finiteness check is made on the scalar, so it may + * introduce NaNs. + * + * @param[in] scalar Scalar to add. + * @return This approximation after elementwise addition of the specified + * scalar. + */ + normal_lowrank& operator+=(double scalar) { + mu_.array() += scalar; + B_.array() += scalar; + log_d_.array() += scalar; + return *this; + } + + /** + * Return this approximation after multiplying the mean, low-rank factor, and + * log-std vector by the specified scalar. + * + * Warning: No finiteness check is made on the scalar, so it may + * introduce NaNs. + * + * @param[in] scalar Scalar to multiply by. + * @return This approximation after elementwise multiplication of the + * specified scalar. + */ + normal_lowrank& operator*=(double scalar) { + mu_ *= scalar; + B_ *= scalar; + log_d_ *= scalar; + return *this; + } + + /** + * Return the entropy of this approximation. + * + *

The entropy is defined using the matrix determinant lemma as + * 0.5 * dim * (1 + log2pi) + 0.5 * log det (B^T B + D^2) + * where + * log det (B^T B + D^2) = log det(I + B^T D^2 B) + 2 * log det(D) + * = log det(I + B^T D^2 B) + 2 * sum(log_d) + * where the last equality holds because the representation of D stored is + * its diag vector. + * + * @return Entropy of this approximation. + */ + double entropy() const { + static int r = rank(); + static double mult = 0.5 * (1.0 + stan::math::LOG_TWO_PI); + double result = mult * dimension(); + result += 0.5 + * log((Eigen::MatrixXd::Identity(r, r) + + B_.transpose() + * log_d_.array() + .exp() + .square() + .matrix() + .asDiagonal() + .inverse() + * B_) + .determinant()); + for (int d = 0; d < dimension(); ++d) { + result += log_d_(d); + } + return result; + } + + /** + * Return the transform of the specified vector using the mean and covariance + * parameters. + * + * The transform is defined by + * S^{-1}(eta) = B * z + D * eps + mu + * where D = diag(exp(log_d)), z = head(eta, rank) and + * eps = tail(eta, dimension). + */ + Eigen::VectorXd transform(const Eigen::VectorXd& eta) const { + static const char* function + = "stan::variational::normal_lowrank::transform"; + stan::math::check_size_match(function, "Dimension of input vector", + eta.size(), "Sum of dimension and rank", + dimension() + rank()); + stan::math::check_not_nan(function, "Input vector", eta); + Eigen::VectorXd z = eta.head(rank()); + Eigen::VectorXd eps = eta.tail(dimension()); + return (log_d_.array().exp() * eps.array()).matrix() + (B_ * z) + mu_; + } + + /** + * Sample from the variational distribution. + * + * @tparam BaseRNG Random number generator class. + * @param[in] rng Random number generator. + * @param[out] zeta Output variational standard normals. + */ + template + void sample(BaseRNG& rng, Eigen::VectorXd& zeta) const { + // Draw from standard normal and transform to real-coordinate space + Eigen::VectorXd eta = Eigen::VectorXd::Zero(dimension() + rank()); + for (int d = 0; d < dimension() + rank(); ++d) + eta(d) = stan::math::normal_rng(0, 1, rng); + zeta = transform(eta); + } + + /** + * Sample from the variational distribution and return its log normal + * density. + * + * @param[in] rng Base random number generator. + * @param[out] eta Vector to which the draw is assigned; dimension has to be + * the same as the dimension of variational q. eta will be transformed into + * variational posteriors. + * @param[out] log_g The log density in the variational approximation; + * The constant term is dropped. + * @throws std::range_error If the index is out of range. + */ + template + void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const { + // Draw from the approximation + Eigen::VectorXd zeta = Eigen::VectorXd::Zero(dimension() + rank()); + for (int d = 0; d < dimension() + rank(); ++d) { + zeta(d) = stan::math::normal_rng(0, 1, rng); + } + // Compute the log density before transformation + log_g = calc_log_g(zeta); + // Transform to real-coordinate space + eta = transform(zeta); + } + + /** + * Calculates the gradient with respect to the mean and covariance parameters + * in parallel. + * + * @tparam M Model class. + * @tparam BaseRNG Class of base random number generator. + * @param[in] elbo_grad Approximation to store gradient. + * @param[in] m Model. + * @param[in] cont_params Continuous parameters. + * @param[in] n_monte_carlo_grad Sample size for gradient computation. + * @param[in,out] rng Random number generator. + * @param[in,out] logger Logger for messages. + * @throw std::domain_error If the number of divergent iterations exceeds its + * specified bounds. + */ + template + void calc_grad(normal_lowrank& elbo_grad, M& m, Eigen::VectorXd& cont_params, + int n_monte_carlo_grad, BaseRNG& rng, + callbacks::logger& logger) const { + static const char* function + = "stan::variational::normal_lowrank::calc_grad"; + + stan::math::check_size_match(function, "Dimension of elbo_grad", + elbo_grad.dimension(), + "Dimension of variational q", dimension()); + stan::math::check_size_match(function, "Dimension of variational q", + dimension(), "Dimension of variables in model", + cont_params.size()); + + stan::math::check_size_match(function, "Rank of elbo_grad", + elbo_grad.rank(), "Rank of variational q", + rank()); + + Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(dimension()); + Eigen::MatrixXd B_grad = Eigen::MatrixXd::Zero(dimension(), rank()); + Eigen::VectorXd d_grad = Eigen::VectorXd::Zero(dimension()); + Eigen::VectorXd log_d_grad = Eigen::VectorXd::Zero(dimension()); + + double tmp_lp = 0.0; + Eigen::VectorXd eta = Eigen::VectorXd::Zero(dimension() + rank()); + Eigen::VectorXd zeta = Eigen::VectorXd::Zero(dimension()); + + // (B.B^T + D^2)^-1 by Woodbury formula + Eigen::MatrixXd d_inv2 + = log_d_.array().exp().square().matrix().asDiagonal().inverse(); + Eigen::MatrixXd Bt = B_.transpose(); + Eigen::MatrixXd I = Eigen::MatrixXd::Identity(rank(), rank()); + Eigen::MatrixXd woodbury + = d_inv2 - d_inv2 * B_ * (I + Bt * d_inv2 * B_).inverse() * Bt * d_inv2; + + Eigen::VectorXd tmp_mu_grad = Eigen::VectorXd::Zero(dimension()); + + // Naive Monte Carlo integration + static const int n_retries = 10; + for (int i = 0, n_monte_carlo_drop = 0; i < n_monte_carlo_grad;) { + // Draw from standard normal and transform to real-coordinate space + for (int d = 0; d < dimension() + rank(); ++d) { + eta(d) = stan::math::normal_rng(0, 1, rng); + } + Eigen::VectorXd z = eta.head(rank()); + Eigen::VectorXd eps = eta.tail(dimension()); + zeta = transform(eta); + + // (B.B^T + D^2)^-1 . (B.z + d*eps) + Eigen::VectorXd woodbury_zeta = woodbury * (zeta - mu_); + + try { + std::stringstream ss; + stan::model::gradient(m, zeta, tmp_lp, tmp_mu_grad, &ss); + if (ss.str().length() > 0) + logger.info(ss); + stan::math::check_finite(function, "Gradient of mu", mu_grad); + + mu_grad += tmp_mu_grad; + for (int ii = 0; ii < dimension(); ++ii) { + for (int jj = 0; jj <= ii && jj < rank(); ++jj) { + B_grad(ii, jj) += (tmp_mu_grad(ii) + woodbury_zeta(ii)) * z(jj); + } + d_grad(ii) += (tmp_mu_grad(ii) + woodbury_zeta(ii)) * eps(ii); + log_d_grad(ii) += d_grad(ii) * exp(log_d_(ii)); + } + ++i; + } catch (const std::exception& e) { + ++n_monte_carlo_drop; + if (n_monte_carlo_drop >= n_retries * n_monte_carlo_grad) { + const char* name = "The number of dropped evaluations"; + const char* msg1 = "has reached its maximum amount ("; + int y = n_retries * n_monte_carlo_grad; + const char* msg2 + = "). Your model may be either severely " + "ill-conditioned or misspecified."; + stan::math::domain_error(function, name, y, msg1, msg2); + } + } + } + mu_grad /= static_cast(n_monte_carlo_grad); + B_grad /= static_cast(n_monte_carlo_grad); + log_d_grad /= static_cast(n_monte_carlo_grad); + + elbo_grad.set_mu(mu_grad); + elbo_grad.set_B(B_grad); + elbo_grad.set_log_d(log_d_grad); + } + + /** + * Calculate the log-density of a vector of independent std normals. + * + * @param[in] eta The random sample. + * @return The log-probability of the random sample. + */ + double calc_log_g(const Eigen::VectorXd& eta) const { + double log_g = 0; + for (int d = 0; d < rank() + dimension(); ++d) { + log_g += -stan::math::square(eta(d)) * 0.5; + } + return log_g; + } +}; + +/** + * Return a new approximation resulting from elementwise addition of + * of the first specified approximation to the second. + * + * @param[in] lhs First approximation. + * @param[in] rhs Second approximation. + * @return Elementwise addition of the specified approximations. + * @throw std::domain_error If the dimensionalities do not match. + */ +inline normal_lowrank operator+(normal_lowrank lhs, const normal_lowrank& rhs) { + return lhs += rhs; +} + +/** + * Return a new approximation resulting from elementwise division of + * of the first specified approximation by the second. + * + * @param[in] lhs First approximation. + * @param[in] rhs Second approximation. + * @return Elementwise division of the specified approximations. + * @throw std::domain_error If the dimensionalities do not match. + */ +inline normal_lowrank operator/(normal_lowrank lhs, const normal_lowrank& rhs) { + return lhs /= rhs; +} + +/** + * Return a new approximation resulting from elementwise addition + * of the specified scalar to the mean, low-rank factor, and log-std + * vector entries for the specified approximation. + * + * @param[in] scalar Scalar value + * @param[in] rhs Approximation. + * @return Addition of scalar to specified approximation. + */ +inline normal_lowrank operator+(double scalar, normal_lowrank rhs) { + return rhs += scalar; +} + +/** + * Return a new approximation resulting from elementwise + * multiplication of the specified scalar to the mean low-rank factor, + * and log-std for the specified approximation. + * + * @param[in] scalar Scalar value + * @param[in] rhs Approximation. + * @return Multiplication of scalar by the specified approximation. + */ +inline normal_lowrank operator*(double scalar, normal_lowrank rhs) { + return rhs *= scalar; +} +} // namespace variational +} // namespace stan + +#endif diff --git a/src/stan/variational/families/normal_meanfield.hpp b/src/stan/variational/families/normal_meanfield.hpp index afb47c0b13b..8cc8c79240b 100644 --- a/src/stan/variational/families/normal_meanfield.hpp +++ b/src/stan/variational/families/normal_meanfield.hpp @@ -314,6 +314,65 @@ class normal_meanfield : public base_family { return eta.array().cwiseProduct(omega_.array().exp()) + mu_.array(); } + /** + * Assign a draw from this mean field approximation to the + * specified vector using the specified random number generator. + * + * @tparam BaseRNG Class of random number generator. + * @param[in] rng Base random number generator. + * @param[out] eta Vector to which the draw is assigned; dimension has to be + * the same as the dimension of variational q. + * @throws std::range_error If the index is out of range. + */ + template + void sample(BaseRNG& rng, Eigen::VectorXd& eta) const { + // Draw from standard normal and transform to real-coordinate space + for (int d = 0; d < dimension(); ++d) + eta(d) = stan::math::normal_rng(0, 1, rng); + eta = transform(eta); + } + + /** + * Draw a posterior sample from the normal distribution, + * and return its log normal density. The constants are dropped. + * + * @param[in] rng Base random number generator. + * @param[out] eta Vector to which the draw is assigned; dimension has to be + * the same as the dimension of variational q. eta will be transformed into + * variational posteriors. + * @param[out] log_g The log density in the variational approximation; + * The constant term is dropped. + * @throws std::range_error If the index is out of range. + */ + template + void sample_log_g(BaseRNG& rng, Eigen::VectorXd& eta, double& log_g) const { + // Draw from the approximation + for (int d = 0; d < dimension(); ++d) { + eta(d) = stan::math::normal_rng(0, 1, rng); + } + // Compute the log density before transformation + log_g = calc_log_g(eta); + // Transform to real-coordinate space + eta = transform(eta); + } + + /** + * Compute the unnormalized log unit normal density wrt eta. All constants are + * dropped. + * + * @param[in] eta Vector; dimension has to be the same as the dimension + * of variational q. + * @return The unnormalized log density in the variational approximation; + * @throws std::range_error If the index is out of range. + */ + double calc_log_g(const Eigen::VectorXd& eta) const { + double log_g = 0; + for (int d = 0; d < dimension(); ++d) { + log_g += -stan::math::square(eta(d)) * 0.5; + } + return log_g; + } + /** * Calculates the "blackbox" gradient with respect to both the * location vector (mu) and the log-std vector (omega) in diff --git a/src/test/unit/variational/advi_messages_test.cpp b/src/test/unit/variational/advi_messages_test.cpp index 486a8240c7e..82635aee282 100644 --- a/src/test/unit/variational/advi_messages_test.cpp +++ b/src/test/unit/variational/advi_messages_test.cpp @@ -38,17 +38,23 @@ class advi_test : public ::testing::Test { parameter_stream_.str(""); diagnostic_stream_.str(""); + // low-rank approximation rank + size_t rank = 1; + advi_meanfield_ = new stan::variational::advi< stan_model, stan::variational::normal_meanfield, rng_t>( *model_, cont_params_, base_rng_, 1, 100, 1, 1); advi_fullrank_ = new stan::variational::advi< stan_model, stan::variational::normal_fullrank, rng_t>( *model_, cont_params_, base_rng_, 1, 100, 1, 1); + advi_lowrank_ = new stan::variational::advi_lowrank( + *model_, cont_params_, base_rng_, rank, 1, 100, 1, 1); } void TearDown() { delete advi_meanfield_; delete advi_fullrank_; + delete advi_lowrank_; delete model_; } @@ -59,6 +65,7 @@ class advi_test : public ::testing::Test { rng_t> *advi_meanfield_; stan::variational::advi *advi_fullrank_; + stan::variational::advi_lowrank *advi_lowrank_; std::stringstream model_stream_; std::stringstream log_stream_; std::stringstream parameter_stream_; @@ -87,6 +94,13 @@ TEST_F(advi_test, max_iteration_warn_fullrank) { << "The message should have err_msg1 inside it."; } +TEST_F(advi_test, max_iteration_warn_lowrank) { + EXPECT_EQ(0, advi_lowrank_->run(10, 0, 50, 0.01, 1, logger, parameter_writer, + diagnostic_writer)); + EXPECT_TRUE(log_stream_.str().find(err_msg1) != std::string::npos) + << "The message should have err_msg1 inside it."; +} + TEST_F(advi_test, prev_elbo_larger_meanfield) { EXPECT_EQ(0, advi_meanfield_->run(10, 0, 50, 0.1, 100, logger, parameter_writer, diagnostic_writer)); @@ -100,3 +114,10 @@ TEST_F(advi_test, prev_elbo_larger_fullrank) { EXPECT_TRUE(log_stream_.str().find(err_msg2) != std::string::npos) << "The message should have err_msg2 inside it."; } + +TEST_F(advi_test, prev_elbo_larger_lowrank) { + EXPECT_EQ(0, advi_lowrank_->run(10, 0, 50, 0.2, 100, logger, parameter_writer, + diagnostic_writer)); + EXPECT_TRUE(log_stream_.str().find(err_msg2) != std::string::npos) + << "The message should have err_msg2 inside it."; +} diff --git a/src/test/unit/variational/advi_multivar_no_constraint_test.cpp b/src/test/unit/variational/advi_multivar_no_constraint_test.cpp index 4310938aec1..5930daf6ce2 100644 --- a/src/test/unit/variational/advi_multivar_no_constraint_test.cpp +++ b/src/test/unit/variational/advi_multivar_no_constraint_test.cpp @@ -237,3 +237,137 @@ TEST(advi_test, multivar_no_constraint_meanfield) { n_monte_carlo_grad, base_rng, logger), std::invalid_argument, error); } + +TEST(advi_test, multivar_no_constraint_lowrank) { + // Create mock data_var_context + static const std::string DATA = ""; + std::stringstream data_stream(DATA); + stan::io::dump dummy_context(data_stream); + + // Instantiate model + Model my_model(dummy_context); + + // RNG + rng_t base_rng(0); + + // Other params + int n_monte_carlo_grad = 10; + std::stringstream log_stream; + stan::callbacks::stream_logger logger(log_stream, log_stream, log_stream, + log_stream, log_stream); + + // Dummy input + Eigen::VectorXd cont_params = Eigen::VectorXd::Zero(2); + cont_params(0) = 0.75; + cont_params(1) = 0.75; + + // ADVI + size_t rank = 1; + stan::variational::advi_lowrank test_advi( + my_model, cont_params, base_rng, rank, n_monte_carlo_grad, + 1e4, // absurdly high! + 100, 1); + + // Create some arbitrary variational q() family to calculate the ELBO over + Eigen::VectorXd mu = Eigen::VectorXd::Constant(my_model.num_params_r(), 2.5); + Eigen::VectorXd fac = Eigen::MatrixXd::Constant(my_model.num_params_r(), rank, + 0.0); // no covariance + // in posterior + Eigen::VectorXd log_d + = Eigen::VectorXd::Constant(my_model.num_params_r(), + 0.0); // initializing log_d = 0 + // means sigma = 1 + stan::variational::normal_lowrank mufaclog_d + = stan::variational::normal_lowrank(mu, fac, log_d); + + double elbo = 0.0; + elbo = test_advi.calc_ELBO(mufaclog_d, logger); + + // Can calculate ELBO analytically + double zeta = -0.5 * (3 * 2 * log(2.0 * stan::math::pi()) + 18.5 + 25 + 13); + Eigen::VectorXd mu_J = Eigen::VectorXd::Zero(2); + mu_J(0) = 10.5; + mu_J(1) = 7.5; + + double elbo_true = 0.0; + elbo_true += zeta; + elbo_true += mu_J.dot(mu); + elbo_true += -0.5 * (3 * mu.dot(mu) + 3 * 2); + elbo_true += 1 + log(2.0 * stan::math::pi()); + + double const EPSILON = 0.1; + EXPECT_NEAR(elbo_true, elbo, EPSILON); + + Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(3); + Eigen::MatrixXd B_grad = Eigen::MatrixXd::Zero(3, 2); + Eigen::VectorXd log_d_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + + std::string error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (3) and " + "Dimension of log std vector (2) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(0); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (0) and " + "Dimension of low-rank factor (3) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + B_grad = Eigen::MatrixXd::Zero(my_model.num_params_r(), 1); + log_d_grad = Eigen::VectorXd::Zero(3); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (2) and " + "Dimension of log std vector (3) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + log_d_grad = Eigen::VectorXd::Zero(0); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (2) and " + "Dimension of log std vector (0) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(3); + B_grad = Eigen::VectorXd::Zero(3, rank); + log_d_grad = Eigen::VectorXd::Zero(3); + stan::variational::normal_lowrank elbo_grad + = stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad); + + error + = "stan::variational::normal_lowrank::calc_grad: " + "Dimension of elbo_grad (3) and " + "Dimension of variational q (2) must match in size"; + EXPECT_THROW_MSG(mufaclog_d.calc_grad(elbo_grad, my_model, cont_params, + n_monte_carlo_grad, base_rng, logger), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(2); + B_grad = Eigen::MatrixXd::Zero(2, rank + 1); + log_d_grad = Eigen::VectorXd::Zero(2); + stan::variational::normal_lowrank elbo_grad2 + = stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad); + + error + = "stan::variational::normal_lowrank::calc_grad: " + "Rank of elbo_grad (2) and " + "Rank of variational q (1) must match in size"; + EXPECT_THROW_MSG(mufaclog_d.calc_grad(elbo_grad2, my_model, cont_params, + n_monte_carlo_grad, base_rng, logger), + std::invalid_argument, error); +} diff --git a/src/test/unit/variational/advi_multivar_with_constraint_test.cpp b/src/test/unit/variational/advi_multivar_with_constraint_test.cpp index 8730b3b0672..3ce523a5a05 100644 --- a/src/test/unit/variational/advi_multivar_with_constraint_test.cpp +++ b/src/test/unit/variational/advi_multivar_with_constraint_test.cpp @@ -178,3 +178,138 @@ TEST(advi_test, multivar_with_constraint_meanfield) { n_monte_carlo_grad, base_rng, logger), std::invalid_argument, error); } + +TEST(advi_test, multivar_with_constraint_lowrank) { + // Create mock data_var_context + static const std::string DATA = ""; + std::stringstream data_stream(DATA); + stan::io::dump dummy_context(data_stream); + + // Instantiate model + Model my_model(dummy_context); + + // RNG + rng_t base_rng(0); + + // Other params + int n_monte_carlo_grad = 10; + int n_monte_carlo_elbo = 1e6; + std::stringstream log_stream; + stan::callbacks::stream_logger logger(log_stream, log_stream, log_stream, + log_stream, log_stream); + + // Dummy input + Eigen::VectorXd cont_params = Eigen::VectorXd::Zero(2); + cont_params(0) = 0.75; + cont_params(1) = 0.75; + + // ADVI + size_t rank = 1; + stan::variational::advi_lowrank test_advi( + my_model, cont_params, base_rng, rank, n_monte_carlo_grad, + n_monte_carlo_elbo, 100, 1); + + // Create some arbitrary variational q() family to calculate the ELBO over + Eigen::VectorXd mu + = Eigen::VectorXd::Constant(my_model.num_params_r(), log(2.5)); + Eigen::MatrixXd fac = Eigen::MatrixXd::Constant(my_model.num_params_r(), rank, + 0.0); // no cov + Eigen::VectorXd log_d + = Eigen::VectorXd::Constant(my_model.num_params_r(), + 0.0); // initializing sigma_tilde = 0 + // means sigma = 1 + stan::variational::normal_lowrank mufaclog_d + = stan::variational::normal_lowrank(mu, fac, log_d); + + double elbo = 0.0; + elbo = test_advi.calc_ELBO(mufaclog_d, logger); + + // Can calculate ELBO analytically + double zeta = -0.5 * (3 * 2 * log(2.0 * stan::math::pi()) + 18.5 + 25 + 13); + Eigen::VectorXd mu_J = Eigen::VectorXd::Zero(2); + mu_J(0) = 10.5; + mu_J(1) = 7.5; + + double elbo_true = 0.0; + elbo_true += zeta; + elbo_true += 74.192457181505773; //;mu_J.dot( (mu + 0.5).exp() ); + elbo_true += -0.5 * 3 * (92.363201236633131); + elbo_true += 2 * log(2.5); + elbo_true += 1 + log(2.0 * stan::math::pi()); + + double const EPSILON = 1.0; + EXPECT_NEAR(elbo_true, elbo, EPSILON); + + Eigen::VectorXd mu_grad = Eigen::VectorXd::Zero(3); + Eigen::MatrixXd B_grad = Eigen::MatrixXd::Zero(3, rank); + Eigen::VectorXd log_d_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + + std::string error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (3) and " + "Dimension of log std vector (2) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(0); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (0) and " + "Dimension of low-rank factor (3) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + B_grad = Eigen::MatrixXd::Zero(my_model.num_params_r(), rank); + log_d_grad = Eigen::VectorXd::Zero(3); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (2) and " + "Dimension of log std vector (3) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + log_d_grad = Eigen::VectorXd::Zero(0); + + error + = "stan::variational::normal_lowrank: " + "Dimension of mean vector (2) and " + "Dimension of log std vector (0) must match in size"; + EXPECT_THROW_MSG( + stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(3); + B_grad = Eigen::MatrixXd::Zero(3, rank); + log_d_grad = Eigen::VectorXd::Zero(3); + stan::variational::normal_lowrank elbo_grad + = stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad); + + error + = "stan::variational::normal_lowrank::calc_grad: " + "Dimension of elbo_grad (3) and " + "Dimension of variational q (2) must match in size"; + EXPECT_THROW_MSG(mufaclog_d.calc_grad(elbo_grad, my_model, cont_params, + n_monte_carlo_grad, base_rng, logger), + std::invalid_argument, error); + + mu_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + B_grad = Eigen::MatrixXd::Zero(my_model.num_params_r(), rank + 1); + log_d_grad = Eigen::VectorXd::Zero(my_model.num_params_r()); + stan::variational::normal_lowrank elbo_grad2 + = stan::variational::normal_lowrank(mu_grad, B_grad, log_d_grad); + + error + = "stan::variational::normal_lowrank::calc_grad: " + "Rank of elbo_grad (2) and " + "Rank of variational q (1) must match in size"; + EXPECT_THROW_MSG(mufaclog_d.calc_grad(elbo_grad2, my_model, cont_params, + n_monte_carlo_grad, base_rng, logger), + std::invalid_argument, error); +} diff --git a/src/test/unit/variational/eta_adapt_big_test.cpp b/src/test/unit/variational/eta_adapt_big_test.cpp index 080c666b328..a1dcb51be0b 100644 --- a/src/test/unit/variational/eta_adapt_big_test.cpp +++ b/src/test/unit/variational/eta_adapt_big_test.cpp @@ -34,11 +34,15 @@ class eta_adapt_big_test : public ::testing::Test { advi_fullrank_ = new stan::variational::advi< stan_model, stan::variational::normal_fullrank, rng_t>( *model_, cont_params_, base_rng_, 1, 100, 100, 1); + + advi_lowrank_ = new stan::variational::advi_lowrank( + *model_, cont_params_, base_rng_, 1, 1, 100, 100, 1); } void TearDown() { delete advi_meanfield_; delete advi_fullrank_; + delete advi_lowrank_; delete model_; } @@ -46,6 +50,7 @@ class eta_adapt_big_test : public ::testing::Test { rng_t> *advi_meanfield_; stan::variational::advi *advi_fullrank_; + stan::variational::advi_lowrank *advi_lowrank_; std::stringstream model_stream_; std::stringstream log_stream_; stan::callbacks::stream_logger logger; @@ -60,7 +65,10 @@ TEST_F(eta_adapt_big_test, eta_should_be_big) { = stan::variational::normal_meanfield(cont_params_); stan::variational::normal_fullrank fullrank_init = stan::variational::normal_fullrank(cont_params_); + stan::variational::normal_lowrank lowrank_init + = stan::variational::normal_lowrank(cont_params_, 1); EXPECT_EQ(100.0, advi_meanfield_->adapt_eta(meanfield_init, 50, logger)); EXPECT_EQ(100.0, advi_fullrank_->adapt_eta(fullrank_init, 50, logger)); + EXPECT_EQ(100.0, advi_lowrank_->adapt_eta(lowrank_init, 50, logger)); } diff --git a/src/test/unit/variational/eta_adapt_fail_test.cpp b/src/test/unit/variational/eta_adapt_fail_test.cpp index 1b98f930f6b..c06a49cff83 100644 --- a/src/test/unit/variational/eta_adapt_fail_test.cpp +++ b/src/test/unit/variational/eta_adapt_fail_test.cpp @@ -34,18 +34,27 @@ class eta_should_fail_test : public ::testing::Test { advi_fullrank_ = new stan::variational::advi< stan_model, stan::variational::normal_fullrank, rng_t>( *model_, cont_params_, base_rng_, 1, 1, 100, 1); + + rank_ = 1; + advi_lowrank_ = new stan::variational::advi_lowrank( + *model_, cont_params_, base_rng_, rank_, 1, 1, 100, 1); } void TearDown() { delete advi_meanfield_; delete advi_fullrank_; + delete advi_lowrank_; delete model_; } + // low-rank approx rank + size_t rank_; + stan::variational::advi *advi_meanfield_; stan::variational::advi *advi_fullrank_; + stan::variational::advi_lowrank *advi_lowrank_; std::stringstream model_stream_; std::stringstream log_stream_; stan::callbacks::stream_logger logger; @@ -60,6 +69,8 @@ TEST_F(eta_should_fail_test, eta_adapt_should_fail) { = stan::variational::normal_meanfield(cont_params_); stan::variational::normal_fullrank fullrank_init = stan::variational::normal_fullrank(cont_params_); + stan::variational::normal_lowrank lowrank_init + = stan::variational::normal_lowrank(cont_params_, rank_); std::string error = "stan::variational::advi::adapt_eta: " @@ -71,4 +82,6 @@ TEST_F(eta_should_fail_test, eta_adapt_should_fail) { std::domain_error, error); EXPECT_THROW_MSG(advi_fullrank_->adapt_eta(fullrank_init, 1, logger), std::domain_error, error); + EXPECT_THROW_MSG(advi_lowrank_->adapt_eta(lowrank_init, 1, logger), + std::domain_error, error); } diff --git a/src/test/unit/variational/families/normal_lowrank_test.cpp b/src/test/unit/variational/families/normal_lowrank_test.cpp new file mode 100644 index 00000000000..cc4d12295c3 --- /dev/null +++ b/src/test/unit/variational/families/normal_lowrank_test.cpp @@ -0,0 +1,187 @@ +#include +#include +#include +#include + +TEST(normal_lowrank_test, zero_init) { + int my_dimension = 10; + int my_rank = 3; + + stan::variational::normal_lowrank my_normal_lowrank(my_dimension, my_rank); + EXPECT_FLOAT_EQ(my_dimension, my_normal_lowrank.dimension()); + EXPECT_FLOAT_EQ(my_rank, my_normal_lowrank.rank()); + + const Eigen::VectorXd& mu_out = my_normal_lowrank.mu(); + const Eigen::MatrixXd& B_out = my_normal_lowrank.B(); + const Eigen::MatrixXd& log_d_out = my_normal_lowrank.log_d(); + + for (int i = 0; i < my_dimension; ++i) { + EXPECT_FLOAT_EQ(0.0, mu_out(i)); + EXPECT_FLOAT_EQ(0.0, log_d_out(i)); + for (int j = 0; j < my_rank; ++j) { + EXPECT_FLOAT_EQ(0.0, B_out(i, j)); + } + } +} + +TEST(normal_lowrank_test, dimension_and_rank) { + Eigen::VectorXd mu(4); + mu << 5.7, -3.2, 0.1332, -1.87; + + Eigen::MatrixXd B(4, 3); + B << 1.3, 0, 0, 2.3, 41, 0, 3.3, 42, 92, 4.3, 43, 93; + + Eigen::VectorXd d(4); + d << 0.63, 0.94, 1.32, 1.18; + Eigen::VectorXd log_d = d.array().log().matrix(); + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, d); + + EXPECT_FLOAT_EQ(mu.size(), my_normal_lowrank.dimension()); + EXPECT_FLOAT_EQ(B.cols(), my_normal_lowrank.rank()); +} + +TEST(normal_lowrank_test, mean_vector) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, 2.3, 41, 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + Eigen::VectorXd log_d = d.array().log().matrix(); + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, log_d); + + const Eigen::Vector3d& mu_out = my_normal_lowrank.mu(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) + EXPECT_FLOAT_EQ(mu(i), mu_out(i)); + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::Vector3d mu_nan = Eigen::VectorXd::Constant(3, nan); + + EXPECT_THROW( + stan::variational::normal_lowrank my_normal_lowrank_nan(mu_nan, B, log_d); + , std::domain_error); + EXPECT_THROW(my_normal_lowrank.set_mu(mu_nan);, std::domain_error); + Eigen::MatrixXd B_nan = Eigen::MatrixXd::Constant(3, 3, nan); + EXPECT_THROW( + stan::variational::normal_lowrank my_normal_lowrank_nan(mu, B_nan, log_d); + , std::domain_error); + + my_normal_lowrank.set_to_zero(); + const Eigen::Vector3d& mu_out_zero = my_normal_lowrank.mu(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) { + EXPECT_FLOAT_EQ(0.0, mu_out_zero(i)); + } +} + +TEST(normal_lowrank_test, lowrank_factor) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, 2.3, 41, 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + Eigen::VectorXd log_d = d.array().log().matrix(); + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, log_d); + + const Eigen::MatrixXd& B_out = my_normal_lowrank.B(); + + for (int j = 0, nRows = B.rows(), nCols = B.cols(); j < nCols; ++j) { + for (int i = 0; i < nRows; ++i) { + EXPECT_FLOAT_EQ(B(i, j), B_out(i, j)); + } + } + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::MatrixXd B_nan = Eigen::MatrixXd::Constant(3, 2, nan); + EXPECT_THROW(my_normal_lowrank.set_B(B_nan);, std::domain_error); + my_normal_lowrank.set_to_zero(); + const Eigen::MatrixXd& B_out_zero = my_normal_lowrank.B(); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) { + for (int j = 0; j < my_normal_lowrank.rank(); ++j) { + EXPECT_FLOAT_EQ(0.0, B_out_zero(i, j)); + } + } +} + +TEST(normal_lowrank_test, entropy) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, 2.3, 41, 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 1.32; + Eigen::VectorXd log_d = d.array().log().matrix(); + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, log_d); + + double entropy_true = 8.86050306582748; + + const double entropy_out = my_normal_lowrank.entropy(); + + EXPECT_FLOAT_EQ(entropy_out, entropy_true); +} + +TEST(normal_lowrank_test, transform) { + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, 2.3, 41, 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 0.1332; + Eigen::VectorXd log_d = d.array().log().matrix(); + + Eigen::VectorXd x(5); + x << 7.1, -9.2, 0.59, 0.24, -1.92; + + Eigen::Vector3d x_transformed; + x_transformed << 15.3017, -363.8444, -363.092544; + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, log_d); + + Eigen::Vector3d x_result; + x_result = my_normal_lowrank.transform(x); + + for (int i = 0; i < my_normal_lowrank.dimension(); ++i) + EXPECT_FLOAT_EQ(x_result(i), x_transformed(i)); + + double nan = std::numeric_limits::quiet_NaN(); + Eigen::VectorXd x_nan = Eigen::VectorXd::Constant(5, nan); + EXPECT_THROW(my_normal_lowrank.transform(x_nan);, std::domain_error); +} + +TEST(normal_lowrank_test, calc_log_g) { + Eigen::VectorXd x(5); + x << 7.1, -9.2, 0.59, 0.24, -1.92; + + Eigen::Vector3d mu; + mu << 5.7, -3.2, 0.1332; + + Eigen::MatrixXd B(3, 2); + B << 1.3, 0, 2.3, 41, 3.3, 42; + + Eigen::Vector3d d; + d << 0.63, 0.94, 0.1332; + Eigen::VectorXd log_d = d.array().log().matrix(); + + stan::variational::normal_lowrank my_normal_lowrank(mu, B, log_d); + + double log_g_true = -69.57104999999999; + + const double log_g_out = my_normal_lowrank.calc_log_g(x); + + EXPECT_FLOAT_EQ(log_g_out, log_g_true); +} diff --git a/src/test/unit/variational/stochastic_gradient_ascent_test.cpp b/src/test/unit/variational/stochastic_gradient_ascent_test.cpp index 413cdd29bcf..c1e18f29dba 100644 --- a/src/test/unit/variational/stochastic_gradient_ascent_test.cpp +++ b/src/test/unit/variational/stochastic_gradient_ascent_test.cpp @@ -236,10 +236,17 @@ TEST_F(stochastic_gradient_ascent_test, mock_model, stan::variational::normal_fullrank, mock_rng>( model, cont_params, rng, 1, 100, 100, 1); + size_t rank = 1; + stan::variational::advi_lowrank* advi_lowrank + = new stan::variational::advi_lowrank( + model, cont_params, rng, rank, 1, 100, 100, 1); + stan::variational::normal_meanfield meanfield_init = stan::variational::normal_meanfield(cont_params); stan::variational::normal_fullrank fullrank_init = stan::variational::normal_fullrank(cont_params); + stan::variational::normal_lowrank lowrank_init + = stan::variational::normal_lowrank(cont_params, rank); std::string error = "stan::variational::advi::calc_ELBO: " @@ -254,9 +261,13 @@ TEST_F(stochastic_gradient_ascent_test, EXPECT_THROW_MSG(advi_fullrank->stochastic_gradient_ascent( fullrank_init, 1.0, 0.01, 1000, logger, writer), std::domain_error, error); + EXPECT_THROW_MSG(advi_lowrank->stochastic_gradient_ascent( + lowrank_init, 1.0, 0.01, 1000, logger, writer), + std::domain_error, error); delete advi_meanfield; delete advi_fullrank; + delete advi_lowrank; } TEST_F(stochastic_gradient_ascent_test, initialize_state_zero_grad_error) { @@ -277,10 +288,17 @@ TEST_F(stochastic_gradient_ascent_test, initialize_state_zero_grad_error) { mock_throwing_model, stan::variational::normal_fullrank, mock_rng>( throwing_model, cont_params, rng, 1, 100, 100, 1); + size_t rank = 1; + stan::variational::advi_lowrank* advi_lowrank + = new stan::variational::advi_lowrank( + throwing_model, cont_params, rng, rank, 1, 100, 100, 1); + stan::variational::normal_meanfield meanfield_init = stan::variational::normal_meanfield(cont_params); stan::variational::normal_fullrank fullrank_init = stan::variational::normal_fullrank(cont_params); + stan::variational::normal_lowrank lowrank_init + = stan::variational::normal_lowrank(cont_params, rank); std::string error = "stan::variational::normal_meanfield::calc_grad: " @@ -304,6 +322,18 @@ TEST_F(stochastic_gradient_ascent_test, initialize_state_zero_grad_error) { fullrank_init, 1.0, 0.01, 1000, logger, writer), std::domain_error, error); + error + = "stan::variational::normal_lowrank::calc_grad: " + "The number of dropped evaluations " + "has reached its maximum amount (10). " + "Your model may be either severely " + "ill-conditioned or misspecified."; + + EXPECT_THROW_MSG(advi_lowrank->stochastic_gradient_ascent( + lowrank_init, 1.0, 0.01, 1000, logger, writer), + std::domain_error, error); + delete advi_meanfield; delete advi_fullrank; + delete advi_lowrank; }