diff --git a/stan/math/fwd/fun/log_softmax.hpp b/stan/math/fwd/fun/log_softmax.hpp
index b59bffa6165..64d578cb4d7 100644
--- a/stan/math/fwd/fun/log_softmax.hpp
+++ b/stan/math/fwd/fun/log_softmax.hpp
@@ -6,44 +6,48 @@
 #include <stan/math/fwd/core.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/log_softmax.hpp>
+#include <stan/math/prim/fun/softmax.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 
 namespace stan {
 namespace math {
 
-template <typename T>
-inline Eigen::Matrix<fvar<T>, Eigen::Dynamic, 1> log_softmax(
-    const Eigen::Matrix<fvar<T>, Eigen::Dynamic, 1>& alpha) {
-  using Eigen::Dynamic;
-  using Eigen::Matrix;
-
-  Matrix<T, Dynamic, 1> alpha_t(alpha.size());
-  for (int k = 0; k < alpha.size(); ++k) {
-    alpha_t(k) = alpha(k).val_;
-  }
-
-  Matrix<T, Dynamic, 1> softmax_alpha_t = softmax(alpha_t);
-  Matrix<T, Dynamic, 1> log_softmax_alpha_t = log_softmax(alpha_t);
-
-  Matrix<fvar<T>, Dynamic, 1> log_softmax_alpha(alpha.size());
-  for (int k = 0; k < alpha.size(); ++k) {
-    log_softmax_alpha(k).val_ = log_softmax_alpha_t(k);
-    log_softmax_alpha(k).d_ = 0;
-  }
-
-  for (int m = 0; m < alpha.size(); ++m) {
-    T negative_alpha_m_d_times_softmax_alpha_t_m
-        = -alpha(m).d_ * softmax_alpha_t(m);
-    for (int k = 0; k < alpha.size(); ++k) {
-      if (m == k) {
-        log_softmax_alpha(k).d_
-            += alpha(m).d_ + negative_alpha_m_d_times_softmax_alpha_t_m;
-      } else {
-        log_softmax_alpha(k).d_ += negative_alpha_m_d_times_softmax_alpha_t_m;
+/**
+ * Return the log softmax of the specified vector or container of vectors.
+ *
+ * @tparam T Type of input vector or matrix.
+ * @param[in] x Unconstrained input vector.
+ * @return Log softmax of the input.
+ * @throw std::domain_error If the input vector is size 0.
+ */
+template <typename T, require_t<is_fvar<scalar_type_t<T>>>...>
+inline auto log_softmax(const T& x) {
+  return apply_vector_unary<T>::apply(x, [&](const auto& alpha) {
+    using T_fvar = value_type_t<decltype(alpha)>;
+    using T_fvar_inner = typename T_fvar::Scalar;
+
+    Eigen::Matrix<T_fvar_inner, -1, 1> alpha_t = alpha.val();
+    Eigen::Matrix<T_fvar_inner, -1, 1> softmax_alpha_t = softmax(alpha_t);
+
+    Eigen::Matrix<T_fvar, -1, 1> log_softmax_alpha(alpha.size());
+    log_softmax_alpha.val() = log_softmax(alpha_t);
+    log_softmax_alpha.d().setZero();
+
+    for (int m = 0; m < alpha.size(); ++m) {
+      T_fvar_inner negative_alpha_m_d_times_softmax_alpha_t_m
+          = -alpha(m).d_ * softmax_alpha_t(m);
+      for (int k = 0; k < alpha.size(); ++k) {
+        if (m == k) {
+          log_softmax_alpha(k).d_
+              += alpha(m).d_ + negative_alpha_m_d_times_softmax_alpha_t_m;
+        } else {
+          log_softmax_alpha(k).d_ += negative_alpha_m_d_times_softmax_alpha_t_m;
+        }
       }
     }
-  }
-  return log_softmax_alpha;
+    return log_softmax_alpha;
+  });
 }
 
 }  // namespace math
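A minimal usage sketch (not part of the patch) of what the forward-mode change enables: log_softmax now accepts any vector-like of fvar, including std::vectors of Eigen vectors. The umbrella header name below is an assumption; any header pulling in the fwd and prim implementations would do.

#include <stan/math/fwd.hpp>  // assumed umbrella header for forward mode
#include <iostream>
#include <vector>

int main() {
  using stan::math::fvar;
  using vec_fd = Eigen::Matrix<fvar<double>, Eigen::Dynamic, 1>;

  // Seed the tangent of x(0), so each d_ carries d log_softmax(x)_k / d x_0.
  vec_fd x(3);
  x << fvar<double>(-1, 1), fvar<double>(1, 0), fvar<double>(10, 0);

  vec_fd y = stan::math::log_softmax(x);
  for (int k = 0; k < y.size(); ++k) {
    std::cout << y(k).val_ << " (d/dx0: " << y(k).d_ << ")\n";
  }

  // New with this patch: containers of vectors dispatch elementwise.
  std::vector<vec_fd> xs{x, x};
  std::vector<vec_fd> ys = stan::math::log_softmax(xs);
}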
diff --git a/stan/math/fwd/fun/log_sum_exp.hpp b/stan/math/fwd/fun/log_sum_exp.hpp
index 87d8534fe1b..1e8afb6f6e4 100644
--- a/stan/math/fwd/fun/log_sum_exp.hpp
+++ b/stan/math/fwd/fun/log_sum_exp.hpp
@@ -6,6 +6,7 @@
 #include <stan/math/fwd/core.hpp>
 #include <stan/math/prim/fun/constants.hpp>
 #include <stan/math/prim/fun/log_sum_exp.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 #include <cmath>
 #include <vector>
 
@@ -31,37 +32,35 @@ inline fvar<T> log_sum_exp(double x1, const fvar<T>& x2) {
 
 template <typename T>
 inline fvar<T> log_sum_exp(const fvar<T>& x1, double x2) {
-  using std::exp;
-  if (x2 == NEGATIVE_INFTY) {
-    return fvar<T>(x1.val_, x1.d_);
-  }
-  return fvar<T>(log_sum_exp(x1.val_, x2), x1.d_ / (1 + exp(x2 - x1.val_)));
-}
-
-template <typename T>
-fvar<T> log_sum_exp(const std::vector<fvar<T> >& v) {
-  using std::exp;
-  std::vector<T> vals(v.size());
-  for (size_t i = 0; i < v.size(); ++i) {
-    vals[i] = v[i].val_;
-  }
-  T deriv(0.0);
-  T denominator(0.0);
-  for (size_t i = 0; i < v.size(); ++i) {
-    T exp_vi = exp(vals[i]);
-    denominator += exp_vi;
-    deriv += v[i].d_ * exp_vi;
-  }
-  return fvar<T>(log_sum_exp(vals), deriv / denominator);
+  return log_sum_exp(x2, x1);
 }
 
-template <typename T, int R, int C>
-fvar<T> log_sum_exp(const Eigen::Matrix<fvar<T>, R, C>& v) {
-  Eigen::Matrix<T, R, C> vals = v.val();
-  Eigen::Matrix<T, R, C> exp_vals = vals.array().exp();
+/**
+ * Return the log of the sum of the exponentiated values of the specified
+ * matrix of values. The matrix may be a full matrix, a vector,
+ * a row vector, or a container of these.
+ *
+ * The function is defined as follows to prevent overflow in exponential
+ * calculations.
+ *
+ * \f$\log \sum_{n=1}^N \exp(x_n) = \max(x) + \log \sum_{n=1}^N \exp(x_n -
+ * \max(x))\f$.
+ *
+ * @tparam T Type of input vector or matrix.
+ * @param[in] x Matrix of specified values.
+ * @return The log of the sum of the exponentiated vector values.
+ */
+template <typename T, require_t<is_fvar<scalar_type_t<T>>>...>
+inline auto log_sum_exp(const T& x) {
+  return apply_vector_unary<T>::reduce(x, [&](const auto& v) {
+    using T_fvar_inner = typename value_type_t<decltype(v)>::Scalar;
+    using mat_type = Eigen::Matrix<T_fvar_inner, -1, -1>;
+    mat_type vals = v.val();
+    mat_type exp_vals = vals.array().exp();
 
-  return fvar<T>(log_sum_exp(vals),
-                 v.d().cwiseProduct(exp_vals).sum() / exp_vals.sum());
+    return fvar<T_fvar_inner>(
+        log_sum_exp(vals), v.d().cwiseProduct(exp_vals).sum() / exp_vals.sum());
+  });
 }
 
 }  // namespace math
diff --git a/stan/math/prim/fun/log_softmax.hpp b/stan/math/prim/fun/log_softmax.hpp
index a51370d0b1a..9dde2af1f6d 100644
--- a/stan/math/prim/fun/log_softmax.hpp
+++ b/stan/math/prim/fun/log_softmax.hpp
@@ -4,6 +4,7 @@
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/log_sum_exp.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 
 namespace stan {
 namespace math {
@@ -32,18 +33,17 @@ namespace math {
  *   \right.
  * \f$
  *
- * @tparam T type of elements in the vector
- * @param[in] v Vector to transform.
- * @return Unit simplex result of the softmax transform of the vector.
+ * @tparam T Type of input vector to transform.
+ * @param[in] x Vector to transform.
+ * @return Log unit simplex result of the softmax transform of the vector.
  */
-template <typename T>
-inline Eigen::Matrix<T, Eigen::Dynamic, 1> log_softmax(
-    const Eigen::Matrix<T, Eigen::Dynamic, 1>& v) {
-  check_nonzero_size("log_softmax", "v", v);
-  return v.array() - log_sum_exp(v);
+template <typename T, require_t<std::is_arithmetic<scalar_type_t<T>>>...>
+inline auto log_softmax(const T& x) {
+  return apply_vector_unary<T>::apply(x, [&](const auto& v) {
+    check_nonzero_size("log_softmax", "v", v);
+    return (v.array() - log_sum_exp(v)).matrix();
+  });
 }
-
 }  // namespace math
 }  // namespace stan
-
 #endif
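A usage sketch for the new prim signature (not part of the patch), mirroring the updated prim tests: the std::vector and Eigen paths produce identical values, since the std::vector input is mapped to an Eigen column vector internally.

#include <stan/math/prim.hpp>  // assumed umbrella header for prim
#include <iostream>
#include <vector>

int main() {
  // log_softmax([-1, 1]) = [-1, 1] - log(exp(-1) + exp(1))
  //                      ~ [-2.126928, -0.126928]
  std::vector<double> out
      = stan::math::log_softmax(std::vector<double>{-1, 1});

  Eigen::VectorXd v(2);
  v << -1, 1;
  Eigen::VectorXd out_v = stan::math::log_softmax(v);

  std::cout << out[0] << " " << out_v(0) << "\n";  // both ~ -2.126928
}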
diff --git a/stan/math/prim/fun/log_sum_exp.hpp b/stan/math/prim/fun/log_sum_exp.hpp
index 2193cb0de7e..ded15b564cd 100644
--- a/stan/math/prim/fun/log_sum_exp.hpp
+++ b/stan/math/prim/fun/log_sum_exp.hpp
@@ -5,6 +5,7 @@
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/constants.hpp>
 #include <stan/math/prim/fun/log1p_exp.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 #include <cmath>
 #include <vector>
 
@@ -62,7 +63,8 @@ inline return_type_t<T1, T2> log_sum_exp(const T2& a, const T1& b) {
 
 /**
  * Return the log of the sum of the exponentiated values of the specified
- * sequence of values.
+ * matrix of values. The matrix may be a full matrix, a vector,
+ * a row vector, or a container of these.
  *
  * The function is defined as follows to prevent overflow in exponential
  * calculations.
@@ -70,57 +72,22 @@ inline return_type_t<T1, T2> log_sum_exp(const T2& a, const T1& b) {
  * \f$\log \sum_{n=1}^N \exp(x_n) = \max(x) + \log \sum_{n=1}^N \exp(x_n -
  * \max(x))\f$.
  *
- * @param[in] x array of specified values
+ * @tparam T Type of input vector or matrix.
+ * @param[in] x Matrix of specified values.
  * @return The log of the sum of the exponentiated vector values.
  */
-inline double log_sum_exp(const std::vector<double>& x) {
-  using std::exp;
-  using std::log;
-  double max = NEGATIVE_INFTY;
-  for (double xx : x) {
-    if (xx > max) {
-      max = xx;
+template <typename T, require_t<std::is_arithmetic<scalar_type_t<T>>>...>
+inline auto log_sum_exp(const T& x) {
+  return apply_vector_unary<T>::reduce(x, [&](const auto& v) {
+    if (v.size() == 0) {
+      return NEGATIVE_INFTY;
     }
-  }
-
-  double sum = 0.0;
-  for (size_t ii = 0; ii < x.size(); ii++) {
-    if (x[ii] != NEGATIVE_INFTY) {
-      sum += exp(x[ii] - max);
+    const double max = v.maxCoeff();
+    if (!std::isfinite(max)) {
+      return max;
     }
-  }
-
-  return max + log(sum);
-}
-
-/**
- * Return the log of the sum of the exponentiated values of the specified
- * matrix of values. The matrix may be a full matrix, a vector,
- * or a row vector.
- *
- * The function is defined as follows to prevent overflow in exponential
- * calculations.
- *
- * \f$\log \sum_{n=1}^N \exp(x_n) = \max(x) + \log \sum_{n=1}^N \exp(x_n -
- * \max(x))\f$.
- *
- * @tparam R number of rows, can be Eigen::Dynamic
- * @tparam C number of columns, can be Eigen::Dynamic
- *
- * @param[in] x Matrix of specified values
- * @return The log of the sum of the exponentiated vector values.
- */
-template <int R, int C>
-double log_sum_exp(const Eigen::Matrix<double, R, C>& x) {
-  if (x.size() == 0) {
-    return NEGATIVE_INFTY;
-  }
-
-  const double max = x.maxCoeff();
-  if (!std::isfinite(max)) {
-    return max;
-  }
-  return max + std::log((x.array() - max).exp().sum());
+    return max + std::log((v.array() - max).exp().sum());
+  });
 }
 
 }  // namespace math
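The max-shift in the reduction above is what keeps the computation finite. A standalone illustration, independent of Stan Math:

#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
  const double a = 1000.0, b = 1001.0;

  // Naive form overflows: exp(1000) is inf in double precision.
  const double naive = std::log(std::exp(a) + std::exp(b));

  // Shifted form stays finite: max + log(sum of exp(x - max)).
  const double m = std::max(a, b);
  const double stable = m + std::log(std::exp(a - m) + std::exp(b - m));

  std::cout << naive << " vs " << stable << "\n";  // inf vs ~1001.3133
}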
diff --git a/stan/math/prim/vectorize/apply_vector_unary.hpp b/stan/math/prim/vectorize/apply_vector_unary.hpp
new file mode 100644
index 00000000000..d485d5201c8
--- /dev/null
+++ b/stan/math/prim/vectorize/apply_vector_unary.hpp
@@ -0,0 +1,160 @@
+#ifndef STAN_MATH_PRIM_VECTORIZE_APPLY_VECTOR_UNARY_HPP
+#define STAN_MATH_PRIM_VECTORIZE_APPLY_VECTOR_UNARY_HPP
+
+#include <stan/math/prim/meta.hpp>
+#include <stan/math/prim/fun/as_column_vector_or_scalar.hpp>
+#include <vector>
+
+namespace stan {
+namespace math {
+
+// Forward declaration to allow specialisations
+template <typename T, typename Enable = void>
+struct apply_vector_unary {};
+
+/**
+ * Base template class for vectorization of unary vector functions
+ * defined by applying a functor to a standard library vector, Eigen dense
+ * matrix expression template, or container of these. For each specialisation,
+ * the same vector type as the input is returned.
+ *
+ * Two taxonomies of unary vector functions are implemented:
+ * - f(vector) -> vector
+ * - f(vector) -> scalar
+ *
+ * This base template class takes (and returns) Eigen expression templates.
+ */
+template <typename T>
+struct apply_vector_unary<T, require_eigen_t<T>> {
+  /**
+   * Member function for applying a functor to a vector and subsequently
+   * returning a vector. The 'auto' return type is used here so that an
+   * expression template is returned.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x Eigen input to which operation is applied.
+   * @param f Functor to apply to Eigen input.
+   * @return Eigen expression template with result of applying functor
+   *         to input.
+   */
+  template <typename F>
+  static inline auto apply(const T& x, const F& f) {
+    return f(x);
+  }
+
+  /**
+   * Member function for applying a functor to a vector and subsequently
+   * returning a scalar. The reduction to a scalar needs to be implemented
+   * in the definition of the functor.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x Eigen input to which operation is applied.
+   * @param f Functor to apply to Eigen input.
+   * @return Scalar result of applying functor to input.
+   */
+  template <typename F>
+  static inline auto reduce(const T& x, const F& f) {
+    return f(x);
+  }
+};
+
+/**
+ * Specialisation for use with (non-nested) std::vectors. Inputs are mapped
+ * to Eigen column vectors and then passed to the base (Eigen) template.
+ * An std::vector (or scalar) is then returned as the result.
+ */
+template <typename T>
+struct apply_vector_unary<T, require_std_vector_vt<is_stan_scalar, T>> {
+  using T_vt = value_type_t<T>;
+  using T_map = typename Eigen::Map<const Eigen::Matrix<T_vt, -1, 1>>;
+
+  /**
+   * Member function for applying a functor to a vector and subsequently
+   * returning a vector.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x std::vector input to which operation is applied.
+   * @param f Functor to apply to vector input.
+   * @return std::vector with result of applying functor to input.
+   */
+  template <typename F>
+  static inline std::vector<T_vt> apply(const T& x, const F& f) {
+    std::vector<T_vt> result(x.size());
+    Eigen::Map<Eigen::Matrix<T_vt, -1, 1>>(result.data(), result.size())
+        = apply_vector_unary<T_map>::apply(as_column_vector_or_scalar(x), f);
+    return result;
+  }
+
+  /**
+   * Member function for applying a functor to a vector and subsequently
+   * returning a scalar.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x std::vector input to which operation is applied.
+   * @param f Functor to apply to std::vector input.
+   * @return Scalar result of applying functor to input vector.
+   */
+  template <typename F>
+  static inline T_vt reduce(const T& x, const F& f) {
+    return apply_vector_unary<T_map>::reduce(as_column_vector_or_scalar(x), f);
+  }
+};
+
+/**
+ * Specialisation for use with nested containers (std::vectors).
+ * For each of the member functions, an std::vector with the appropriate
+ * type (vector or scalar) is returned.
+ */
+template <typename T>
+struct apply_vector_unary<T, require_std_vector_vt<is_container, T>> {
+  using T_vt = value_type_t<T>;
+  using T_st = value_type_t<T_vt>;
+
+  /**
+   * Member function for applying a functor to each container in an
+   * std::vector and subsequently returning an std::vector of containers.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x std::vector of containers to which operation is applied.
+   * @param f Functor to apply to vector input.
+   * @return std::vector of containers with result of applying functor to
+   *         input.
+   */
+  template <typename F>
+  static inline std::vector<T_vt> apply(const T& x, const F& f) {
+    size_t x_size = x.size();
+    std::vector<T_vt> result(x_size);
+    for (size_t i = 0; i < x_size; ++i)
+      result[i] = apply_vector_unary<T_vt>::apply(x[i], f);
+    return result;
+  }
+
+  /**
+   * Member function for applying a functor to each container in an
+   * std::vector and subsequently returning an std::vector of scalars.
+   *
+   * @tparam T Type of argument to which functor is applied.
+   * @tparam F Type of functor to apply.
+   * @param x std::vector of containers to which operation is applied.
+   * @param f Functor to apply to vector input.
+   * @return std::vector of scalars with result of applying functor to input.
+   */
+  template <typename F>
+  static inline std::vector<T_st> reduce(const T& x, const F& f) {
+    size_t x_size = x.size();
+    std::vector<T_st> result(x_size);
+    for (size_t i = 0; i < x_size; ++i)
+      result[i] = apply_vector_unary<T_vt>::reduce(x[i], f);
+    return result;
+  }
+};
+
+}  // namespace math
+}  // namespace stan
+#endif
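A sketch (not part of the patch) of how a future vectorised function could reuse this dispatch machinery: wrap the Eigen implementation in a lambda and the same definition accepts Eigen vectors, row vectors, and std::vectors. The function name twice_log_sum_exp is hypothetical.

#include <stan/math/prim.hpp>
#include <vector>

// Hypothetical example function: a reduction built on apply_vector_unary.
template <typename T>
inline auto twice_log_sum_exp(const T& x) {
  return stan::math::apply_vector_unary<T>::reduce(
      x, [](const auto& v) { return 2.0 * stan::math::log_sum_exp(v); });
}

int main() {
  Eigen::VectorXd v(2);
  v << 0.5, 1.5;
  double a = twice_log_sum_exp(v);
  // The std::vector input is mapped to an Eigen column vector, so the
  // same lambda (and the same arithmetic) runs for both calls.
  double b = twice_log_sum_exp(std::vector<double>{0.5, 1.5});
  return a == b ? 0 : 1;
}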
diff --git a/stan/math/rev/core/matrix_vari.hpp b/stan/math/rev/core/matrix_vari.hpp
index 26bf74e1ed3..4477d565bee 100644
--- a/stan/math/rev/core/matrix_vari.hpp
+++ b/stan/math/rev/core/matrix_vari.hpp
@@ -2,6 +2,7 @@
 #define STAN_MATH_REV_CORE_MATRIX_VARI_HPP
 
 #include <stan/math/rev/core/vari.hpp>
+#include <stan/math/prim/meta.hpp>
 #include <stan/math/rev/core/chainablestack.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <vector>
@@ -15,13 +16,11 @@ class op_matrix_vari : public vari {
   vari** vis_;
 
  public:
-  template <int R, int C>
-  op_matrix_vari(double f, const Eigen::Matrix<var, R, C>& vs)
-      : vari(f), size_(vs.size()) {
-    vis_ = reinterpret_cast<vari**>(operator new(sizeof(vari*) * vs.size()));
-    for (int i = 0; i < vs.size(); ++i) {
-      vis_[i] = vs(i).vi_;
-    }
+  template <typename T, require_eigen_vt<is_var, T>...>
+  op_matrix_vari(double f, const T& vs) : vari(f), size_(vs.size()) {
+    vis_ = ChainableStack::instance_->memalloc_.alloc_array<vari*>(size_);
+    Eigen::Map<matrix_vi>(vis_, vs.rows(), vs.cols()) = vs.vi();
   }
   vari* operator[](size_t n) const { return vis_[n]; }
   size_t size() { return size_; }
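A sketch of the allocation pattern adopted above (the umbrella header name is an assumption): alloc_array places the vari* block on Stan's arena, so it is reclaimed in bulk by recover_memory() rather than needing a matching delete, and the Eigen Map assignment replaces the old element-by-element copy loop.

#include <stan/math/rev.hpp>  // assumed umbrella header for reverse mode

// n vari pointers on the autodiff arena; freed in bulk, never individually.
stan::math::vari** alloc_vi_block(int n) {
  using stan::math::ChainableStack;
  return ChainableStack::instance_->memalloc_.alloc_array<stan::math::vari*>(
      n);
}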
diff --git a/stan/math/rev/fun/log_softmax.hpp b/stan/math/rev/fun/log_softmax.hpp
index 58cf4c54273..a903419f738 100644
--- a/stan/math/rev/fun/log_softmax.hpp
+++ b/stan/math/rev/fun/log_softmax.hpp
@@ -8,6 +8,7 @@
 #include <stan/math/rev/core.hpp>
 #include <stan/math/rev/fun/typedefs.hpp>
 #include <stan/math/prim/err.hpp>
 #include <stan/math/prim/fun/Eigen.hpp>
 #include <stan/math/prim/fun/log_softmax.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 #include <cmath>
 #include <vector>
@@ -45,50 +46,50 @@ class log_softmax_elt_vari : public vari {
 };
 
 }  // namespace internal
 
 /**
- * Return the softmax of the specified Eigen vector. Softmax is
- * guaranteed to return a simplex.
+ * Return the log softmax of the specified vector or container of vectors.
  *
  * The gradient calculations are unfolded.
  *
- * @param alpha Unconstrained input vector.
+ * @tparam T Type of input vector or matrix.
+ * @param[in] x Unconstrained input vector.
  * @return Log softmax of the input.
  * @throw std::domain_error If the input vector is size 0.
  */
-inline Eigen::Matrix<var, Eigen::Dynamic, 1> log_softmax(
-    const Eigen::Matrix<var, Eigen::Dynamic, 1>& alpha) {
-  const int a_size = alpha.size();
+template <typename T, require_t<is_var<scalar_type_t<T>>>...>
+inline auto log_softmax(const T& x) {
+  return apply_vector_unary<T>::apply(x, [&](const auto& alpha) {
+    const int a_size = alpha.size();
 
-  check_nonzero_size("log_softmax", "alpha", alpha);
+    check_nonzero_size("log_softmax", "alpha", alpha);
 
-  // TODO(carpenter): replace with array alloc
-  vari** alpha_vi_array
-      = reinterpret_cast<vari**>(vari::operator new(sizeof(vari*) * a_size));
-  Eigen::Map<vector_vi>(alpha_vi_array, a_size) = alpha.vi();
+    vari** alpha_vi_array
+        = ChainableStack::instance_->memalloc_.alloc_array<vari*>(a_size);
+    Eigen::Map<vector_vi>(alpha_vi_array, a_size) = alpha.vi();
 
-  vector_d alpha_d = alpha.val();
+    vector_d alpha_d = alpha.val();
 
-  // fold logic of math::softmax() and math::log_softmax()
-  // to save computations
+    // fold logic of math::softmax() and math::log_softmax()
+    // to save computations
 
-  vector_d diff = (alpha_d.array() - alpha_d.maxCoeff());
-  vector_d softmax_alpha_d = diff.array().exp();
-  double sum = softmax_alpha_d.sum();
-  softmax_alpha_d.array() /= sum;
-  vector_d log_softmax_alpha_d = diff.array() - std::log(sum);
+    vector_d diff = (alpha_d.array() - alpha_d.maxCoeff());
+    vector_d softmax_alpha_d = diff.array().exp();
+    double sum = softmax_alpha_d.sum();
+    vector_d log_softmax_alpha_d = diff.array() - std::log(sum);
 
-  // end fold
-  // TODO(carpenter): replace with array alloc
-  double* softmax_alpha_d_array
-      = reinterpret_cast<double*>(vari::operator new(sizeof(double) * a_size));
-  Eigen::Map<vector_d>(softmax_alpha_d_array, a_size) = softmax_alpha_d;
+    // end fold
+    double* softmax_alpha_d_array
+        = ChainableStack::instance_->memalloc_.alloc_array<double>(a_size);
+    Eigen::Map<vector_d>(softmax_alpha_d_array, a_size)
+        = softmax_alpha_d.array() / sum;
 
-  vector_v log_softmax_alpha(a_size);
-  for (int k = 0; k < a_size; ++k) {
-    log_softmax_alpha(k) = var(new internal::log_softmax_elt_vari(
-        log_softmax_alpha_d[k], alpha_vi_array, softmax_alpha_d_array, a_size,
-        k));
-  }
-  return log_softmax_alpha;
+    vector_v log_softmax_alpha(a_size);
+    for (int k = 0; k < a_size; ++k) {
+      log_softmax_alpha(k) = var(new internal::log_softmax_elt_vari(
+          log_softmax_alpha_d[k], alpha_vi_array, softmax_alpha_d_array,
+          a_size, k));
+    }
+    return log_softmax_alpha;
+  });
 }
 
 }  // namespace math
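A reverse-mode usage sketch (not part of the patch): the unfolded gradient implemented by log_softmax_elt_vari corresponds to d log_softmax(x)_k / d x_m = 1{k = m} - softmax(x)_m, which can be checked directly.

#include <stan/math/rev.hpp>
#include <iostream>

int main() {
  using stan::math::var;
  Eigen::Matrix<var, Eigen::Dynamic, 1> x(3);
  x << 2, 1, 1;

  Eigen::Matrix<var, Eigen::Dynamic, 1> y = stan::math::log_softmax(x);
  y(0).grad();  // propagate d y(0) into the adjoints of x

  // Expect 1 - softmax(x)(0) for m = 0 and -softmax(x)(m) otherwise.
  for (int m = 0; m < x.size(); ++m) {
    std::cout << "d y0 / d x" << m << " = " << x(m).adj() << "\n";
  }
  stan::math::recover_memory();
}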
diff --git a/stan/math/rev/fun/log_sum_exp.hpp b/stan/math/rev/fun/log_sum_exp.hpp
index 0315ff31a68..471462ff149 100644
--- a/stan/math/rev/fun/log_sum_exp.hpp
+++ b/stan/math/rev/fun/log_sum_exp.hpp
@@ -8,6 +8,7 @@
 #include <stan/math/rev/core.hpp>
 #include <stan/math/rev/fun/calculate_chain.hpp>
 #include <stan/math/rev/fun/typedefs.hpp>
 #include <stan/math/prim/fun/constants.hpp>
 #include <stan/math/prim/fun/log_sum_exp.hpp>
+#include <stan/math/prim/vectorize/apply_vector_unary.hpp>
 #include <cmath>
 #include <vector>
@@ -37,18 +38,6 @@ class log_sum_exp_vd_vari : public op_vd_vari {
     }
   }
 };
-class log_sum_exp_dv_vari : public op_dv_vari {
- public:
-  log_sum_exp_dv_vari(double a, vari* bvi)
-      : op_dv_vari(log_sum_exp(a, bvi->val_), a, bvi) {}
-  void chain() {
-    if (val_ == NEGATIVE_INFTY) {
-      bvi_->adj_ += adj_;
-    } else {
-      bvi_->adj_ += adj_ * calculate_chain(bvi_->val_, val_);
-    }
-  }
-};
 
 }  // namespace internal
 
@@ -68,70 +57,16 @@ inline var log_sum_exp(const var& a, double b) {
 /**
  * Returns the log sum of exponentials.
 */
 inline var log_sum_exp(double a, const var& b) {
-  return var(new internal::log_sum_exp_dv_vari(a, b.vi_));
-}
-
-namespace internal {
-inline double log_sum_exp_as_double(const std::vector<var>& x) {
-  using std::exp;
-  using std::log;
-  double max = NEGATIVE_INFTY;
-  for (size_t i = 0; i < x.size(); ++i) {
-    if (x[i] > max) {
-      max = x[i].val();
-    }
-  }
-  double sum = 0.0;
-  for (size_t i = 0; i < x.size(); ++i) {
-    if (x[i] != NEGATIVE_INFTY) {
-      sum += exp(x[i].val() - max);
-    }
-  }
-  return max + log(sum);
-}
-
-class log_sum_exp_vector_vari : public op_vector_vari {
- public:
-  explicit log_sum_exp_vector_vari(const std::vector<var>& x)
-      : op_vector_vari(log_sum_exp_as_double(x), x) {}
-  void chain() {
-    for (size_t i = 0; i < size_; ++i) {
-      vis_[i]->adj_ += adj_ * calculate_chain(vis_[i]->val_, val_);
-    }
-  }
-};
-}  // namespace internal
-
-/**
- * Returns the log sum of exponentials.
- */
-inline var log_sum_exp(const std::vector<var>& x) {
-  return var(new internal::log_sum_exp_vector_vari(x));
+  return var(new internal::log_sum_exp_vd_vari(b.vi_, a));
 }
 
 namespace internal {
-// these function and the following class just translate
-// log_sum_exp for std::vector for Eigen::Matrix
-
-template <int R, int C>
-inline double log_sum_exp_as_double(const Eigen::Matrix<var, R, C>& x) {
-  if (x.size() == 0) {
-    return NEGATIVE_INFTY;
-  }
-
-  const double max = x.val().maxCoeff();
-  if (!std::isfinite(max)) {
-    return max;
-  }
-  return max + std::log((x.val().array() - max).exp().sum());
-}
-
 class log_sum_exp_matrix_vari : public op_matrix_vari {
  public:
-  template <int R, int C>
-  explicit log_sum_exp_matrix_vari(const Eigen::Matrix<var, R, C>& x)
-      : op_matrix_vari(log_sum_exp_as_double(x), x) {}
+  template <typename T>
+  explicit log_sum_exp_matrix_vari(const T& x)
+      : op_matrix_vari(log_sum_exp(x.val()), x) {}
   void chain() {
     Eigen::Map<vector_vi> vis_map(vis_, size_);
     vis_map.adj().array() += adj_ * (vis_map.val().array() - val_).exp();
@@ -142,13 +77,14 @@ class log_sum_exp_matrix_vari : public op_matrix_vari {
 /**
  * Returns the log sum of exponentials.
  *
- * @tparam R number of rows, can be Eigen::Dynamic
- * @tparam C number of columns, can be Eigen::Dynamic
+ * @tparam T Type of input vector or matrix.
 * @param x matrix
 */
-template <int R, int C>
-inline var log_sum_exp(const Eigen::Matrix<var, R, C>& x) {
-  return var(new internal::log_sum_exp_matrix_vari(x));
+template <typename T, require_t<is_var<scalar_type_t<T>>>...>
+inline auto log_sum_exp(const T& x) {
+  return apply_vector_unary<T>::reduce(x, [&](const auto& v) {
+    return var(new internal::log_sum_exp_matrix_vari(v));
+  });
 }
 
 }  // namespace math
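A quick check of the adjoint rule above (not part of the patch): chain() increments each input adjoint by adj_ * exp(x_i - lse), i.e. by softmax(x)_i, so the adjoints of log_sum_exp sum to 1.

#include <stan/math/rev.hpp>
#include <iostream>

int main() {
  using stan::math::var;
  Eigen::Matrix<var, Eigen::Dynamic, 1> x(3);
  x << 1, 2, 3;

  var lse = stan::math::log_sum_exp(x);
  lse.grad();

  // The adjoints are softmax(x) and therefore sum to 1.
  double total = 0;
  for (int i = 0; i < x.size(); ++i) {
    std::cout << x(i).adj() << "\n";
    total += x(i).adj();
  }
  std::cout << "sum = " << total << "\n";  // ~1
  stan::math::recover_memory();
}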
diff --git a/test/unit/math/mix/fun/log_softmax_test.cpp b/test/unit/math/mix/fun/log_softmax_test.cpp
index bdd4cdfd085..8155f9677fb 100644
--- a/test/unit/math/mix/fun/log_softmax_test.cpp
+++ b/test/unit/math/mix/fun/log_softmax_test.cpp
@@ -1,7 +1,9 @@
 #include <test/unit/math/test_ad.hpp>
+#include <vector>
 
 TEST(MathMixMatFun, logSoftmax) {
   auto f = [](const auto& x) { return stan::math::log_softmax(x); };
 
+  // Column Vectors
   Eigen::VectorXd x0(0);  // error case
   stan::test::expect_ad(f, x0);
@@ -24,4 +26,66 @@ TEST(MathMixMatFun, logSoftmax) {
   Eigen::VectorXd x3c(3);
   x3c << 2, 1, 1;
   stan::test::expect_ad(f, x3c);
+
+  // Row Vectors
+  Eigen::RowVectorXd rx0(0);  // error case
+  stan::test::expect_ad(f, rx0);
+
+  Eigen::RowVectorXd rx1(1);
+  rx1 << 0;
+  stan::test::expect_ad(f, rx1);
+
+  Eigen::RowVectorXd rx2(2);
+  rx2 << -1, 1;
+  stan::test::expect_ad(f, rx2);
+
+  Eigen::RowVectorXd rx3(3);
+  rx3 << -1, 1, 10;
+  stan::test::expect_ad(f, rx3);
+
+  Eigen::RowVectorXd rx3b(3);
+  rx3b << 0, 1, 2;
+  stan::test::expect_ad(f, rx3b);
+
+  Eigen::RowVectorXd rx3c(3);
+  rx3c << 2, 1, 1;
+  stan::test::expect_ad(f, rx3c);
+
+  // std vectors
+  std::vector<double> stx0(0);  // error case
+  stan::test::expect_ad(f, stx0);
+
+  std::vector<double> stx1{0};
+  stan::test::expect_ad(f, stx1);
+
+  std::vector<double> stx2{-1, 1};
+  stan::test::expect_ad(f, stx2);
+
+  std::vector<double> stx3{-1, 1, 10};
+  stan::test::expect_ad(f, stx3);
+
+  std::vector<double> stx3b{0, 1, 2};
+  stan::test::expect_ad(f, stx3b);
+
+  std::vector<double> stx3c{2, 1, 1};
+  stan::test::expect_ad(f, stx3c);
+
+  // Nested containers
+  std::vector<Eigen::VectorXd> stvx0{x0, x0};  // error case
+  stan::test::expect_ad(f, stvx0);
+
+  std::vector<Eigen::VectorXd> stvx1{x1, x1};
+  stan::test::expect_ad(f, stvx1);
+
+  std::vector<Eigen::RowVectorXd> strx0{rx0, rx0};  // error case
+  stan::test::expect_ad(f, strx0);
+
+  std::vector<Eigen::RowVectorXd> strx1{rx1, rx1};
+  stan::test::expect_ad(f, strx1);
+
+  std::vector<std::vector<double>> ststx0{stx0, stx0};  // error case
+  stan::test::expect_ad(f, ststx0);
+
+  std::vector<std::vector<double>> ststx1{stx1, stx1};
+  stan::test::expect_ad(f, ststx1);
 }
diff --git a/test/unit/math/mix/fun/log_sum_exp_test.cpp b/test/unit/math/mix/fun/log_sum_exp_test.cpp
index aa3d9eaa46d..7244b70895e 100644
--- a/test/unit/math/mix/fun/log_sum_exp_test.cpp
+++ b/test/unit/math/mix/fun/log_sum_exp_test.cpp
@@ -70,18 +70,23 @@ TEST(MathMixMatFun, logSumExp) {
     stan::test::expect_ad(tols, f, x);
     Eigen::RowVectorXd rx = x;
     stan::test::expect_ad(tols, f, rx);
+    std::vector<double> stx
+        = std::vector<double>(x.data(), x.data() + x.size());
+    stan::test::expect_ad(tols, f, stx);
   }
 
   Eigen::MatrixXd x23(2, 2);
   x23 << 1, 2, 3, 4;
   stan::test::expect_ad(f, x23);
 
-  std::vector<double> a1{0};
-  stan::test::expect_ad(f, a1);
-
-  std::vector<double> a2{5, 2};
-  stan::test::expect_ad(f, a2);
-
-  std::vector<double> a4{1, 2, 3, 4};
-  stan::test::expect_ad(f, a4);
+  std::vector<Eigen::VectorXd> stvx{x2, x2b, x2c};
+  stan::test::expect_ad(tols, f, stvx);
+  std::vector<Eigen::RowVectorXd> strx{x2.transpose(), x2b.transpose(),
+                                       x2c.transpose()};
+  stan::test::expect_ad(tols, f, strx);
+  std::vector<std::vector<double>> ststx{
+      std::vector<double>(x2.data(), x2.data() + x2.size()),
+      std::vector<double>(x2b.data(), x2b.data() + x2b.size()),
+      std::vector<double>(x2c.data(), x2c.data() + x2c.size())};
+  stan::test::expect_ad(tols, f, ststx);
 }
diff --git a/test/unit/math/prim/fun/log_softmax_test.cpp b/test/unit/math/prim/fun/log_softmax_test.cpp
index ba26ba1ce20..a75a50e281c 100644
--- a/test/unit/math/prim/fun/log_softmax_test.cpp
+++ b/test/unit/math/prim/fun/log_softmax_test.cpp
@@ -1,5 +1,6 @@
 #include <stan/math/prim.hpp>
 #include <gtest/gtest.h>
+#include <vector>
 
 void test_log_softmax(const Eigen::Matrix<double, Eigen::Dynamic, 1>& theta) {
   using Eigen::Dynamic;
@@ -33,9 +34,32 @@ TEST(MathMatrixPrimMat, log_softmax) {
   //   x << 0.0;
   //   test_log_softmax(x);
 
+  std::vector<double> in{-1, 1};
+  std::vector<double> out = log_softmax(in);
+
   stan::math::vector_d x2(2);
   x2 << -1.0, 1.0;
-  test_log_softmax(x2);
+  stan::math::matrix_d m2(2, 2);
+  m2 << -1.0, 1.0, -1.0, 1.0;
+  stan::math::vector_d x2_out = log_softmax(x2);
+
+  EXPECT_FLOAT_EQ(out[0], x2_out[0]);
+  EXPECT_FLOAT_EQ(out[1], x2_out[1]);
+
+  x2_out = log_softmax(m2.diagonal());
+
+  EXPECT_FLOAT_EQ(out[0], x2_out[0]);
+  EXPECT_FLOAT_EQ(out[1], x2_out[1]);
+
+  std::vector<stan::math::vector_d> invec{x2, x2};
+  std::vector<stan::math::vector_d> outvec = log_softmax(invec);
+  std::vector<std::vector<double>> instvec{in, in};
+  std::vector<std::vector<double>> outstvec = log_softmax(instvec);
+
+  EXPECT_FLOAT_EQ(outvec[0][0], outstvec[0][0]);
+  EXPECT_FLOAT_EQ(outvec[0][1], outstvec[0][1]);
+  EXPECT_FLOAT_EQ(outvec[1][0], outstvec[1][0]);
+  EXPECT_FLOAT_EQ(outvec[1][1], outstvec[1][1]);
 
   //   stan::math::vector_d x3(3);
   //   x3 << -1.0, 1.0, 10.0;
diff --git a/test/unit/math/prim/fun/log_sum_exp_test.cpp b/test/unit/math/prim/fun/log_sum_exp_test.cpp
index eced4e4edb4..5876c8756a5 100644
--- a/test/unit/math/prim/fun/log_sum_exp_test.cpp
+++ b/test/unit/math/prim/fun/log_sum_exp_test.cpp
@@ -87,9 +87,27 @@ TEST(MathFunctions, log_sum_exp_mat) {
   using Eigen::Matrix;
   using stan::math::log_sum_exp;
 
-  Matrix<double, Eigen::Dynamic, Eigen::Dynamic> m(3, 2);
-  m << 1, 2, 3, 4, 5, 6;
-  test_log_sum_exp(m);
+  Matrix<double, Eigen::Dynamic, Eigen::Dynamic> m1(3, 2);
+  m1 << 1, 2, 3, 4, 5, 6;
+  std::vector<double> st1{1, 2, 3, 4, 5, 6};
+  Matrix<double, Eigen::Dynamic, Eigen::Dynamic> m2(3, 2);
+  m2 << -1, -2, -3, -4, -5, -6;
+  std::vector<double> st2{-1, -2, -3, -4, -5, -6};
+  double m1_out = log_sum_exp(m1);
+  double m2_out = log_sum_exp(m2);
+  double st1_out = log_sum_exp(st1);
+  double st2_out = log_sum_exp(st2);
+  EXPECT_FLOAT_EQ(m1_out, st1_out);
+  EXPECT_FLOAT_EQ(m2_out, st2_out);
+
+  std::vector<Eigen::MatrixXd> st_m{m1, m2};
+  std::vector<std::vector<double>> st_st{st1, st2};
+  std::vector<double> m_out = log_sum_exp(st_m);
+  std::vector<double> st_out = log_sum_exp(st_st);
+  EXPECT_FLOAT_EQ(m1_out, m_out[0]);
+  EXPECT_FLOAT_EQ(m2_out, m_out[1]);
+  EXPECT_FLOAT_EQ(m1_out, st_out[0]);
+  EXPECT_FLOAT_EQ(m2_out, st_out[1]);
 
   Matrix<double, Eigen::Dynamic, 1> v(3);
   v << 1, 2, 3;