diff --git a/stan/math/prim/prob/gamma_lccdf.hpp b/stan/math/prim/prob/gamma_lccdf.hpp index a670cefcecf..aaea6f9d4a4 100644 --- a/stan/math/prim/prob/gamma_lccdf.hpp +++ b/stan/math/prim/prob/gamma_lccdf.hpp @@ -6,9 +6,10 @@ #include #include #include -#include -#include +#include +#include #include +#include #include #include #include @@ -27,22 +28,24 @@ inline return_type_t gamma_lccdf( using T_partials_return = partials_return_t; using std::exp; using std::log; - using std::pow; using T_y_ref = ref_type_t; using T_alpha_ref = ref_type_t; using T_beta_ref = ref_type_t; static constexpr const char* function = "gamma_lccdf"; + check_consistent_sizes(function, "Random variable", y, "Shape parameter", alpha, "Inverse scale parameter", beta); + T_y_ref y_ref = y; T_alpha_ref alpha_ref = alpha; T_beta_ref beta_ref = beta; + check_positive_finite(function, "Shape parameter", alpha_ref); check_positive_finite(function, "Inverse scale parameter", beta_ref); check_nonnegative(function, "Random variable", y_ref); if (size_zero(y, alpha, beta)) { - return 0; + return 0.0; } T_partials_return P(0.0); @@ -51,61 +54,83 @@ inline return_type_t gamma_lccdf( scalar_seq_view y_vec(y_ref); scalar_seq_view alpha_vec(alpha_ref); scalar_seq_view beta_vec(beta_ref); - size_t N = max_size(y, alpha, beta); - - // Explicit return for extreme values - // The gradients are technically ill-defined, but treated as zero - for (size_t i = 0; i < stan::math::size(y); i++) { - if (y_vec.val(i) == 0) { - // LCCDF(0) = log(P(Y > 0)) = log(1) = 0 - return ops_partials.build(0.0); + const size_t N = max_size(y, alpha, beta); + + constexpr bool need_y_beta_deriv = is_any_autodiff_v; + + [[maybe_unused]] T_partials_return lgamma_alpha_const = 0.0; + [[maybe_unused]] bool alpha_is_scalar = false; + if constexpr (need_y_beta_deriv) { + alpha_is_scalar = stan::math::size(alpha) == 1; + if (alpha_is_scalar) { + const T_partials_return alpha0 = value_of(alpha_vec.val(0)); + lgamma_alpha_const = lgamma(alpha0); } } - for (size_t n = 0; n < N; n++) { - // Explicit results for extreme values - // The gradients are technically ill-defined, but treated as zero - if (y_vec.val(n) == INFTY) { - // LCCDF(∞) = log(P(Y > ∞)) = log(0) = -∞ + for (size_t n = 0; n < N; ++n) { + const T_partials_return y_dbl = value_of(y_vec.val(n)); + + if (y_dbl == 0.0) { + continue; + } + if (y_dbl == INFTY) { return ops_partials.build(negative_infinity()); } - const T_partials_return y_dbl = y_vec.val(n); - const T_partials_return alpha_dbl = alpha_vec.val(n); - const T_partials_return beta_dbl = beta_vec.val(n); - const T_partials_return beta_y_dbl = beta_dbl * y_dbl; + const T_partials_return alpha_dbl = value_of(alpha_vec.val(n)); + const T_partials_return beta_dbl = value_of(beta_vec.val(n)); + const T_partials_return beta_y = beta_dbl * y_dbl; + + // ---------- 1. VALUE: log CCDF via lower regularized gamma ---------- + // Pn = P(alpha, beta*y) = CDF + // Qn = 1 - Pn = CCDF + const T_partials_return Pn = gamma_p(alpha_dbl, beta_y); + const T_partials_return log_Qn = log1m(Pn); // = log(1 - Pn) + const T_partials_return Qn = 1.0 - Pn; // needed for gradients - // Qn = 1 - Pn - const T_partials_return Qn = gamma_q(alpha_dbl, beta_y_dbl); - const T_partials_return log_Qn = log(Qn); + // If Qn underflows to 0 numerically, the log-CCDF is -infinity + if (Qn <= 0.0) { + return ops_partials.build(negative_infinity()); + } P += log_Qn; - if constexpr (is_any_autodiff_v) { - const T_partials_return log_y_dbl = log(y_dbl); - const T_partials_return log_beta_dbl = log(beta_dbl); - const T_partials_return log_pdf - = alpha_dbl * log_beta_dbl - lgamma(alpha_dbl) - + (alpha_dbl - 1.0) * log_y_dbl - beta_y_dbl; - const T_partials_return common_term = exp(log_pdf - log_Qn); + if constexpr (need_y_beta_deriv) { + const T_partials_return log_y = log(y_dbl); + const T_partials_return log_beta = log(beta_dbl); + const T_partials_return lgamma_alpha + = (alpha_is_scalar ? lgamma_alpha_const : lgamma(alpha_dbl)); + + const T_partials_return log_pdf = alpha_dbl * log_beta - lgamma_alpha + + (alpha_dbl - 1.0) * log_y - beta_y; + + // hazard = f(y) / Q(y), on the log scale as exp(log_pdf - log_Qn) + const T_partials_return hazard = exp(log_pdf - log_Qn); if constexpr (is_autodiff_v) { - // d/dy log(1-F(y)) = -f(y)/(1-F(y)) - partials<0>(ops_partials)[n] -= common_term; + partials<0>(ops_partials)[n] += -hazard; } if constexpr (is_autodiff_v) { - // d/dbeta log(1-F(y)) = -y*f(y)/(beta*(1-F(y))) - partials<2>(ops_partials)[n] -= y_dbl / beta_dbl * common_term; + partials<2>(ops_partials)[n] += -(y_dbl / beta_dbl) * hazard; } } if constexpr (is_autodiff_v) { - const T_partials_return digamma_val = digamma(alpha_dbl); - const T_partials_return gamma_val = tgamma(alpha_dbl); - // d/dalpha log(1-F(y)) = grad_upper_inc_gamma / (1-F(y)) - partials<1>(ops_partials)[n] - += grad_reg_inc_gamma(alpha_dbl, beta_y_dbl, gamma_val, digamma_val) - / Qn; + // For the shape derivative, we stay entirely on the P side: + // + // Q(alpha, z) = 1 - P(alpha, z) + // d/da Q = - d/da P + // + // so + // + // d/da log Q = (1 / Q) * dQ/da + // = - (1 / Q) * dP/da. + // + // grad_reg_lower_inc_gamma(alpha, z) = d/da P(alpha, z) + const T_partials_return dP_da + = grad_reg_lower_inc_gamma(alpha_dbl, beta_y); + partials<1>(ops_partials)[n] -= dP_da / Qn; } } return ops_partials.build(P); diff --git a/test/unit/math/prim/prob/gamma_lccdf_test.cpp b/test/unit/math/prim/prob/gamma_lccdf_test.cpp index 2893f2f0166..6db4ab2d46f 100644 --- a/test/unit/math/prim/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/prim/prob/gamma_lccdf_test.cpp @@ -66,6 +66,31 @@ TEST(ProbGamma, lccdf_small_alpha_small_y) { EXPECT_LT(result, 0.0); } +TEST(ProbGamma, lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + + // For large alpha and very small y, the CCDF is extremely close to 1. + // The old implementation computed `log(gamma_q(alpha, beta * y))`, which can + // round to `log(1) == 0`. The updated implementation uses `log1m(gamma_p)`, + // which preserves the tiny negative value. + double y = 1e-8; + double alpha = 31.25; + double beta = 1.0; + + double new_val = gamma_lccdf(y, alpha, beta); + double expected = log1m(gamma_p(alpha, beta * y)); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha, beta * y)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(new_val, 0.0); + EXPECT_DOUBLE_EQ(new_val, expected); +} + TEST(ProbGamma, lccdf_large_alpha_large_y) { using stan::math::gamma_lccdf; diff --git a/test/unit/math/rev/prob/gamma_lccdf_test.cpp b/test/unit/math/rev/prob/gamma_lccdf_test.cpp index 1066773e060..ea53cefe465 100644 --- a/test/unit/math/rev/prob/gamma_lccdf_test.cpp +++ b/test/unit/math/rev/prob/gamma_lccdf_test.cpp @@ -230,6 +230,48 @@ TEST(ProbDistributionsGamma, lccdf_extreme_values_small) { } } +TEST(ProbDistributionsGamma, + lccdf_alpha_gt_30_small_y_old_code_rounds_to_zero) { + using stan::math::gamma_lccdf; + using stan::math::gamma_p; + using stan::math::gamma_q; + using stan::math::log1m; + using stan::math::var; + + // Same comparison as the prim test, but also exercises autodiff for + // alpha > 30. + double y_d = 1e-8; + double alpha_d = 31.25; + double beta_d = 1.0; + + var y_v = y_d; + var alpha_v = alpha_d; + var beta_v = beta_d; + + var lccdf_var = gamma_lccdf(y_v, alpha_v, beta_v); + + // Old code: log(gamma_q(alpha, beta * y)) + double old_val = std::log(gamma_q(alpha_d, beta_d * y_d)); + double expected = log1m(gamma_p(alpha_d, beta_d * y_d)); + + EXPECT_EQ(old_val, 0.0); + EXPECT_LT(lccdf_var.val(), 0.0); + EXPECT_DOUBLE_EQ(lccdf_var.val(), expected); + + std::vector vars = {y_v, alpha_v, beta_v}; + std::vector grads; + lccdf_var.grad(vars, grads); + + for (size_t i = 0; i < grads.size(); ++i) { + EXPECT_FALSE(std::isnan(grads[i])) << "Gradient " << i << " is NaN"; + EXPECT_TRUE(std::isfinite(grads[i])) + << "Gradient " << i << " is not finite"; + } + + // d/dy log(CCDF) should be <= 0 (can underflow to -0) + EXPECT_LE(grads[0], 0.0); +} + TEST(ProbDistributionsGamma, lccdf_extreme_values_large) { using stan::math::gamma_lccdf; using stan::math::var;