// (C) Copyright Nick Thompson 2018. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP #define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP #include #include #include #include #include BOOST_MATH_HEADER_DEPRECATED(""); namespace boost{ namespace math{ namespace tools { template auto means_and_covariance(Container const & u, Container const & v) { using Real = typename Container::value_type; using std::size; BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample."); // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al. Real cov = 0; Real mu_u = u[0]; Real mu_v = v[0]; for(size_t i = 1; i < size(u); ++i) { Real u_tmp = (u[i] - mu_u)/(i+1); Real v_tmp = v[i] - mu_v; cov += i*u_tmp*v_tmp; mu_u = mu_u + u_tmp; mu_v = mu_v + v_tmp/(i+1); } return std::make_tuple(mu_u, mu_v, cov/size(u)); } template auto covariance(Container const & u, Container const & v) { auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v); return cov; } template auto correlation_coefficient(Container const & u, Container const & v) { using Real = typename Container::value_type; using std::size; BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance."); BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples."); Real cov = 0; Real mu_u = u[0]; Real mu_v = v[0]; Real Qu = 0; Real Qv = 0; for(size_t i = 1; i < size(u); ++i) { Real u_tmp = u[i] - mu_u; Real v_tmp = v[i] - mu_v; Qu = Qu + (i*u_tmp*u_tmp)/(i+1); Qv = Qv + (i*v_tmp*v_tmp)/(i+1); cov += i*u_tmp*v_tmp/(i+1); mu_u = mu_u + u_tmp/(i+1); mu_v = mu_v + v_tmp/(i+1); } // If one dataset is constant, then they have no correlation: // See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector // Thanks to zbjornson for pointing this out. if (Qu == 0 || Qv == 0) { return std::numeric_limits::quiet_NaN(); } // Make sure rho in [-1, 1], even in the presence of numerical noise. Real rho = cov/sqrt(Qu*Qv); if (rho > 1) { rho = 1; } if (rho < -1) { rho = -1; } return rho; } }}} #endif