diff --git a/.gitattributes b/.gitattributes index b6ca34f5bc2b2ca64ee6392a0ec8795df8408b14..8f91bfdff1c4d63635803301f5973e4391898a28 100644 --- a/.gitattributes +++ b/.gitattributes @@ -232,3 +232,4 @@ Data/Tsukuyomi/wavs/VOICEACTRESS100_100.wav filter=lfs diff=lfs merge=lfs -text .venv/Lib/site-packages/torch/lib/cudnn_engines_precompiled64_9.dll filter=lfs diff=lfs merge=lfs -text .venv/Lib/site-packages/torch/lib/dnnl.lib filter=lfs diff=lfs merge=lfs -text .venv/Lib/site-packages/torch/lib/torch_cuda.dll filter=lfs diff=lfs merge=lfs -text +.venv/Lib/site-packages/scipy/signal/_spectral.cp39-win_amd64.pyd filter=lfs diff=lfs merge=lfs -text diff --git a/.venv/Lib/site-packages/scipy/signal/_spectral.cp39-win_amd64.pyd b/.venv/Lib/site-packages/scipy/signal/_spectral.cp39-win_amd64.pyd new file mode 100644 index 0000000000000000000000000000000000000000..e87c2889d55f577e9a8b004c420a9822790e3cc1 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/signal/_spectral.cp39-win_amd64.pyd @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34c12e9119432a42d10bb2fde95094848e3810fc5b47c12c09bbdaa7b821ee32 +size 1009152 diff --git a/.venv/Lib/site-packages/scipy/special/__init__.py b/.venv/Lib/site-packages/scipy/special/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1c845e3af28e87d9959ca4e41c92898bbf1bcf04 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/__init__.py @@ -0,0 +1,863 @@ +""" +======================================== +Special functions (:mod:`scipy.special`) +======================================== + +.. currentmodule:: scipy.special + +Almost all of the functions below accept NumPy arrays as input +arguments as well as single numbers. This means they follow +broadcasting and automatic array-looping rules. Technically, +they are `NumPy universal functions +`_. +Functions which do not accept NumPy arrays are marked by a warning +in the section description. + +.. 
seealso:: + + `scipy.special.cython_special` -- Typed Cython versions of special functions + + +Error handling +============== + +Errors are handled by returning NaNs or other appropriate values. +Some of the special function routines can emit warnings or raise +exceptions when an error occurs. By default this is disabled; to +query and control the current error handling state the following +functions are provided. + +.. autosummary:: + :toctree: generated/ + + geterr -- Get the current way of handling special-function errors. + seterr -- Set how special-function errors are handled. + errstate -- Context manager for special-function error handling. + SpecialFunctionWarning -- Warning that can be emitted by special functions. + SpecialFunctionError -- Exception that can be raised by special functions. + +Available functions +=================== + +Airy functions +-------------- + +.. autosummary:: + :toctree: generated/ + + airy -- Airy functions and their derivatives. + airye -- Exponentially scaled Airy functions and their derivatives. + ai_zeros -- Compute `nt` zeros and values of the Airy function Ai and its derivative. + bi_zeros -- Compute `nt` zeros and values of the Airy function Bi and its derivative. + itairy -- Integrals of Airy functions + + +Elliptic functions and integrals +-------------------------------- + +.. autosummary:: + :toctree: generated/ + + ellipj -- Jacobian elliptic functions. + ellipk -- Complete elliptic integral of the first kind. + ellipkm1 -- Complete elliptic integral of the first kind around `m` = 1. + ellipkinc -- Incomplete elliptic integral of the first kind. + ellipe -- Complete elliptic integral of the second kind. + ellipeinc -- Incomplete elliptic integral of the second kind. + elliprc -- Degenerate symmetric integral RC. + elliprd -- Symmetric elliptic integral of the second kind. + elliprf -- Completely-symmetric elliptic integral of the first kind. + elliprg -- Completely-symmetric elliptic integral of the second kind. 
+ elliprj -- Symmetric elliptic integral of the third kind. + +Bessel functions +---------------- + +.. autosummary:: + :toctree: generated/ + + jv -- Bessel function of the first kind of real order and \ + complex argument. + jve -- Exponentially scaled Bessel function of order `v`. + yn -- Bessel function of the second kind of integer order and \ + real argument. + yv -- Bessel function of the second kind of real order and \ + complex argument. + yve -- Exponentially scaled Bessel function of the second kind \ + of real order. + kn -- Modified Bessel function of the second kind of integer \ + order `n` + kv -- Modified Bessel function of the second kind of real order \ + `v` + kve -- Exponentially scaled modified Bessel function of the \ + second kind. + iv -- Modified Bessel function of the first kind of real order. + ive -- Exponentially scaled modified Bessel function of the \ + first kind. + hankel1 -- Hankel function of the first kind. + hankel1e -- Exponentially scaled Hankel function of the first kind. + hankel2 -- Hankel function of the second kind. + hankel2e -- Exponentially scaled Hankel function of the second kind. + wright_bessel -- Wright's generalized Bessel function. + +The following function does not accept NumPy arrays (it is not a +universal function): + +.. autosummary:: + :toctree: generated/ + + lmbda -- Jahnke-Emden Lambda function, Lambdav(x). + +Zeros of Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + jnjnp_zeros -- Compute zeros of integer-order Bessel functions Jn and Jn'. + jnyn_zeros -- Compute nt zeros of Bessel functions Jn(x), Jn'(x), Yn(x), and Yn'(x). + jn_zeros -- Compute zeros of integer-order Bessel function Jn(x). + jnp_zeros -- Compute zeros of integer-order Bessel function derivative Jn'(x). + yn_zeros -- Compute zeros of integer-order Bessel function Yn(x). 
+ ynp_zeros -- Compute zeros of integer-order Bessel function derivative Yn'(x). + y0_zeros -- Compute nt zeros of Bessel function Y0(z), and derivative at each zero. + y1_zeros -- Compute nt zeros of Bessel function Y1(z), and derivative at each zero. + y1p_zeros -- Compute nt zeros of Bessel derivative Y1'(z), and value at each zero. + +Faster versions of common Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + j0 -- Bessel function of the first kind of order 0. + j1 -- Bessel function of the first kind of order 1. + y0 -- Bessel function of the second kind of order 0. + y1 -- Bessel function of the second kind of order 1. + i0 -- Modified Bessel function of order 0. + i0e -- Exponentially scaled modified Bessel function of order 0. + i1 -- Modified Bessel function of order 1. + i1e -- Exponentially scaled modified Bessel function of order 1. + k0 -- Modified Bessel function of the second kind of order 0, :math:`K_0`. + k0e -- Exponentially scaled modified Bessel function K of order 0 + k1 -- Modified Bessel function of the second kind of order 1, :math:`K_1(x)`. + k1e -- Exponentially scaled modified Bessel function K of order 1. + +Integrals of Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + itj0y0 -- Integrals of Bessel functions of order 0. + it2j0y0 -- Integrals related to Bessel functions of order 0. + iti0k0 -- Integrals of modified Bessel functions of order 0. + it2i0k0 -- Integrals related to modified Bessel functions of order 0. + besselpoly -- Weighted integral of a Bessel function. + +Derivatives of Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + jvp -- Compute nth derivative of Bessel function Jv(z) with respect to `z`. + yvp -- Compute nth derivative of Bessel function Yv(z) with respect to `z`. 
+ kvp -- Compute nth derivative of real-order modified Bessel function Kv(z) + ivp -- Compute nth derivative of modified Bessel function Iv(z) with respect to `z`. + h1vp -- Compute nth derivative of Hankel function H1v(z) with respect to `z`. + h2vp -- Compute nth derivative of Hankel function H2v(z) with respect to `z`. + +Spherical Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + spherical_jn -- Spherical Bessel function of the first kind or its derivative. + spherical_yn -- Spherical Bessel function of the second kind or its derivative. + spherical_in -- Modified spherical Bessel function of the first kind or its derivative. + spherical_kn -- Modified spherical Bessel function of the second kind or its derivative. + +Riccati-Bessel functions +^^^^^^^^^^^^^^^^^^^^^^^^ + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + riccati_jn -- Compute Riccati-Bessel function of the first kind and its derivative. + riccati_yn -- Compute Riccati-Bessel function of the second kind and its derivative. + +Struve functions +---------------- + +.. autosummary:: + :toctree: generated/ + + struve -- Struve function. + modstruve -- Modified Struve function. + itstruve0 -- Integral of the Struve function of order 0. + it2struve0 -- Integral related to the Struve function of order 0. + itmodstruve0 -- Integral of the modified Struve function of order 0. + + +Raw statistical functions +------------------------- + +.. seealso:: :mod:`scipy.stats`: Friendly versions of these functions. + +Binomial distribution +^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + bdtr -- Binomial distribution cumulative distribution function. + bdtrc -- Binomial distribution survival function. + bdtri -- Inverse function to `bdtr` with respect to `p`. + bdtrik -- Inverse function to `bdtr` with respect to `k`. 
+ bdtrin -- Inverse function to `bdtr` with respect to `n`. + +Beta distribution +^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + btdtr -- Cumulative distribution function of the beta distribution. + btdtri -- The `p`-th quantile of the beta distribution. + btdtria -- Inverse of `btdtr` with respect to `a`. + btdtrib -- Inverse of `btdtr` with respect to `b`. + +F distribution +^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + fdtr -- F cumulative distribution function. + fdtrc -- F survival function. + fdtri -- The `p`-th quantile of the F-distribution. + fdtridfd -- Inverse to `fdtr` vs dfd. + +Gamma distribution +^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + gdtr -- Gamma distribution cumulative distribution function. + gdtrc -- Gamma distribution survival function. + gdtria -- Inverse of `gdtr` vs a. + gdtrib -- Inverse of `gdtr` vs b. + gdtrix -- Inverse of `gdtr` vs x. + +Negative binomial distribution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + nbdtr -- Negative binomial cumulative distribution function. + nbdtrc -- Negative binomial survival function. + nbdtri -- Inverse of `nbdtr` vs `p`. + nbdtrik -- Inverse of `nbdtr` vs `k`. + nbdtrin -- Inverse of `nbdtr` vs `n`. + +Noncentral F distribution +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + ncfdtr -- Cumulative distribution function of the non-central F distribution. + ncfdtridfd -- Calculate degrees of freedom (denominator) for the noncentral F-distribution. + ncfdtridfn -- Calculate degrees of freedom (numerator) for the noncentral F-distribution. + ncfdtri -- Inverse cumulative distribution function of the non-central F distribution. + ncfdtrinc -- Calculate non-centrality parameter for non-central F distribution. + +Noncentral t distribution +^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + nctdtr -- Cumulative distribution function of the non-central `t` distribution. 
+ nctdtridf -- Calculate degrees of freedom for non-central t distribution. + nctdtrit -- Inverse cumulative distribution function of the non-central t distribution. + nctdtrinc -- Calculate non-centrality parameter for non-central t distribution. + +Normal distribution +^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + nrdtrimn -- Calculate mean of normal distribution given other params. + nrdtrisd -- Calculate standard deviation of normal distribution given other params. + ndtr -- Normal cumulative distribution function. + log_ndtr -- Logarithm of normal cumulative distribution function. + ndtri -- Inverse of `ndtr` vs x. + ndtri_exp -- Inverse of `log_ndtr` vs x. + +Poisson distribution +^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + pdtr -- Poisson cumulative distribution function. + pdtrc -- Poisson survival function. + pdtri -- Inverse to `pdtr` vs m. + pdtrik -- Inverse to `pdtr` vs k. + +Student t distribution +^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + stdtr -- Student t distribution cumulative distribution function. + stdtridf -- Inverse of `stdtr` vs df. + stdtrit -- Inverse of `stdtr` vs `t`. + +Chi square distribution +^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + chdtr -- Chi square cumulative distribution function. + chdtrc -- Chi square survival function. + chdtri -- Inverse to `chdtrc`. + chdtriv -- Inverse to `chdtr` vs `v`. + +Non-central chi square distribution +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + chndtr -- Non-central chi square cumulative distribution function. + chndtridf -- Inverse to `chndtr` vs `df`. + chndtrinc -- Inverse to `chndtr` vs `nc`. + chndtrix -- Inverse to `chndtr` vs `x`. + +Kolmogorov distribution +^^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + smirnov -- Kolmogorov-Smirnov complementary cumulative distribution function. + smirnovi -- Inverse to `smirnov`. 
+ kolmogorov -- Complementary cumulative distribution function of Kolmogorov distribution. + kolmogi -- Inverse function to `kolmogorov`. + +Box-Cox transformation +^^^^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + boxcox -- Compute the Box-Cox transformation. + boxcox1p -- Compute the Box-Cox transformation of 1 + `x`. + inv_boxcox -- Compute the inverse of the Box-Cox transformation. + inv_boxcox1p -- Compute the inverse of the Box-Cox transformation. + + +Sigmoidal functions +^^^^^^^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + logit -- Logit ufunc for ndarrays. + expit -- Logistic sigmoid function. + log_expit -- Logarithm of the logistic sigmoid function. + +Miscellaneous +^^^^^^^^^^^^^ + +.. autosummary:: + :toctree: generated/ + + tklmbda -- Tukey-Lambda cumulative distribution function. + owens_t -- Owen's T Function. + + +Information Theory functions +---------------------------- + +.. autosummary:: + :toctree: generated/ + + entr -- Elementwise function for computing entropy. + rel_entr -- Elementwise function for computing relative entropy. + kl_div -- Elementwise function for computing Kullback-Leibler divergence. + huber -- Huber loss function. + pseudo_huber -- Pseudo-Huber loss function. + + +Gamma and related functions +--------------------------- + +.. autosummary:: + :toctree: generated/ + + gamma -- Gamma function. + gammaln -- Logarithm of the absolute value of the Gamma function for real inputs. + loggamma -- Principal branch of the logarithm of the Gamma function. + gammasgn -- Sign of the gamma function. + gammainc -- Regularized lower incomplete gamma function. + gammaincinv -- Inverse to `gammainc`. + gammaincc -- Regularized upper incomplete gamma function. + gammainccinv -- Inverse to `gammaincc`. + beta -- Beta function. + betaln -- Natural logarithm of absolute value of beta function. + betainc -- Incomplete beta integral. + betaincc -- Complemented incomplete beta integral. 
+ betaincinv -- Inverse function to beta integral. + betainccinv -- Inverse of the complemented incomplete beta integral. + psi -- The digamma function. + rgamma -- Gamma function inverted. + polygamma -- Polygamma function n. + multigammaln -- Returns the log of multivariate gamma, also sometimes called the generalized gamma. + digamma -- psi(x[, out]). + poch -- Rising factorial (z)_m. + + +Error function and Fresnel integrals +------------------------------------ + +.. autosummary:: + :toctree: generated/ + + erf -- Returns the error function of complex argument. + erfc -- Complementary error function, ``1 - erf(x)``. + erfcx -- Scaled complementary error function, ``exp(x**2) * erfc(x)``. + erfi -- Imaginary error function, ``-i erf(i z)``. + erfinv -- Inverse function for erf. + erfcinv -- Inverse function for erfc. + wofz -- Faddeeva function. + dawsn -- Dawson's integral. + fresnel -- Fresnel sin and cos integrals. + fresnel_zeros -- Compute nt complex zeros of sine and cosine Fresnel integrals S(z) and C(z). + modfresnelp -- Modified Fresnel positive integrals. + modfresnelm -- Modified Fresnel negative integrals. + voigt_profile -- Voigt profile. + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + erf_zeros -- Compute nt complex zeros of error function erf(z). + fresnelc_zeros -- Compute nt complex zeros of cosine Fresnel integral C(z). + fresnels_zeros -- Compute nt complex zeros of sine Fresnel integral S(z). + +Legendre functions +------------------ + +.. autosummary:: + :toctree: generated/ + + lpmv -- Associated Legendre function of integer order and real degree. + sph_harm -- Compute spherical harmonics. + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + clpmn -- Associated Legendre function of the first kind for complex arguments. + lpn -- Legendre function of the first kind. 
+ lqn -- Legendre function of the second kind. + lpmn -- Sequence of associated Legendre functions of the first kind. + lqmn -- Sequence of associated Legendre functions of the second kind. + +Ellipsoidal harmonics +--------------------- + +.. autosummary:: + :toctree: generated/ + + ellip_harm -- Ellipsoidal harmonic functions E^p_n(l). + ellip_harm_2 -- Ellipsoidal harmonic functions F^p_n(l). + ellip_normal -- Ellipsoidal harmonic normalization constants gamma^p_n. + +Orthogonal polynomials +---------------------- + +The following functions evaluate values of orthogonal polynomials: + +.. autosummary:: + :toctree: generated/ + + assoc_laguerre -- Compute the generalized (associated) Laguerre polynomial of degree n and order k. + eval_legendre -- Evaluate Legendre polynomial at a point. + eval_chebyt -- Evaluate Chebyshev polynomial of the first kind at a point. + eval_chebyu -- Evaluate Chebyshev polynomial of the second kind at a point. + eval_chebyc -- Evaluate Chebyshev polynomial of the first kind on [-2, 2] at a point. + eval_chebys -- Evaluate Chebyshev polynomial of the second kind on [-2, 2] at a point. + eval_jacobi -- Evaluate Jacobi polynomial at a point. + eval_laguerre -- Evaluate Laguerre polynomial at a point. + eval_genlaguerre -- Evaluate generalized Laguerre polynomial at a point. + eval_hermite -- Evaluate physicist's Hermite polynomial at a point. + eval_hermitenorm -- Evaluate probabilist's (normalized) Hermite polynomial at a point. + eval_gegenbauer -- Evaluate Gegenbauer polynomial at a point. + eval_sh_legendre -- Evaluate shifted Legendre polynomial at a point. + eval_sh_chebyt -- Evaluate shifted Chebyshev polynomial of the first kind at a point. + eval_sh_chebyu -- Evaluate shifted Chebyshev polynomial of the second kind at a point. + eval_sh_jacobi -- Evaluate shifted Jacobi polynomial at a point. + +The following functions compute roots and quadrature weights for +orthogonal polynomials: + +.. 
autosummary:: + :toctree: generated/ + + roots_legendre -- Gauss-Legendre quadrature. + roots_chebyt -- Gauss-Chebyshev (first kind) quadrature. + roots_chebyu -- Gauss-Chebyshev (second kind) quadrature. + roots_chebyc -- Gauss-Chebyshev (first kind) quadrature. + roots_chebys -- Gauss-Chebyshev (second kind) quadrature. + roots_jacobi -- Gauss-Jacobi quadrature. + roots_laguerre -- Gauss-Laguerre quadrature. + roots_genlaguerre -- Gauss-generalized Laguerre quadrature. + roots_hermite -- Gauss-Hermite (physicist's) quadrature. + roots_hermitenorm -- Gauss-Hermite (statistician's) quadrature. + roots_gegenbauer -- Gauss-Gegenbauer quadrature. + roots_sh_legendre -- Gauss-Legendre (shifted) quadrature. + roots_sh_chebyt -- Gauss-Chebyshev (first kind, shifted) quadrature. + roots_sh_chebyu -- Gauss-Chebyshev (second kind, shifted) quadrature. + roots_sh_jacobi -- Gauss-Jacobi (shifted) quadrature. + +The functions below, in turn, return the polynomial coefficients in +``orthopoly1d`` objects, which function similarly to `numpy.poly1d`. +The ``orthopoly1d`` class also has an attribute ``weights``, which returns +the roots, weights, and total weights for the appropriate form of Gaussian +quadrature. These are returned in an ``n x 3`` array with roots in the first +column, weights in the second column, and total weights in the final column. +Note that ``orthopoly1d`` objects are converted to `~numpy.poly1d` when doing +arithmetic, and lose information of the original orthogonal polynomial. + +.. autosummary:: + :toctree: generated/ + + legendre -- Legendre polynomial. + chebyt -- Chebyshev polynomial of the first kind. + chebyu -- Chebyshev polynomial of the second kind. + chebyc -- Chebyshev polynomial of the first kind on :math:`[-2, 2]`. + chebys -- Chebyshev polynomial of the second kind on :math:`[-2, 2]`. + jacobi -- Jacobi polynomial. + laguerre -- Laguerre polynomial. + genlaguerre -- Generalized (associated) Laguerre polynomial. 
+ hermite -- Physicist's Hermite polynomial. + hermitenorm -- Normalized (probabilist's) Hermite polynomial. + gegenbauer -- Gegenbauer (ultraspherical) polynomial. + sh_legendre -- Shifted Legendre polynomial. + sh_chebyt -- Shifted Chebyshev polynomial of the first kind. + sh_chebyu -- Shifted Chebyshev polynomial of the second kind. + sh_jacobi -- Shifted Jacobi polynomial. + +.. warning:: + + Computing values of high-order polynomials (around ``order > 20``) using + polynomial coefficients is numerically unstable. To evaluate polynomial + values, the ``eval_*`` functions should be used instead. + + +Hypergeometric functions +------------------------ + +.. autosummary:: + :toctree: generated/ + + hyp2f1 -- Gauss hypergeometric function 2F1(a, b; c; z). + hyp1f1 -- Confluent hypergeometric function 1F1(a, b; x). + hyperu -- Confluent hypergeometric function U(a, b, x) of the second kind. + hyp0f1 -- Confluent hypergeometric limit function 0F1. + + +Parabolic cylinder functions +---------------------------- + +.. autosummary:: + :toctree: generated/ + + pbdv -- Parabolic cylinder function D. + pbvv -- Parabolic cylinder function V. + pbwa -- Parabolic cylinder function W. + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + pbdv_seq -- Parabolic cylinder functions Dv(x) and derivatives. + pbvv_seq -- Parabolic cylinder functions Vv(x) and derivatives. + pbdn_seq -- Parabolic cylinder functions Dn(z) and derivatives. + +Mathieu and related functions +----------------------------- + +.. autosummary:: + :toctree: generated/ + + mathieu_a -- Characteristic value of even Mathieu functions. + mathieu_b -- Characteristic value of odd Mathieu functions. + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + mathieu_even_coef -- Fourier coefficients for even Mathieu and modified Mathieu functions. 
+ mathieu_odd_coef -- Fourier coefficients for odd Mathieu and modified Mathieu functions. + +The following return both function and first derivative: + +.. autosummary:: + :toctree: generated/ + + mathieu_cem -- Even Mathieu function and its derivative. + mathieu_sem -- Odd Mathieu function and its derivative. + mathieu_modcem1 -- Even modified Mathieu function of the first kind and its derivative. + mathieu_modcem2 -- Even modified Mathieu function of the second kind and its derivative. + mathieu_modsem1 -- Odd modified Mathieu function of the first kind and its derivative. + mathieu_modsem2 -- Odd modified Mathieu function of the second kind and its derivative. + +Spheroidal wave functions +------------------------- + +.. autosummary:: + :toctree: generated/ + + pro_ang1 -- Prolate spheroidal angular function of the first kind and its derivative. + pro_rad1 -- Prolate spheroidal radial function of the first kind and its derivative. + pro_rad2 -- Prolate spheroidal radial function of the second kind and its derivative. + obl_ang1 -- Oblate spheroidal angular function of the first kind and its derivative. + obl_rad1 -- Oblate spheroidal radial function of the first kind and its derivative. + obl_rad2 -- Oblate spheroidal radial function of the second kind and its derivative. + pro_cv -- Characteristic value of prolate spheroidal function. + obl_cv -- Characteristic value of oblate spheroidal function. + pro_cv_seq -- Characteristic values for prolate spheroidal wave functions. + obl_cv_seq -- Characteristic values for oblate spheroidal wave functions. + +The following functions require pre-computed characteristic value: + +.. autosummary:: + :toctree: generated/ + + pro_ang1_cv -- Prolate spheroidal angular function pro_ang1 for precomputed characteristic value. + pro_rad1_cv -- Prolate spheroidal radial function pro_rad1 for precomputed characteristic value. + pro_rad2_cv -- Prolate spheroidal radial function pro_rad2 for precomputed characteristic value. 
+ obl_ang1_cv -- Oblate spheroidal angular function obl_ang1 for precomputed characteristic value. + obl_rad1_cv -- Oblate spheroidal radial function obl_rad1 for precomputed characteristic value. + obl_rad2_cv -- Oblate spheroidal radial function obl_rad2 for precomputed characteristic value. + +Kelvin functions +---------------- + +.. autosummary:: + :toctree: generated/ + + kelvin -- Kelvin functions as complex numbers. + kelvin_zeros -- Compute nt zeros of all Kelvin functions. + ber -- Kelvin function ber. + bei -- Kelvin function bei. + berp -- Derivative of the Kelvin function `ber`. + beip -- Derivative of the Kelvin function `bei`. + ker -- Kelvin function ker. + kei -- Kelvin function kei. + kerp -- Derivative of the Kelvin function ker. + keip -- Derivative of the Kelvin function kei. + +The following functions do not accept NumPy arrays (they are not +universal functions): + +.. autosummary:: + :toctree: generated/ + + ber_zeros -- Compute nt zeros of the Kelvin function ber(x). + bei_zeros -- Compute nt zeros of the Kelvin function bei(x). + berp_zeros -- Compute nt zeros of the Kelvin function ber'(x). + beip_zeros -- Compute nt zeros of the Kelvin function bei'(x). + ker_zeros -- Compute nt zeros of the Kelvin function ker(x). + kei_zeros -- Compute nt zeros of the Kelvin function kei(x). + kerp_zeros -- Compute nt zeros of the Kelvin function ker'(x). + keip_zeros -- Compute nt zeros of the Kelvin function kei'(x). + +Combinatorics +------------- + +.. autosummary:: + :toctree: generated/ + + comb -- The number of combinations of N things taken k at a time. + perm -- Permutations of N things taken k at a time, i.e., k-permutations of N. + stirling2 -- Stirling numbers of the second kind. + +Lambert W and related functions +------------------------------- + +.. autosummary:: + :toctree: generated/ + + lambertw -- Lambert W function. + wrightomega -- Wright Omega function. + +Other special functions +----------------------- + +.. 
autosummary:: + :toctree: generated/ + + agm -- Arithmetic, Geometric Mean. + bernoulli -- Bernoulli numbers B0..Bn (inclusive). + binom -- Binomial coefficient + diric -- Periodic sinc function, also called the Dirichlet function. + euler -- Euler numbers E0..En (inclusive). + expn -- Exponential integral E_n. + exp1 -- Exponential integral E_1 of complex argument z. + expi -- Exponential integral Ei. + factorial -- The factorial of a number or array of numbers. + factorial2 -- Double factorial. + factorialk -- Multifactorial of n of order k, n(!!...!). + shichi -- Hyperbolic sine and cosine integrals. + sici -- Sine and cosine integrals. + softmax -- Softmax function. + log_softmax -- Logarithm of softmax function. + spence -- Spence's function, also known as the dilogarithm. + zeta -- Riemann zeta function. + zetac -- Riemann zeta function minus 1. + +Convenience functions +--------------------- + +.. autosummary:: + :toctree: generated/ + + cbrt -- Cube root of `x`. + exp10 -- 10**x. + exp2 -- 2**x. + radian -- Convert from degrees to radians. + cosdg -- Cosine of the angle `x` given in degrees. + sindg -- Sine of angle given in degrees. + tandg -- Tangent of angle x given in degrees. + cotdg -- Cotangent of the angle `x` given in degrees. + log1p -- Calculates log(1+x) for use when `x` is near zero. + expm1 -- ``exp(x) - 1`` for use when `x` is near zero. + cosm1 -- ``cos(x) - 1`` for use when `x` is near zero. + powm1 -- ``x**y - 1`` for use when `y` is near zero or `x` is near 1. + round -- Round to nearest integer. + xlogy -- Compute ``x*log(y)`` so that the result is 0 if ``x = 0``. + xlog1py -- Compute ``x*log1p(y)`` so that the result is 0 if ``x = 0``. + logsumexp -- Compute the log of the sum of exponentials of input elements. + exprel -- Relative error exponential, (exp(x)-1)/x, for use when `x` is near zero. + sinc -- Return the sinc function. 
+ +""" # noqa: E501 + +import warnings + +from ._sf_error import SpecialFunctionWarning, SpecialFunctionError + +from . import _ufuncs +from ._ufuncs import * + +# Replace some function definitions from _ufuncs to add Array API support +from ._support_alternative_backends import ( + log_ndtr, ndtr, ndtri, erf, erfc, i0, i0e, i1, i1e, + gammaln, gammainc, gammaincc, logit, expit) + +from . import _basic +from ._basic import * + +from ._logsumexp import logsumexp, softmax, log_softmax + +from . import _orthogonal +from ._orthogonal import * + +from ._spfun_stats import multigammaln +from ._ellip_harm import ( + ellip_harm, + ellip_harm_2, + ellip_normal +) +from ._lambertw import lambertw +from ._spherical_bessel import ( + spherical_jn, + spherical_yn, + spherical_in, + spherical_kn +) + +# Deprecated namespaces, to be removed in v2.0.0 +from . import add_newdocs, basic, orthogonal, specfun, sf_error, spfun_stats + +# We replace some function definitions from _ufuncs with those from +# _support_alternative_backends above, but those are all listed in _ufuncs.__all__, +# so there is no need to consider _support_alternative_backends.__all__ here. +__all__ = _ufuncs.__all__ + _basic.__all__ + _orthogonal.__all__ +__all__ += [ + 'SpecialFunctionWarning', + 'SpecialFunctionError', + 'logsumexp', + 'softmax', + 'log_softmax', + 'multigammaln', + 'ellip_harm', + 'ellip_harm_2', + 'ellip_normal', + 'lambertw', + 'spherical_jn', + 'spherical_yn', + 'spherical_in', + 'spherical_kn', +] + +from scipy._lib._testutils import PytestTester +test = PytestTester(__name__) +del PytestTester + +_depr_msg = ('\nThis function was deprecated in SciPy 1.12.0, and will be ' + 'removed in SciPy 1.14.0. 
Use scipy.special.{} instead.') + + +def btdtr(*args, **kwargs): # type: ignore [no-redef] + warnings.warn(_depr_msg.format('betainc'), category=DeprecationWarning, + stacklevel=2) + return _ufuncs.btdtr(*args, **kwargs) + + +btdtr.__doc__ = _ufuncs.btdtr.__doc__ # type: ignore [misc] + + +def btdtri(*args, **kwargs): # type: ignore [no-redef] + warnings.warn(_depr_msg.format('betaincinv'), category=DeprecationWarning, + stacklevel=2) + return _ufuncs.btdtri(*args, **kwargs) + + +btdtri.__doc__ = _ufuncs.btdtri.__doc__ # type: ignore [misc] + + +def _get_include(): + """This function is for development purposes only. + + This function could disappear or its behavior could change at any time. + """ + import os + return os.path.dirname(__file__) diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/_basic.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/_basic.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a247f137a82f448743bc10bfa12d9f00aadc98d Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/_basic.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/_ellip_harm.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/_ellip_harm.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9aa719f03e6a2e26c3eaa059b42c3ba5708cf5c Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/_ellip_harm.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/_lambertw.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/_lambertw.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f96952fe9123b67a472433fa6322bbfa7b1a8e6 Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/_lambertw.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/_logsumexp.cpython-39.pyc 
b/.venv/Lib/site-packages/scipy/special/__pycache__/_logsumexp.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..18c23edb52cfb2bfd39ff7ee37b656f25468f2eb Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/_logsumexp.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/add_newdocs.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/add_newdocs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5e63fdbc3779b9e686efd49f6a03028065f44f09 Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/add_newdocs.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/basic.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/basic.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72b5ada1c96e7b0eedbfe622fb5c48691983064d Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/basic.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/orthogonal.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/orthogonal.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3c13851ec5659da4d96b34749afc1fc18f7a209c Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/orthogonal.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/sf_error.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/sf_error.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f09f4b271753833510cdf06d8052468ad9502dc9 Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/sf_error.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/specfun.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/specfun.cpython-39.pyc 
new file mode 100644 index 0000000000000000000000000000000000000000..1d821d2b89e21e94dc2d4e2136c40ff0f113da5c Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/specfun.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/__pycache__/spfun_stats.cpython-39.pyc b/.venv/Lib/site-packages/scipy/special/__pycache__/spfun_stats.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dcd0bc2c8b469c9e482aecce4085e8b752ffc7f1 Binary files /dev/null and b/.venv/Lib/site-packages/scipy/special/__pycache__/spfun_stats.cpython-39.pyc differ diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/__init__.py b/.venv/Lib/site-packages/scipy/special/_precompute/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/cosine_cdf.py b/.venv/Lib/site-packages/scipy/special/_precompute/cosine_cdf.py new file mode 100644 index 0000000000000000000000000000000000000000..b09d1309c33da7e3bc2d466cb7b0402fcf12bf52 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/cosine_cdf.py @@ -0,0 +1,17 @@ +import mpmath + + +def f(x): + return (mpmath.pi + x + mpmath.sin(x)) / (2*mpmath.pi) + + +# Note: 40 digits might be overkill; a few more digits than the default +# might be sufficient. 
+mpmath.mp.dps = 40 +ts = mpmath.taylor(f, -mpmath.pi, 20) +p, q = mpmath.pade(ts, 9, 10) + +p = [float(c) for c in p] +q = [float(c) for c in q] +print('p =', p) +print('q =', q) diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/expn_asy.py b/.venv/Lib/site-packages/scipy/special/_precompute/expn_asy.py new file mode 100644 index 0000000000000000000000000000000000000000..29a0d5b58c2c6e236f1775e9f57cab3ab74efc41 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/expn_asy.py @@ -0,0 +1,54 @@ +"""Precompute the polynomials for the asymptotic expansion of the +generalized exponential integral. + +Sources +------- +[1] NIST, Digital Library of Mathematical Functions, + https://dlmf.nist.gov/8.20#ii + +""" +import os + +try: + import sympy + from sympy import Poly + x = sympy.symbols('x') +except ImportError: + pass + + +def generate_A(K): + A = [Poly(1, x)] + for k in range(K): + A.append(Poly(1 - 2*k*x, x)*A[k] + Poly(x*(x + 1))*A[k].diff()) + return A + + +WARNING = """\ +/* This file was automatically generated by _precompute/expn_asy.py. + * Do not edit it manually! 
+ */ +""" + + +def main(): + print(__doc__) + fn = os.path.join('..', 'cephes', 'expn.h') + + K = 12 + A = generate_A(K) + with open(fn + '.new', 'w') as f: + f.write(WARNING) + f.write(f"#define nA {len(A)}\n") + for k, Ak in enumerate(A): + ', '.join([str(x.evalf(18)) for x in Ak.coeffs()]) + f.write(f"static const double A{k}[] = {{tmp}};\n") + ", ".join([f"A{k}" for k in range(K + 1)]) + f.write("static const double *A[] = {{tmp}};\n") + ", ".join([str(Ak.degree()) for Ak in A]) + f.write("static const int Adegs[] = {{tmp}};\n") + os.rename(fn + '.new', fn) + + +if __name__ == "__main__": + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_asy.py b/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_asy.py new file mode 100644 index 0000000000000000000000000000000000000000..b4aace2dabd09d3cf05fa2e31d7670f12264447b --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_asy.py @@ -0,0 +1,116 @@ +""" +Precompute coefficients of Temme's asymptotic expansion for gammainc. + +This takes about 8 hours to run on a 2.3 GHz Macbook Pro with 4GB ram. 
+ +Sources: +[1] NIST, "Digital Library of Mathematical Functions", + https://dlmf.nist.gov/ + +""" +import os +from scipy.special._precompute.utils import lagrange_inversion + +try: + import mpmath as mp +except ImportError: + pass + + +def compute_a(n): + """a_k from DLMF 5.11.6""" + a = [mp.sqrt(2)/2] + for k in range(1, n): + ak = a[-1]/k + for j in range(1, len(a)): + ak -= a[j]*a[-j]/(j + 1) + ak /= a[0]*(1 + mp.mpf(1)/(k + 1)) + a.append(ak) + return a + + +def compute_g(n): + """g_k from DLMF 5.11.3/5.11.5""" + a = compute_a(2*n) + g = [mp.sqrt(2)*mp.rf(0.5, k)*a[2*k] for k in range(n)] + return g + + +def eta(lam): + """Function from DLMF 8.12.1 shifted to be centered at 0.""" + if lam > 0: + return mp.sqrt(2*(lam - mp.log(lam + 1))) + elif lam < 0: + return -mp.sqrt(2*(lam - mp.log(lam + 1))) + else: + return 0 + + +def compute_alpha(n): + """alpha_n from DLMF 8.12.13""" + coeffs = mp.taylor(eta, 0, n - 1) + return lagrange_inversion(coeffs) + + +def compute_d(K, N): + """d_{k, n} from DLMF 8.12.12""" + M = N + 2*K + d0 = [-mp.mpf(1)/3] + alpha = compute_alpha(M + 2) + for n in range(1, M): + d0.append((n + 2)*alpha[n+2]) + d = [d0] + g = compute_g(K) + for k in range(1, K): + dk = [] + for n in range(M - 2*k): + dk.append((-1)**k*g[k]*d[0][n] + (n + 2)*d[k-1][n+2]) + d.append(dk) + for k in range(K): + d[k] = d[k][:N] + return d + + +header = \ +r"""/* This file was automatically generated by _precomp/gammainc.py. + * Do not edit it manually! 
+ */ + +#ifndef IGAM_H +#define IGAM_H + +#define K {} +#define N {} + +static const double d[K][N] = +{{""" + +footer = \ +r""" +#endif +""" + + +def main(): + print(__doc__) + K = 25 + N = 25 + with mp.workdps(50): + d = compute_d(K, N) + fn = os.path.join(os.path.dirname(__file__), '..', 'cephes', 'igam.h') + with open(fn + '.new', 'w') as f: + f.write(header.format(K, N)) + for k, row in enumerate(d): + row = [mp.nstr(x, 17, min_fixed=0, max_fixed=0) for x in row] + f.write('{') + f.write(", ".join(row)) + if k < K - 1: + f.write('},\n') + else: + f.write('}};\n') + f.write(footer) + os.rename(fn + '.new', fn) + + +if __name__ == "__main__": + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_data.py b/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_data.py new file mode 100644 index 0000000000000000000000000000000000000000..dfa2567db018515912411e7381cde4fff33a74bf --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/gammainc_data.py @@ -0,0 +1,124 @@ +"""Compute gammainc and gammaincc for large arguments and parameters +and save the values to data files for use in tests. We can't just +compare to mpmath's gammainc in test_mpmath.TestSystematic because it +would take too long. + +Note that mpmath's gammainc is computed using hypercomb, but since it +doesn't allow the user to increase the maximum number of terms used in +the series it doesn't converge for many arguments. To get around this +we copy the mpmath implementation but use more terms. + +This takes about 17 minutes to run on a 2.3 GHz Macbook Pro with 4GB +ram. + +Sources: +[1] Fredrik Johansson and others. mpmath: a Python library for + arbitrary-precision floating-point arithmetic (version 0.19), + December 2013. http://mpmath.org/. 
+ +""" +import os +from time import time +import numpy as np +from numpy import pi + +from scipy.special._mptestutils import mpf2float + +try: + import mpmath as mp +except ImportError: + pass + + +def gammainc(a, x, dps=50, maxterms=10**8): + """Compute gammainc exactly like mpmath does but allow for more + summands in hypercomb. See + + mpmath/functions/expintegrals.py#L134 + + in the mpmath github repository. + + """ + with mp.workdps(dps): + z, a, b = mp.mpf(a), mp.mpf(x), mp.mpf(x) + G = [z] + negb = mp.fneg(b, exact=True) + + def h(z): + T1 = [mp.exp(negb), b, z], [1, z, -1], [], G, [1], [1+z], b + return (T1,) + + res = mp.hypercomb(h, [z], maxterms=maxterms) + return mpf2float(res) + + +def gammaincc(a, x, dps=50, maxterms=10**8): + """Compute gammaincc exactly like mpmath does but allow for more + terms in hypercomb. See + + mpmath/functions/expintegrals.py#L187 + + in the mpmath github repository. + + """ + with mp.workdps(dps): + z, a = a, x + + if mp.isint(z): + try: + # mpmath has a fast integer path + return mpf2float(mp.gammainc(z, a=a, regularized=True)) + except mp.libmp.NoConvergence: + pass + nega = mp.fneg(a, exact=True) + G = [z] + # Use 2F0 series when possible; fall back to lower gamma representation + try: + def h(z): + r = z-1 + return [([mp.exp(nega), a], [1, r], [], G, [1, -r], [], 1/nega)] + return mpf2float(mp.hypercomb(h, [z], force_series=True)) + except mp.libmp.NoConvergence: + def h(z): + T1 = [], [1, z-1], [z], G, [], [], 0 + T2 = [-mp.exp(nega), a, z], [1, z, -1], [], G, [1], [1+z], a + return T1, T2 + return mpf2float(mp.hypercomb(h, [z], maxterms=maxterms)) + + +def main(): + t0 = time() + # It would be nice to have data for larger values, but either this + # requires prohibitively large precision (dps > 800) or mpmath has + # a bug. For example, gammainc(1e20, 1e20, dps=800) returns a + # value around 0.03, while the true value should be close to 0.5 + # (DLMF 8.12.15). 
+ print(__doc__) + pwd = os.path.dirname(__file__) + r = np.logspace(4, 14, 30) + ltheta = np.logspace(np.log10(pi/4), np.log10(np.arctan(0.6)), 30) + utheta = np.logspace(np.log10(pi/4), np.log10(np.arctan(1.4)), 30) + + regimes = [(gammainc, ltheta), (gammaincc, utheta)] + for func, theta in regimes: + rg, thetag = np.meshgrid(r, theta) + a, x = rg*np.cos(thetag), rg*np.sin(thetag) + a, x = a.flatten(), x.flatten() + dataset = [] + for i, (a0, x0) in enumerate(zip(a, x)): + if func == gammaincc: + # Exploit the fast integer path in gammaincc whenever + # possible so that the computation doesn't take too + # long + a0, x0 = np.floor(a0), np.floor(x0) + dataset.append((a0, x0, func(a0, x0))) + dataset = np.array(dataset) + filename = os.path.join(pwd, '..', 'tests', 'data', 'local', + f'{func.__name__}.txt') + np.savetxt(filename, dataset) + + print(f"{(time() - t0)/60} minutes elapsed") + + +if __name__ == "__main__": + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/lambertw.py b/.venv/Lib/site-packages/scipy/special/_precompute/lambertw.py new file mode 100644 index 0000000000000000000000000000000000000000..dd7e1e8b11a6b8f5dd43d01e4d8f33745fc50384 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/lambertw.py @@ -0,0 +1,68 @@ +"""Compute a Pade approximation for the principal branch of the +Lambert W function around 0 and compare it to various other +approximations. 
+ +""" +import numpy as np + +try: + import mpmath + import matplotlib.pyplot as plt +except ImportError: + pass + + +def lambertw_pade(): + derivs = [mpmath.diff(mpmath.lambertw, 0, n=n) for n in range(6)] + p, q = mpmath.pade(derivs, 3, 2) + return p, q + + +def main(): + print(__doc__) + with mpmath.workdps(50): + p, q = lambertw_pade() + p, q = p[::-1], q[::-1] + print(f"p = {p}") + print(f"q = {q}") + + x, y = np.linspace(-1.5, 1.5, 75), np.linspace(-1.5, 1.5, 75) + x, y = np.meshgrid(x, y) + z = x + 1j*y + lambertw_std = [] + for z0 in z.flatten(): + lambertw_std.append(complex(mpmath.lambertw(z0))) + lambertw_std = np.array(lambertw_std).reshape(x.shape) + + fig, axes = plt.subplots(nrows=3, ncols=1) + # Compare Pade approximation to true result + p = np.array([float(p0) for p0 in p]) + q = np.array([float(q0) for q0 in q]) + pade_approx = np.polyval(p, z)/np.polyval(q, z) + pade_err = abs(pade_approx - lambertw_std) + axes[0].pcolormesh(x, y, pade_err) + # Compare two terms of asymptotic series to true result + asy_approx = np.log(z) - np.log(np.log(z)) + asy_err = abs(asy_approx - lambertw_std) + axes[1].pcolormesh(x, y, asy_err) + # Compare two terms of the series around the branch point to the + # true result + p = np.sqrt(2*(np.exp(1)*z + 1)) + series_approx = -1 + p - p**2/3 + series_err = abs(series_approx - lambertw_std) + im = axes[2].pcolormesh(x, y, series_err) + + fig.colorbar(im, ax=axes.ravel().tolist()) + plt.show() + + fig, ax = plt.subplots(nrows=1, ncols=1) + pade_better = pade_err < asy_err + im = ax.pcolormesh(x, y, pade_better) + t = np.linspace(-0.3, 0.3) + ax.plot(-2.5*abs(t) - 0.2, t, 'r') + fig.colorbar(im, ax=ax) + plt.show() + + +if __name__ == '__main__': + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/loggamma.py b/.venv/Lib/site-packages/scipy/special/_precompute/loggamma.py new file mode 100644 index 0000000000000000000000000000000000000000..3ee34b528bfc6d112a8cf52b05dc66c20ea41e8f --- /dev/null +++ 
b/.venv/Lib/site-packages/scipy/special/_precompute/loggamma.py @@ -0,0 +1,43 @@ +"""Precompute series coefficients for log-Gamma.""" + +try: + import mpmath +except ImportError: + pass + + +def stirling_series(N): + with mpmath.workdps(100): + coeffs = [mpmath.bernoulli(2*n)/(2*n*(2*n - 1)) + for n in range(1, N + 1)] + return coeffs + + +def taylor_series_at_1(N): + coeffs = [] + with mpmath.workdps(100): + coeffs.append(-mpmath.euler) + for n in range(2, N + 1): + coeffs.append((-1)**n*mpmath.zeta(n)/n) + return coeffs + + +def main(): + print(__doc__) + print() + stirling_coeffs = [mpmath.nstr(x, 20, min_fixed=0, max_fixed=0) + for x in stirling_series(8)[::-1]] + taylor_coeffs = [mpmath.nstr(x, 20, min_fixed=0, max_fixed=0) + for x in taylor_series_at_1(23)[::-1]] + print("Stirling series coefficients") + print("----------------------------") + print("\n".join(stirling_coeffs)) + print() + print("Taylor series coefficients") + print("--------------------------") + print("\n".join(taylor_coeffs)) + print() + + +if __name__ == '__main__': + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/struve_convergence.py b/.venv/Lib/site-packages/scipy/special/_precompute/struve_convergence.py new file mode 100644 index 0000000000000000000000000000000000000000..cf55a0f47f9cdd97f6e2434eaff95eebf738a5c4 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/struve_convergence.py @@ -0,0 +1,131 @@ +""" +Convergence regions of the expansions used in ``struve.c`` + +Note that for v >> z both functions tend rapidly to 0, +and for v << -z, they tend to infinity. + +The floating-point functions over/underflow in the lower left and right +corners of the figure. 
+ + +Figure legend +============= + +Red region + Power series is close (1e-12) to the mpmath result + +Blue region + Asymptotic series is close to the mpmath result + +Green region + Bessel series is close to the mpmath result + +Dotted colored lines + Boundaries of the regions + +Solid colored lines + Boundaries estimated by the routine itself. These will be used + for determining which of the results to use. + +Black dashed line + The line z = 0.7*|v| + 12 + +""" +import numpy as np +import matplotlib.pyplot as plt + +import mpmath + + +def err_metric(a, b, atol=1e-290): + m = abs(a - b) / (atol + abs(b)) + m[np.isinf(b) & (a == b)] = 0 + return m + + +def do_plot(is_h=True): + from scipy.special._ufuncs import (_struve_power_series, + _struve_asymp_large_z, + _struve_bessel_series) + + vs = np.linspace(-1000, 1000, 91) + zs = np.sort(np.r_[1e-5, 1.0, np.linspace(0, 700, 91)[1:]]) + + rp = _struve_power_series(vs[:,None], zs[None,:], is_h) + ra = _struve_asymp_large_z(vs[:,None], zs[None,:], is_h) + rb = _struve_bessel_series(vs[:,None], zs[None,:], is_h) + + mpmath.mp.dps = 50 + if is_h: + def sh(v, z): + return float(mpmath.struveh(mpmath.mpf(v), mpmath.mpf(z))) + else: + def sh(v, z): + return float(mpmath.struvel(mpmath.mpf(v), mpmath.mpf(z))) + ex = np.vectorize(sh, otypes='d')(vs[:,None], zs[None,:]) + + err_a = err_metric(ra[0], ex) + 1e-300 + err_p = err_metric(rp[0], ex) + 1e-300 + err_b = err_metric(rb[0], ex) + 1e-300 + + err_est_a = abs(ra[1]/ra[0]) + err_est_p = abs(rp[1]/rp[0]) + err_est_b = abs(rb[1]/rb[0]) + + z_cutoff = 0.7*abs(vs) + 12 + + levels = [-1000, -12] + + plt.cla() + + plt.hold(1) + plt.contourf(vs, zs, np.log10(err_p).T, + levels=levels, colors=['r', 'r'], alpha=0.1) + plt.contourf(vs, zs, np.log10(err_a).T, + levels=levels, colors=['b', 'b'], alpha=0.1) + plt.contourf(vs, zs, np.log10(err_b).T, + levels=levels, colors=['g', 'g'], alpha=0.1) + + plt.contour(vs, zs, np.log10(err_p).T, + levels=levels, colors=['r', 'r'], 
linestyles=[':', ':']) + plt.contour(vs, zs, np.log10(err_a).T, + levels=levels, colors=['b', 'b'], linestyles=[':', ':']) + plt.contour(vs, zs, np.log10(err_b).T, + levels=levels, colors=['g', 'g'], linestyles=[':', ':']) + + lp = plt.contour(vs, zs, np.log10(err_est_p).T, + levels=levels, colors=['r', 'r'], linestyles=['-', '-']) + la = plt.contour(vs, zs, np.log10(err_est_a).T, + levels=levels, colors=['b', 'b'], linestyles=['-', '-']) + lb = plt.contour(vs, zs, np.log10(err_est_b).T, + levels=levels, colors=['g', 'g'], linestyles=['-', '-']) + + plt.clabel(lp, fmt={-1000: 'P', -12: 'P'}) + plt.clabel(la, fmt={-1000: 'A', -12: 'A'}) + plt.clabel(lb, fmt={-1000: 'B', -12: 'B'}) + + plt.plot(vs, z_cutoff, 'k--') + + plt.xlim(vs.min(), vs.max()) + plt.ylim(zs.min(), zs.max()) + + plt.xlabel('v') + plt.ylabel('z') + + +def main(): + plt.clf() + plt.subplot(121) + do_plot(True) + plt.title('Struve H') + + plt.subplot(122) + do_plot(False) + plt.title('Struve L') + + plt.savefig('struve_convergence.png') + plt.show() + + +if __name__ == "__main__": + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/utils.py b/.venv/Lib/site-packages/scipy/special/_precompute/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..30e412909149655b2a74bf179a076f4e3e0140c2 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/utils.py @@ -0,0 +1,38 @@ +try: + import mpmath as mp +except ImportError: + pass + +try: + from sympy.abc import x +except ImportError: + pass + + +def lagrange_inversion(a): + """Given a series + + f(x) = a[1]*x + a[2]*x**2 + ... + a[n-1]*x**(n - 1), + + use the Lagrange inversion formula to compute a series + + g(x) = b[1]*x + b[2]*x**2 + ... + b[n-1]*x**(n - 1) + + so that f(g(x)) = g(f(x)) = x mod x**n. We must have a[0] = 0, so + necessarily b[0] = 0 too. + + The algorithm is naive and could be improved, but speed isn't an + issue here and it's easy to read. 
+ + """ + n = len(a) + f = sum(a[i]*x**i for i in range(n)) + h = (x/f).series(x, 0, n).removeO() + hpower = [h**0] + for k in range(n): + hpower.append((hpower[-1]*h).expand()) + b = [mp.mpf(0)] + for k in range(1, n): + b.append(hpower[k].coeff(x, k - 1)/k) + b = [mp.mpf(x) for x in b] + return b diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel.py b/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel.py new file mode 100644 index 0000000000000000000000000000000000000000..0274852bf2685055e44a67c2e0ff2889ea0f560f --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel.py @@ -0,0 +1,342 @@ +"""Precompute coefficients of several series expansions +of Wright's generalized Bessel function Phi(a, b, x). + +See https://dlmf.nist.gov/10.46.E1 with rho=a, beta=b, z=x. +""" +from argparse import ArgumentParser, RawTextHelpFormatter +import numpy as np +from scipy.integrate import quad +from scipy.optimize import minimize_scalar, curve_fit +from time import time + +try: + import sympy + from sympy import EulerGamma, Rational, S, Sum, \ + factorial, gamma, gammasimp, pi, polygamma, symbols, zeta + from sympy.polys.polyfuncs import horner +except ImportError: + pass + + +def series_small_a(): + """Tylor series expansion of Phi(a, b, x) in a=0 up to order 5. + """ + order = 5 + a, b, x, k = symbols("a b x k") + A = [] # terms with a + X = [] # terms with x + B = [] # terms with b (polygammas) + # Phi(a, b, x) = exp(x)/gamma(b) * sum(A[i] * X[i] * B[i]) + expression = Sum(x**k/factorial(k)/gamma(a*k+b), (k, 0, S.Infinity)) + expression = gamma(b)/sympy.exp(x) * expression + + # nth term of taylor series in a=0: a^n/n! 
* (d^n Phi(a, b, x)/da^n at a=0) + for n in range(0, order+1): + term = expression.diff(a, n).subs(a, 0).simplify().doit() + # set the whole bracket involving polygammas to 1 + x_part = (term.subs(polygamma(0, b), 1) + .replace(polygamma, lambda *args: 0)) + # sign convention: x part always positive + x_part *= (-1)**n + + A.append(a**n/factorial(n)) + X.append(horner(x_part)) + B.append(horner((term/x_part).simplify())) + + s = "Tylor series expansion of Phi(a, b, x) in a=0 up to order 5.\n" + s += "Phi(a, b, x) = exp(x)/gamma(b) * sum(A[i] * X[i] * B[i], i=0..5)\n" + for name, c in zip(['A', 'X', 'B'], [A, X, B]): + for i in range(len(c)): + s += f"\n{name}[{i}] = " + str(c[i]) + return s + + +# expansion of digamma +def dg_series(z, n): + """Symbolic expansion of digamma(z) in z=0 to order n. + + See https://dlmf.nist.gov/5.7.E4 and with https://dlmf.nist.gov/5.5.E2 + """ + k = symbols("k") + return -1/z - EulerGamma + \ + sympy.summation((-1)**k * zeta(k) * z**(k-1), (k, 2, n+1)) + + +def pg_series(k, z, n): + """Symbolic expansion of polygamma(k, z) in z=0 to order n.""" + return sympy.diff(dg_series(z, n+k), z, k) + + +def series_small_a_small_b(): + """Tylor series expansion of Phi(a, b, x) in a=0 and b=0 up to order 5. + + Be aware of cancellation of poles in b=0 of digamma(b)/Gamma(b) and + polygamma functions. + + digamma(b)/Gamma(b) = -1 - 2*M_EG*b + O(b^2) + digamma(b)^2/Gamma(b) = 1/b + 3*M_EG + b*(-5/12*PI^2+7/2*M_EG^2) + O(b^2) + polygamma(1, b)/Gamma(b) = 1/b + M_EG + b*(1/12*PI^2 + 1/2*M_EG^2) + O(b^2) + and so on. + """ + order = 5 + a, b, x, k = symbols("a b x k") + M_PI, M_EG, M_Z3 = symbols("M_PI M_EG M_Z3") + c_subs = {pi: M_PI, EulerGamma: M_EG, zeta(3): M_Z3} + A = [] # terms with a + X = [] # terms with x + B = [] # terms with b (polygammas expanded) + C = [] # terms that generate B + # Phi(a, b, x) = exp(x) * sum(A[i] * X[i] * B[i]) + # B[0] = 1 + # B[k] = sum(C[k] * b**k/k!, k=0..) 
+ # Note: C[k] can be obtained from a series expansion of 1/gamma(b). + expression = gamma(b)/sympy.exp(x) * \ + Sum(x**k/factorial(k)/gamma(a*k+b), (k, 0, S.Infinity)) + + # nth term of taylor series in a=0: a^n/n! * (d^n Phi(a, b, x)/da^n at a=0) + for n in range(0, order+1): + term = expression.diff(a, n).subs(a, 0).simplify().doit() + # set the whole bracket involving polygammas to 1 + x_part = (term.subs(polygamma(0, b), 1) + .replace(polygamma, lambda *args: 0)) + # sign convention: x part always positive + x_part *= (-1)**n + # expansion of polygamma part with 1/gamma(b) + pg_part = term/x_part/gamma(b) + if n >= 1: + # Note: highest term is digamma^n + pg_part = pg_part.replace(polygamma, + lambda k, x: pg_series(k, x, order+1+n)) + pg_part = (pg_part.series(b, 0, n=order+1-n) + .removeO() + .subs(polygamma(2, 1), -2*zeta(3)) + .simplify() + ) + + A.append(a**n/factorial(n)) + X.append(horner(x_part)) + B.append(pg_part) + + # Calculate C and put in the k! + C = sympy.Poly(B[1].subs(c_subs), b).coeffs() + C.reverse() + for i in range(len(C)): + C[i] = (C[i] * factorial(i)).simplify() + + s = "Tylor series expansion of Phi(a, b, x) in a=0 and b=0 up to order 5." + s += "\nPhi(a, b, x) = exp(x) * sum(A[i] * X[i] * B[i], i=0..5)\n" + s += "B[0] = 1\n" + s += "B[i] = sum(C[k+i-1] * b**k/k!, k=0..)\n" + s += "\nM_PI = pi" + s += "\nM_EG = EulerGamma" + s += "\nM_Z3 = zeta(3)" + for name, c in zip(['A', 'X'], [A, X]): + for i in range(len(c)): + s += f"\n{name}[{i}] = " + s += str(c[i]) + # For C, do also compute the values numerically + for i in range(len(C)): + s += f"\n# C[{i}] = " + s += str(C[i]) + s += f"\nC[{i}] = " + s += str(C[i].subs({M_EG: EulerGamma, M_PI: pi, M_Z3: zeta(3)}) + .evalf(17)) + + # Does B have the assumed structure? + s += "\n\nTest if B[i] does have the assumed structure." + s += "\nC[i] are derived from B[1] alone." + s += "\nTest B[2] == C[1] + b*C[2] + b^2/2*C[3] + b^3/6*C[4] + .." 
+ test = sum([b**k/factorial(k) * C[k+1] for k in range(order-1)]) + test = (test - B[2].subs(c_subs)).simplify() + s += f"\ntest successful = {test==S(0)}" + s += "\nTest B[3] == C[2] + b*C[3] + b^2/2*C[4] + .." + test = sum([b**k/factorial(k) * C[k+2] for k in range(order-2)]) + test = (test - B[3].subs(c_subs)).simplify() + s += f"\ntest successful = {test==S(0)}" + return s + + +def asymptotic_series(): + """Asymptotic expansion for large x. + + Phi(a, b, x) ~ Z^(1/2-b) * exp((1+a)/a * Z) * sum_k (-1)^k * C_k / Z^k + Z = (a*x)^(1/(1+a)) + + Wright (1935) lists the coefficients C_0 and C_1 (he calls them a_0 and + a_1). With slightly different notation, Paris (2017) lists coefficients + c_k up to order k=3. + Paris (2017) uses ZP = (1+a)/a * Z (ZP = Z of Paris) and + C_k = C_0 * (-a/(1+a))^k * c_k + """ + order = 8 + + class g(sympy.Function): + """Helper function g according to Wright (1935) + + g(n, rho, v) = (1 + (rho+2)/3 * v + (rho+2)*(rho+3)/(2*3) * v^2 + ...) + + Note: Wright (1935) uses square root of above definition. + """ + nargs = 3 + + @classmethod + def eval(cls, n, rho, v): + if not n >= 0: + raise ValueError("must have n >= 0") + elif n == 0: + return 1 + else: + return g(n-1, rho, v) \ + + gammasimp(gamma(rho+2+n)/gamma(rho+2)) \ + / gammasimp(gamma(3+n)/gamma(3))*v**n + + class coef_C(sympy.Function): + """Calculate coefficients C_m for integer m. + + C_m is the coefficient of v^(2*m) in the Taylor expansion in v=0 of + Gamma(m+1/2)/(2*pi) * (2/(rho+1))^(m+1/2) * (1-v)^(-b) + * g(rho, v)^(-m-1/2) + """ + nargs = 3 + + @classmethod + def eval(cls, m, rho, beta): + if not m >= 0: + raise ValueError("must have m >= 0") + + v = symbols("v") + expression = (1-v)**(-beta) * g(2*m, rho, v)**(-m-Rational(1, 2)) + res = expression.diff(v, 2*m).subs(v, 0) / factorial(2*m) + res = res * (gamma(m + Rational(1, 2)) / (2*pi) + * (2/(rho+1))**(m + Rational(1, 2))) + return res + + # in order to have nice ordering/sorting of expressions, we set a = xa. 
+ xa, b, xap1 = symbols("xa b xap1") + C0 = coef_C(0, xa, b) + # a1 = a(1, rho, beta) + s = "Asymptotic expansion for large x\n" + s += "Phi(a, b, x) = Z**(1/2-b) * exp((1+a)/a * Z) \n" + s += " * sum((-1)**k * C[k]/Z**k, k=0..6)\n\n" + s += "Z = pow(a * x, 1/(1+a))\n" + s += "A[k] = pow(a, k)\n" + s += "B[k] = pow(b, k)\n" + s += "Ap1[k] = pow(1+a, k)\n\n" + s += "C[0] = 1./sqrt(2. * M_PI * Ap1[1])\n" + for i in range(1, order+1): + expr = (coef_C(i, xa, b) / (C0/(1+xa)**i)).simplify() + factor = [x.denominator() for x in sympy.Poly(expr).coeffs()] + factor = sympy.lcm(factor) + expr = (expr * factor).simplify().collect(b, sympy.factor) + expr = expr.xreplace({xa+1: xap1}) + s += f"C[{i}] = C[0] / ({factor} * Ap1[{i}])\n" + s += f"C[{i}] *= {str(expr)}\n\n" + import re + re_a = re.compile(r'xa\*\*(\d+)') + s = re_a.sub(r'A[\1]', s) + re_b = re.compile(r'b\*\*(\d+)') + s = re_b.sub(r'B[\1]', s) + s = s.replace('xap1', 'Ap1[1]') + s = s.replace('xa', 'a') + # max integer = 2^31-1 = 2,147,483,647. Solution: Put a point after 10 + # or more digits. + re_digits = re.compile(r'(\d{10,})') + s = re_digits.sub(r'\1.', s) + return s + + +def optimal_epsilon_integral(): + """Fit optimal choice of epsilon for integral representation. + + The integrand of + int_0^pi P(eps, a, b, x, phi) * dphi + can exhibit oscillatory behaviour. It stems from the cosine of P and can be + minimized by minimizing the arc length of the argument + f(phi) = eps * sin(phi) - x * eps^(-a) * sin(a * phi) + (1 - b) * phi + of cos(f(phi)). + We minimize the arc length in eps for a grid of values (a, b, x) and fit a + parametric function to it. + """ + def fp(eps, a, b, x, phi): + """Derivative of f w.r.t. phi.""" + eps_a = np.power(1. * eps, -a) + return eps * np.cos(phi) - a * x * eps_a * np.cos(a * phi) + 1 - b + + def arclength(eps, a, b, x, epsrel=1e-2, limit=100): + """Compute Arc length of f. 
+ + Note that the arc length of a function f from t0 to t1 is given by + int_t0^t1 sqrt(1 + f'(t)^2) dt + """ + return quad(lambda phi: np.sqrt(1 + fp(eps, a, b, x, phi)**2), + 0, np.pi, + epsrel=epsrel, limit=100)[0] + + # grid of minimal arc length values + data_a = [1e-3, 0.1, 0.5, 0.9, 1, 2, 4, 5, 6, 8] + data_b = [0, 1, 4, 7, 10] + data_x = [1, 1.5, 2, 4, 10, 20, 50, 100, 200, 500, 1e3, 5e3, 1e4] + data_a, data_b, data_x = np.meshgrid(data_a, data_b, data_x) + data_a, data_b, data_x = (data_a.flatten(), data_b.flatten(), + data_x.flatten()) + best_eps = [] + for i in range(data_x.size): + best_eps.append( + minimize_scalar(lambda eps: arclength(eps, data_a[i], data_b[i], + data_x[i]), + bounds=(1e-3, 1000), + method='Bounded', options={'xatol': 1e-3}).x + ) + best_eps = np.array(best_eps) + # pandas would be nice, but here a dictionary is enough + df = {'a': data_a, + 'b': data_b, + 'x': data_x, + 'eps': best_eps, + } + + def func(data, A0, A1, A2, A3, A4, A5): + """Compute parametric function to fit.""" + a = data['a'] + b = data['b'] + x = data['x'] + return (A0 * b * np.exp(-0.5 * a) + + np.exp(A1 + 1 / (1 + a) * np.log(x) - A2 * np.exp(-A3 * a) + + A4 / (1 + np.exp(A5 * a)))) + + func_params = list(curve_fit(func, df, df['eps'], method='trf')[0]) + + s = "Fit optimal eps for integrand P via minimal arc length\n" + s += "with parametric function:\n" + s += "optimal_eps = (A0 * b * exp(-a/2) + exp(A1 + 1 / (1 + a) * log(x)\n" + s += " - A2 * exp(-A3 * a) + A4 / (1 + exp(A5 * a)))\n\n" + s += "Fitted parameters A0 to A5 are:\n" + s += ', '.join([f'{x:.5g}' for x in func_params]) + return s + + +def main(): + t0 = time() + parser = ArgumentParser(description=__doc__, + formatter_class=RawTextHelpFormatter) + parser.add_argument('action', type=int, choices=[1, 2, 3, 4], + help='chose what expansion to precompute\n' + '1 : Series for small a\n' + '2 : Series for small a and small b\n' + '3 : Asymptotic series for large x\n' + ' This may take some time (>4h).\n' 
+ '4 : Fit optimal eps for integral representation.' + ) + args = parser.parse_args() + + switch = {1: lambda: print(series_small_a()), + 2: lambda: print(series_small_a_small_b()), + 3: lambda: print(asymptotic_series()), + 4: lambda: print(optimal_epsilon_integral()) + } + switch.get(args.action, lambda: print("Invalid input."))() + print(f"\n{(time() - t0)/60:.1f} minutes elapsed.\n") + + +if __name__ == '__main__': + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel_data.py b/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel_data.py new file mode 100644 index 0000000000000000000000000000000000000000..a6a36b2cc5d445c8fa5ff400fc91a105a99a29bf --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/wright_bessel_data.py @@ -0,0 +1,152 @@ +"""Compute a grid of values for Wright's generalized Bessel function +and save the values to data files for use in tests. Using mpmath directly in +tests would take too long. + +This takes about 10 minutes to run on a 2.7 GHz i7 Macbook Pro. +""" +from functools import lru_cache +import os +from time import time + +import numpy as np +from scipy.special._mptestutils import mpf2float + +try: + import mpmath as mp +except ImportError: + pass + +# exp_inf: smallest value x for which exp(x) == inf +exp_inf = 709.78271289338403 + + +# 64 Byte per value +@lru_cache(maxsize=100_000) +def rgamma_cached(x, dps): + with mp.workdps(dps): + return mp.rgamma(x) + + +def mp_wright_bessel(a, b, x, dps=50, maxterms=2000): + """Compute Wright's generalized Bessel function as Series with mpmath. 
+ """ + with mp.workdps(dps): + a, b, x = mp.mpf(a), mp.mpf(b), mp.mpf(x) + res = mp.nsum(lambda k: x**k / mp.fac(k) + * rgamma_cached(a * k + b, dps=dps), + [0, mp.inf], + tol=dps, method='s', steps=[maxterms] + ) + return mpf2float(res) + + +def main(): + t0 = time() + print(__doc__) + pwd = os.path.dirname(__file__) + eps = np.finfo(float).eps * 100 + + a_range = np.array([eps, + 1e-4 * (1 - eps), 1e-4, 1e-4 * (1 + eps), + 1e-3 * (1 - eps), 1e-3, 1e-3 * (1 + eps), + 0.1, 0.5, + 1 * (1 - eps), 1, 1 * (1 + eps), + 1.5, 2, 4.999, 5, 10]) + b_range = np.array([0, eps, 1e-10, 1e-5, 0.1, 1, 2, 10, 20, 100]) + x_range = np.array([0, eps, 1 - eps, 1, 1 + eps, + 1.5, + 2 - eps, 2, 2 + eps, + 9 - eps, 9, 9 + eps, + 10 * (1 - eps), 10, 10 * (1 + eps), + 100 * (1 - eps), 100, 100 * (1 + eps), + 500, exp_inf, 1e3, 1e5, 1e10, 1e20]) + + a_range, b_range, x_range = np.meshgrid(a_range, b_range, x_range, + indexing='ij') + a_range = a_range.flatten() + b_range = b_range.flatten() + x_range = x_range.flatten() + + # filter out some values, especially too large x + bool_filter = ~((a_range < 5e-3) & (x_range >= exp_inf)) + bool_filter = bool_filter & ~((a_range < 0.2) & (x_range > exp_inf)) + bool_filter = bool_filter & ~((a_range < 0.5) & (x_range > 1e3)) + bool_filter = bool_filter & ~((a_range < 0.56) & (x_range > 5e3)) + bool_filter = bool_filter & ~((a_range < 1) & (x_range > 1e4)) + bool_filter = bool_filter & ~((a_range < 1.4) & (x_range > 1e5)) + bool_filter = bool_filter & ~((a_range < 1.8) & (x_range > 1e6)) + bool_filter = bool_filter & ~((a_range < 2.2) & (x_range > 1e7)) + bool_filter = bool_filter & ~((a_range < 2.5) & (x_range > 1e8)) + bool_filter = bool_filter & ~((a_range < 2.9) & (x_range > 1e9)) + bool_filter = bool_filter & ~((a_range < 3.3) & (x_range > 1e10)) + bool_filter = bool_filter & ~((a_range < 3.7) & (x_range > 1e11)) + bool_filter = bool_filter & ~((a_range < 4) & (x_range > 1e12)) + bool_filter = bool_filter & ~((a_range < 4.4) & (x_range > 1e13)) 
+ bool_filter = bool_filter & ~((a_range < 4.7) & (x_range > 1e14)) + bool_filter = bool_filter & ~((a_range < 5.1) & (x_range > 1e15)) + bool_filter = bool_filter & ~((a_range < 5.4) & (x_range > 1e16)) + bool_filter = bool_filter & ~((a_range < 5.8) & (x_range > 1e17)) + bool_filter = bool_filter & ~((a_range < 6.2) & (x_range > 1e18)) + bool_filter = bool_filter & ~((a_range < 6.2) & (x_range > 1e18)) + bool_filter = bool_filter & ~((a_range < 6.5) & (x_range > 1e19)) + bool_filter = bool_filter & ~((a_range < 6.9) & (x_range > 1e20)) + + # filter out known values that do not meet the required numerical accuracy + # see test test_wright_data_grid_failures + failing = np.array([ + [0.1, 100, 709.7827128933841], + [0.5, 10, 709.7827128933841], + [0.5, 10, 1000], + [0.5, 100, 1000], + [1, 20, 100000], + [1, 100, 100000], + [1.0000000000000222, 20, 100000], + [1.0000000000000222, 100, 100000], + [1.5, 0, 500], + [1.5, 2.220446049250313e-14, 500], + [1.5, 1.e-10, 500], + [1.5, 1.e-05, 500], + [1.5, 0.1, 500], + [1.5, 20, 100000], + [1.5, 100, 100000], + ]).tolist() + + does_fail = np.full_like(a_range, False, dtype=bool) + for i in range(x_range.size): + if [a_range[i], b_range[i], x_range[i]] in failing: + does_fail[i] = True + + # filter and flatten + a_range = a_range[bool_filter] + b_range = b_range[bool_filter] + x_range = x_range[bool_filter] + does_fail = does_fail[bool_filter] + + dataset = [] + print(f"Computing {x_range.size} single points.") + print("Tests will fail for the following data points:") + for i in range(x_range.size): + a = a_range[i] + b = b_range[i] + x = x_range[i] + # take care of difficult corner cases + maxterms = 1000 + if a < 1e-6 and x >= exp_inf/10: + maxterms = 2000 + f = mp_wright_bessel(a, b, x, maxterms=maxterms) + if does_fail[i]: + print("failing data point a, b, x, value = " + f"[{a}, {b}, {x}, {f}]") + else: + dataset.append((a, b, x, f)) + dataset = np.array(dataset) + + filename = os.path.join(pwd, '..', 'tests', 'data', 
'local', + 'wright_bessel.txt') + np.savetxt(filename, dataset) + + print(f"{(time() - t0)/60:.1f} minutes elapsed") + + +if __name__ == "__main__": + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/wrightomega.py b/.venv/Lib/site-packages/scipy/special/_precompute/wrightomega.py new file mode 100644 index 0000000000000000000000000000000000000000..027ff573ba2b480652ce7a560cebf91588ee9d7c --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/wrightomega.py @@ -0,0 +1,41 @@ +import numpy as np + +try: + import mpmath +except ImportError: + pass + + +def mpmath_wrightomega(x): + return mpmath.lambertw(mpmath.exp(x), mpmath.mpf('-0.5')) + + +def wrightomega_series_error(x): + series = x + desired = mpmath_wrightomega(x) + return abs(series - desired) / desired + + +def wrightomega_exp_error(x): + exponential_approx = mpmath.exp(x) + desired = mpmath_wrightomega(x) + return abs(exponential_approx - desired) / desired + + +def main(): + desired_error = 2 * np.finfo(float).eps + print('Series Error') + for x in [1e5, 1e10, 1e15, 1e20]: + with mpmath.workdps(100): + error = wrightomega_series_error(x) + print(x, error, error < desired_error) + + print('Exp error') + for x in [-10, -25, -50, -100, -200, -400, -700, -740]: + with mpmath.workdps(100): + error = wrightomega_exp_error(x) + print(x, error, error < desired_error) + + +if __name__ == '__main__': + main() diff --git a/.venv/Lib/site-packages/scipy/special/_precompute/zetac.py b/.venv/Lib/site-packages/scipy/special/_precompute/zetac.py new file mode 100644 index 0000000000000000000000000000000000000000..76270e3263d41c3255a2daa60474872711571e18 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_precompute/zetac.py @@ -0,0 +1,27 @@ +"""Compute the Taylor series for zeta(x) - 1 around x = 0.""" +try: + import mpmath +except ImportError: + pass + + +def zetac_series(N): + coeffs = [] + with mpmath.workdps(100): + coeffs.append(-1.5) + for n in range(1, N): + coeff = 
mpmath.diff(mpmath.zeta, 0, n)/mpmath.factorial(n) + coeffs.append(coeff) + return coeffs + + +def main(): + print(__doc__) + coeffs = zetac_series(10) + coeffs = [mpmath.nstr(x, 20, min_fixed=0, max_fixed=0) + for x in coeffs] + print("\n".join(coeffs[::-1])) + + +if __name__ == '__main__': + main() diff --git a/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx.pyx b/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx.pyx new file mode 100644 index 0000000000000000000000000000000000000000..ccc4a717bce1367aa13c4e9f35074030a9466be2 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx.pyx @@ -0,0 +1,181 @@ +# This file is automatically generated by _generate_pyx.py. +# Do not edit manually! + +from libc.math cimport NAN + +include "_ufuncs_extra_code_common.pxi" + +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_ccospi "ccospi"(double complex) noexcept nogil +cdef void *_export_ccospi = _func_ccospi +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_lambertw_scalar "lambertw_scalar"(double complex, long, double) noexcept nogil +cdef void *_export_lambertw_scalar = _func_lambertw_scalar +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_csinpi "csinpi"(double complex) noexcept nogil +cdef void *_export_csinpi = _func_csinpi +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func__stirling2_inexact "_stirling2_inexact"(double, double) noexcept nogil +cdef void *_export__stirling2_inexact = _func__stirling2_inexact +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_ibeta_float "ibeta_float"(float, float, float) noexcept nogil +cdef void *_export_ibeta_float = _func_ibeta_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_ibeta_double "ibeta_double"(double, double, double) noexcept nogil +cdef void *_export_ibeta_double = _func_ibeta_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_ibetac_float "ibetac_float"(float, float, float) noexcept nogil +cdef 
void *_export_ibetac_float = _func_ibetac_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_ibetac_double "ibetac_double"(double, double, double) noexcept nogil +cdef void *_export_ibetac_double = _func_ibetac_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_ibetac_inv_float "ibetac_inv_float"(float, float, float) noexcept nogil +cdef void *_export_ibetac_inv_float = _func_ibetac_inv_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_ibetac_inv_double "ibetac_inv_double"(double, double, double) noexcept nogil +cdef void *_export_ibetac_inv_double = _func_ibetac_inv_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_ibeta_inv_float "ibeta_inv_float"(float, float, float) noexcept nogil +cdef void *_export_ibeta_inv_float = _func_ibeta_inv_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_ibeta_inv_double "ibeta_inv_double"(double, double, double) noexcept nogil +cdef void *_export_ibeta_inv_double = _func_ibeta_inv_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_binom "binom"(double, double) noexcept nogil +cdef void *_export_binom = _func_binom +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_faddeeva_dawsn "faddeeva_dawsn"(double) noexcept nogil +cdef void *_export_faddeeva_dawsn = _func_faddeeva_dawsn +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_dawsn_complex "faddeeva_dawsn_complex"(double complex) noexcept nogil +cdef void *_export_faddeeva_dawsn_complex = _func_faddeeva_dawsn_complex +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_fellint_RC "fellint_RC"(double, double) noexcept nogil +cdef void *_export_fellint_RC = _func_fellint_RC +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cellint_RC "cellint_RC"(double complex, double complex) noexcept nogil +cdef void *_export_cellint_RC = _func_cellint_RC +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_fellint_RD 
"fellint_RD"(double, double, double) noexcept nogil +cdef void *_export_fellint_RD = _func_fellint_RD +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cellint_RD "cellint_RD"(double complex, double complex, double complex) noexcept nogil +cdef void *_export_cellint_RD = _func_cellint_RD +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_fellint_RF "fellint_RF"(double, double, double) noexcept nogil +cdef void *_export_fellint_RF = _func_fellint_RF +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cellint_RF "cellint_RF"(double complex, double complex, double complex) noexcept nogil +cdef void *_export_cellint_RF = _func_cellint_RF +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_fellint_RG "fellint_RG"(double, double, double) noexcept nogil +cdef void *_export_fellint_RG = _func_fellint_RG +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cellint_RG "cellint_RG"(double complex, double complex, double complex) noexcept nogil +cdef void *_export_cellint_RG = _func_cellint_RG +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_fellint_RJ "fellint_RJ"(double, double, double, double) noexcept nogil +cdef void *_export_fellint_RJ = _func_fellint_RJ +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cellint_RJ "cellint_RJ"(double complex, double complex, double complex, double complex) noexcept nogil +cdef void *_export_cellint_RJ = _func_cellint_RJ +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_erf "faddeeva_erf"(double complex) noexcept nogil +cdef void *_export_faddeeva_erf = _func_faddeeva_erf +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_erfc_complex "faddeeva_erfc_complex"(double complex) noexcept nogil +cdef void *_export_faddeeva_erfc_complex = _func_faddeeva_erfc_complex +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_faddeeva_erfcx "faddeeva_erfcx"(double) noexcept nogil +cdef void 
*_export_faddeeva_erfcx = _func_faddeeva_erfcx +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_erfcx_complex "faddeeva_erfcx_complex"(double complex) noexcept nogil +cdef void *_export_faddeeva_erfcx_complex = _func_faddeeva_erfcx_complex +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_faddeeva_erfi "faddeeva_erfi"(double) noexcept nogil +cdef void *_export_faddeeva_erfi = _func_faddeeva_erfi +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_erfi_complex "faddeeva_erfi_complex"(double complex) noexcept nogil +cdef void *_export_faddeeva_erfi_complex = _func_faddeeva_erfi_complex +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_erfinv_float "erfinv_float"(float) noexcept nogil +cdef void *_export_erfinv_float = _func_erfinv_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_erfinv_double "erfinv_double"(double) noexcept nogil +cdef void *_export_erfinv_double = _func_erfinv_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_expit "expit"(double) noexcept nogil +cdef void *_export_expit = _func_expit +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_expitf "expitf"(float) noexcept nogil +cdef void *_export_expitf = _func_expitf +cdef extern from r"_ufuncs_cxx_defs.h": + cdef long double _func_expitl "expitl"(long double) noexcept nogil +cdef void *_export_expitl = _func_expitl +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_cgamma "cgamma"(double complex) noexcept nogil +cdef void *_export_cgamma = _func_cgamma +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_hyp1f1_double "hyp1f1_double"(double, double, double) noexcept nogil +cdef void *_export_hyp1f1_double = _func_hyp1f1_double +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_log_expit "log_expit"(double) noexcept nogil +cdef void *_export_log_expit = _func_log_expit +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_log_expitf 
"log_expitf"(float) noexcept nogil +cdef void *_export_log_expitf = _func_log_expitf +cdef extern from r"_ufuncs_cxx_defs.h": + cdef long double _func_log_expitl "log_expitl"(long double) noexcept nogil +cdef void *_export_log_expitl = _func_log_expitl +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_faddeeva_log_ndtr "faddeeva_log_ndtr"(double) noexcept nogil +cdef void *_export_faddeeva_log_ndtr = _func_faddeeva_log_ndtr +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_log_ndtr_complex "faddeeva_log_ndtr_complex"(double complex) noexcept nogil +cdef void *_export_faddeeva_log_ndtr_complex = _func_faddeeva_log_ndtr_complex +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_loggamma_real "loggamma_real"(double) noexcept nogil +cdef void *_export_loggamma_real = _func_loggamma_real +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_loggamma "loggamma"(double complex) noexcept nogil +cdef void *_export_loggamma = _func_loggamma +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_logit "logit"(double) noexcept nogil +cdef void *_export_logit = _func_logit +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_logitf "logitf"(float) noexcept nogil +cdef void *_export_logitf = _func_logitf +cdef extern from r"_ufuncs_cxx_defs.h": + cdef long double _func_logitl "logitl"(long double) noexcept nogil +cdef void *_export_logitl = _func_logitl +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_ndtr "faddeeva_ndtr"(double complex) noexcept nogil +cdef void *_export_faddeeva_ndtr = _func_faddeeva_ndtr +cdef extern from r"_ufuncs_cxx_defs.h": + cdef float _func_powm1_float "powm1_float"(float, float) noexcept nogil +cdef void *_export_powm1_float = _func_powm1_float +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_powm1_double "powm1_double"(double, double) noexcept nogil +cdef void *_export_powm1_double = _func_powm1_double +cdef extern from 
r"_ufuncs_cxx_defs.h": + cdef double complex _func_cdigamma "cdigamma"(double complex) noexcept nogil +cdef void *_export_cdigamma = _func_cdigamma +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_digamma "digamma"(double) noexcept nogil +cdef void *_export_digamma = _func_digamma +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_crgamma "crgamma"(double complex) noexcept nogil +cdef void *_export_crgamma = _func_crgamma +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_faddeeva_voigt_profile "faddeeva_voigt_profile"(double, double, double) noexcept nogil +cdef void *_export_faddeeva_voigt_profile = _func_faddeeva_voigt_profile +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_faddeeva_w "faddeeva_w"(double complex) noexcept nogil +cdef void *_export_faddeeva_w = _func_faddeeva_w +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double complex _func_wrightomega "wrightomega"(double complex) noexcept nogil +cdef void *_export_wrightomega = _func_wrightomega +cdef extern from r"_ufuncs_cxx_defs.h": + cdef double _func_wrightomega_real "wrightomega_real"(double) noexcept nogil +cdef void *_export_wrightomega_real = _func_wrightomega_real \ No newline at end of file diff --git a/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx_defs.h b/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx_defs.h new file mode 100644 index 0000000000000000000000000000000000000000..37881c2ab8faef520e83769a99d8f71a04dc3447 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_ufuncs_cxx_defs.h @@ -0,0 +1,68 @@ +#ifndef UFUNCS_PROTO_H +#define UFUNCS_PROTO_H 1 +#include "_special.h" +npy_cdouble ccospi(npy_cdouble); +npy_cdouble lambertw_scalar(npy_cdouble, npy_long, npy_double); +npy_cdouble csinpi(npy_cdouble); +#include "stirling2.h" +npy_double _stirling2_inexact(npy_double, npy_double); +#include "boost_special_functions.h" +npy_float ibeta_float(npy_float, npy_float, npy_float); +npy_double ibeta_double(npy_double, 
npy_double, npy_double); +npy_float ibetac_float(npy_float, npy_float, npy_float); +npy_double ibetac_double(npy_double, npy_double, npy_double); +npy_float ibetac_inv_float(npy_float, npy_float, npy_float); +npy_double ibetac_inv_double(npy_double, npy_double, npy_double); +npy_float ibeta_inv_float(npy_float, npy_float, npy_float); +npy_double ibeta_inv_double(npy_double, npy_double, npy_double); +npy_double binom(npy_double, npy_double); +#include "_faddeeva.h" +npy_double faddeeva_dawsn(npy_double); +npy_cdouble faddeeva_dawsn_complex(npy_cdouble); +#include "ellint_carlson_wrap.hh" +npy_double fellint_RC(npy_double, npy_double); +npy_cdouble cellint_RC(npy_cdouble, npy_cdouble); +npy_double fellint_RD(npy_double, npy_double, npy_double); +npy_cdouble cellint_RD(npy_cdouble, npy_cdouble, npy_cdouble); +npy_double fellint_RF(npy_double, npy_double, npy_double); +npy_cdouble cellint_RF(npy_cdouble, npy_cdouble, npy_cdouble); +npy_double fellint_RG(npy_double, npy_double, npy_double); +npy_cdouble cellint_RG(npy_cdouble, npy_cdouble, npy_cdouble); +npy_double fellint_RJ(npy_double, npy_double, npy_double, npy_double); +npy_cdouble cellint_RJ(npy_cdouble, npy_cdouble, npy_cdouble, npy_cdouble); +npy_cdouble faddeeva_erf(npy_cdouble); +npy_cdouble faddeeva_erfc_complex(npy_cdouble); +npy_double faddeeva_erfcx(npy_double); +npy_cdouble faddeeva_erfcx_complex(npy_cdouble); +npy_double faddeeva_erfi(npy_double); +npy_cdouble faddeeva_erfi_complex(npy_cdouble); +npy_float erfinv_float(npy_float); +npy_double erfinv_double(npy_double); +#include "_logit.h" +npy_double expit(npy_double); +npy_float expitf(npy_float); +npy_longdouble expitl(npy_longdouble); +npy_cdouble cgamma(npy_cdouble); +npy_double hyp1f1_double(npy_double, npy_double, npy_double); +npy_double log_expit(npy_double); +npy_float log_expitf(npy_float); +npy_longdouble log_expitl(npy_longdouble); +npy_double faddeeva_log_ndtr(npy_double); +npy_cdouble faddeeva_log_ndtr_complex(npy_cdouble); +npy_double 
loggamma_real(npy_double); +npy_cdouble loggamma(npy_cdouble); +npy_double logit(npy_double); +npy_float logitf(npy_float); +npy_longdouble logitl(npy_longdouble); +npy_cdouble faddeeva_ndtr(npy_cdouble); +npy_float powm1_float(npy_float, npy_float); +npy_double powm1_double(npy_double, npy_double); +npy_cdouble cdigamma(npy_cdouble); +npy_double digamma(npy_double); +npy_cdouble crgamma(npy_cdouble); +npy_double faddeeva_voigt_profile(npy_double, npy_double, npy_double); +npy_cdouble faddeeva_w(npy_cdouble); +#include "_wright.h" +npy_cdouble wrightomega(npy_cdouble); +npy_double wrightomega_real(npy_double); +#endif diff --git a/.venv/Lib/site-packages/scipy/special/_ufuncs_defs.h b/.venv/Lib/site-packages/scipy/special/_ufuncs_defs.h new file mode 100644 index 0000000000000000000000000000000000000000..548a43d435b34c2b22ebcf6380c2368f269ad9f7 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/_ufuncs_defs.h @@ -0,0 +1,185 @@ +#ifndef UFUNCS_PROTO_H +#define UFUNCS_PROTO_H 1 +#include "_cosine.h" +npy_double cosine_cdf(npy_double); +npy_double cosine_invcdf(npy_double); +#include "cephes.h" +npy_double cospi(npy_double); +npy_double igam_fac(npy_double, npy_double); +npy_double kolmogc(npy_double); +npy_double kolmogci(npy_double); +npy_double kolmogp(npy_double); +npy_double lanczos_sum_expg_scaled(npy_double); +npy_double lgam1p(npy_double); +npy_double log1pmx(npy_double); +npy_double riemann_zeta(npy_double); +#include "scaled_exp1.h" +npy_double scaled_exp1(npy_double); +npy_double sinpi(npy_double); +npy_double smirnovc(npy_int, npy_double); +npy_double smirnovci(npy_int, npy_double); +npy_double smirnovp(npy_int, npy_double); +npy_double struve_asymp_large_z(npy_double, npy_double, npy_int, npy_double *); +npy_double struve_bessel_series(npy_double, npy_double, npy_int, npy_double *); +npy_double struve_power_series(npy_double, npy_double, npy_int, npy_double *); +npy_double zeta(npy_double, npy_double); +#include "amos_wrappers.h" +npy_int 
airy_wrap(npy_double, npy_double *, npy_double *, npy_double *, npy_double *); +npy_int cairy_wrap(npy_cdouble, npy_cdouble *, npy_cdouble *, npy_cdouble *, npy_cdouble *); +npy_int cairy_wrap_e(npy_cdouble, npy_cdouble *, npy_cdouble *, npy_cdouble *, npy_cdouble *); +npy_int cairy_wrap_e_real(npy_double, npy_double *, npy_double *, npy_double *, npy_double *); +npy_double bdtr(npy_double, npy_int, npy_double); +npy_double bdtrc(npy_double, npy_int, npy_double); +npy_double bdtri(npy_double, npy_int, npy_double); +#include "specfun_wrappers.h" +npy_double bei_wrap(npy_double); +npy_double beip_wrap(npy_double); +npy_double ber_wrap(npy_double); +npy_double berp_wrap(npy_double); +npy_double besselpoly(npy_double, npy_double, npy_double); +npy_double beta(npy_double, npy_double); +npy_double lbeta(npy_double, npy_double); +npy_double btdtr(npy_double, npy_double, npy_double); +npy_double incbi(npy_double, npy_double, npy_double); +npy_double cbrt(npy_double); +npy_double chdtr(npy_double, npy_double); +npy_double chdtrc(npy_double, npy_double); +npy_double chdtri(npy_double, npy_double); +npy_double cosdg(npy_double); +npy_double cosm1(npy_double); +npy_double cotdg(npy_double); +npy_double ellpe(npy_double); +npy_double ellie(npy_double, npy_double); +npy_int ellpj(npy_double, npy_double, npy_double *, npy_double *, npy_double *, npy_double *); +npy_double ellik(npy_double, npy_double); +npy_double ellpk(npy_double); +npy_double erf(npy_double); +npy_double erfc(npy_double); +npy_double erfcinv(npy_double); +npy_cdouble cexp1_wrap(npy_cdouble); +npy_double exp1_wrap(npy_double); +npy_double exp10(npy_double); +npy_double exp2(npy_double); +npy_cdouble cexpi_wrap(npy_cdouble); +npy_double expi_wrap(npy_double); +npy_double expm1(npy_double); +npy_double expn(npy_int, npy_double); +npy_double fdtr(npy_double, npy_double, npy_double); +npy_double fdtrc(npy_double, npy_double, npy_double); +npy_double fdtri(npy_double, npy_double, npy_double); +npy_int 
fresnl(npy_double, npy_double *, npy_double *); +npy_int cfresnl_wrap(npy_cdouble, npy_cdouble *, npy_cdouble *); +npy_double Gamma(npy_double); +npy_double igam(npy_double, npy_double); +npy_double igamc(npy_double, npy_double); +npy_double igamci(npy_double, npy_double); +npy_double igami(npy_double, npy_double); +npy_double lgam(npy_double); +npy_double gammasgn(npy_double); +npy_double gdtr(npy_double, npy_double, npy_double); +npy_double gdtrc(npy_double, npy_double, npy_double); +npy_cdouble cbesh_wrap1(npy_double, npy_cdouble); +npy_cdouble cbesh_wrap1_e(npy_double, npy_cdouble); +npy_cdouble cbesh_wrap2(npy_double, npy_cdouble); +npy_cdouble cbesh_wrap2_e(npy_double, npy_cdouble); +npy_cdouble chyp1f1_wrap(npy_double, npy_double, npy_cdouble); +npy_double hyp2f1(npy_double, npy_double, npy_double, npy_double); +npy_double i0(npy_double); +npy_double i0e(npy_double); +npy_double i1(npy_double); +npy_double i1e(npy_double); +npy_int it2i0k0_wrap(npy_double, npy_double *, npy_double *); +npy_int it2j0y0_wrap(npy_double, npy_double *, npy_double *); +npy_double it2struve0_wrap(npy_double); +npy_int itairy_wrap(npy_double, npy_double *, npy_double *, npy_double *, npy_double *); +npy_int it1i0k0_wrap(npy_double, npy_double *, npy_double *); +npy_int it1j0y0_wrap(npy_double, npy_double *, npy_double *); +npy_double itmodstruve0_wrap(npy_double); +npy_double itstruve0_wrap(npy_double); +npy_cdouble cbesi_wrap(npy_double, npy_cdouble); +npy_double iv(npy_double, npy_double); +npy_cdouble cbesi_wrap_e(npy_double, npy_cdouble); +npy_double cbesi_wrap_e_real(npy_double, npy_double); +npy_double j0(npy_double); +npy_double j1(npy_double); +npy_cdouble cbesj_wrap(npy_double, npy_cdouble); +npy_double cbesj_wrap_real(npy_double, npy_double); +npy_cdouble cbesj_wrap_e(npy_double, npy_cdouble); +npy_double cbesj_wrap_e_real(npy_double, npy_double); +npy_double k0(npy_double); +npy_double k0e(npy_double); +npy_double k1(npy_double); +npy_double k1e(npy_double); +npy_double 
kei_wrap(npy_double); +npy_double keip_wrap(npy_double); +npy_int kelvin_wrap(npy_double, npy_cdouble *, npy_cdouble *, npy_cdouble *, npy_cdouble *); +npy_double ker_wrap(npy_double); +npy_double kerp_wrap(npy_double); +npy_double cbesk_wrap_real_int(npy_int, npy_double); +npy_double kolmogi(npy_double); +npy_double kolmogorov(npy_double); +npy_cdouble cbesk_wrap(npy_double, npy_cdouble); +npy_double cbesk_wrap_real(npy_double, npy_double); +npy_cdouble cbesk_wrap_e(npy_double, npy_cdouble); +npy_double cbesk_wrap_e_real(npy_double, npy_double); +npy_double log1p(npy_double); +npy_double pmv_wrap(npy_double, npy_double, npy_double); +npy_double cem_cva_wrap(npy_double, npy_double); +npy_double sem_cva_wrap(npy_double, npy_double); +npy_int cem_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int mcm1_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int mcm2_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int msm1_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int msm2_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int sem_wrap(npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_int modified_fresnel_minus_wrap(npy_double, npy_cdouble *, npy_cdouble *); +npy_int modified_fresnel_plus_wrap(npy_double, npy_cdouble *, npy_cdouble *); +npy_double struve_l(npy_double, npy_double); +npy_double nbdtr(npy_int, npy_int, npy_double); +npy_double nbdtrc(npy_int, npy_int, npy_double); +npy_double nbdtri(npy_int, npy_int, npy_double); +npy_double ndtr(npy_double); +npy_double ndtri(npy_double); +npy_double oblate_aswfa_nocv_wrap(npy_double, npy_double, npy_double, npy_double, npy_double *); +npy_int oblate_aswfa_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double oblate_segv_wrap(npy_double, npy_double, npy_double); +npy_double oblate_radial1_nocv_wrap(npy_double, npy_double, 
npy_double, npy_double, npy_double *); +npy_int oblate_radial1_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double oblate_radial2_nocv_wrap(npy_double, npy_double, npy_double, npy_double, npy_double *); +npy_int oblate_radial2_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double owens_t(npy_double, npy_double); +npy_int pbdv_wrap(npy_double, npy_double, npy_double *, npy_double *); +npy_int pbvv_wrap(npy_double, npy_double, npy_double *, npy_double *); +npy_int pbwa_wrap(npy_double, npy_double, npy_double *, npy_double *); +npy_double pdtr(npy_double, npy_double); +npy_double pdtrc(npy_double, npy_double); +npy_double pdtri(npy_int, npy_double); +npy_double poch(npy_double, npy_double); +npy_double prolate_aswfa_nocv_wrap(npy_double, npy_double, npy_double, npy_double, npy_double *); +npy_int prolate_aswfa_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double prolate_segv_wrap(npy_double, npy_double, npy_double); +npy_double prolate_radial1_nocv_wrap(npy_double, npy_double, npy_double, npy_double, npy_double *); +npy_int prolate_radial1_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double prolate_radial2_nocv_wrap(npy_double, npy_double, npy_double, npy_double, npy_double *); +npy_int prolate_radial2_wrap(npy_double, npy_double, npy_double, npy_double, npy_double, npy_double *, npy_double *); +npy_double radian(npy_double, npy_double, npy_double); +npy_double rgamma(npy_double); +npy_double round(npy_double); +npy_int shichi(npy_double, npy_double *, npy_double *); +npy_int sici(npy_double, npy_double *, npy_double *); +npy_double sindg(npy_double); +npy_double smirnov(npy_int, npy_double); +npy_double smirnovi(npy_int, npy_double); +npy_double spence(npy_double); +npy_double struve_h(npy_double, npy_double); +npy_double tandg(npy_double); +npy_double 
tukeylambdacdf(npy_double, npy_double); +npy_double y0(npy_double); +npy_double y1(npy_double); +npy_double yn(npy_int, npy_double); +npy_cdouble cbesy_wrap(npy_double, npy_cdouble); +npy_double cbesy_wrap_real(npy_double, npy_double); +npy_cdouble cbesy_wrap_e(npy_double, npy_cdouble); +npy_double cbesy_wrap_e_real(npy_double, npy_double); +npy_double zetac(npy_double); +#endif diff --git a/.venv/Lib/site-packages/scipy/special/special/binom.h b/.venv/Lib/site-packages/scipy/special/special/binom.h new file mode 100644 index 0000000000000000000000000000000000000000..9e4bdfed54a1dfb5c69dd722cf94a7c713143984 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/special/binom.h @@ -0,0 +1,85 @@ +/* Translated from Cython into C++ by SciPy developers in 2024. + * + * Original authors: Pauli Virtanen, Eric Moore + */ + +// Binomial coefficient + +#pragma once + +#include "config.h" + +#include "cephes/beta.h" +#include "cephes/gamma.h" + +namespace special { + +SPECFUN_HOST_DEVICE inline double binom(double n, double k) { + double kx, nx, num, den, dk, sgn; + + if (n < 0) { + nx = std::floor(n); + if (n == nx) { + // Undefined + return std::numeric_limits::quiet_NaN(); + } + } + + kx = std::floor(k); + if (k == kx && (std::abs(n) > 1E-8 || n == 0)) { + /* Integer case: use multiplication formula for less rounding + * error for cases where the result is an integer. + * + * This cannot be used for small nonzero n due to loss of + * precision. 
*/ + nx = std::floor(n); + if (nx == n && kx > nx / 2 && nx > 0) { + // Reduce kx by symmetry + kx = nx - kx; + } + + if (kx >= 0 && kx < 20) { + num = 1.0; + den = 1.0; + for (int i = 1; i < 1 + static_cast(kx); i++) { + num *= i + n - kx; + den *= i; + if (std::abs(num) > 1E50) { + num /= den; + den = 1.0; + } + } + return num / den; + } + } + + // general case + if (n >= 1E10 * k and k > 0) { + // avoid under/overflows intermediate results + return std::exp(-cephes::lbeta(1 + n - k, 1 + k) - std::log(n + 1)); + } + if (k > 1E8 * std::abs(n)) { + // avoid loss of precision + num = cephes::Gamma(1 + n) / std::abs(k) + cephes::Gamma(1 + n) * n / (2 * k * k); // + ... + num /= M_PI * std::pow(std::abs(k), n); + if (k > 0) { + kx = std::floor(k); + if (static_cast(kx) == kx) { + dk = k - kx; + sgn = (static_cast(kx) % 2 == 0) ? 1 : -1; + } else { + dk = k; + sgn = 1; + } + return num * std::sin((dk - n) * M_PI) * sgn; + } + kx = std::floor(k); + if (static_cast(kx) == kx) { + return 0; + } + return num * std::sin(k * M_PI); + } + return 1 / (n + 1) / cephes::beta(1 + n - k, 1 + k); +} + +} // namespace special diff --git a/.venv/Lib/site-packages/scipy/special/special/config.h b/.venv/Lib/site-packages/scipy/special/special/config.h new file mode 100644 index 0000000000000000000000000000000000000000..7ea3769783b75dcccfc59952c5e7d042d63adb3d --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/special/config.h @@ -0,0 +1,158 @@ +#pragma once + +// Define math constants if they are not available +#ifndef M_E +#define M_E 2.71828182845904523536 +#endif + +#ifndef M_LOG2E +#define M_LOG2E 1.44269504088896340736 +#endif + +#ifndef M_LOG10E +#define M_LOG10E 0.434294481903251827651 +#endif + +#ifndef M_LN2 +#define M_LN2 0.693147180559945309417 +#endif + +#ifndef M_LN10 +#define M_LN10 2.30258509299404568402 +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#ifndef M_PI_2 +#define M_PI_2 1.57079632679489661923 +#endif + +#ifndef M_PI_4 +#define 
M_PI_4 0.785398163397448309616 +#endif + +#ifndef M_1_PI +#define M_1_PI 0.318309886183790671538 +#endif + +#ifndef M_2_PI +#define M_2_PI 0.636619772367581343076 +#endif + +#ifndef M_2_SQRTPI +#define M_2_SQRTPI 1.12837916709551257390 +#endif + +#ifndef M_SQRT2 +#define M_SQRT2 1.41421356237309504880 +#endif + +#ifndef M_SQRT1_2 +#define M_SQRT1_2 0.707106781186547524401 +#endif + +#ifdef __CUDACC__ +#define SPECFUN_HOST_DEVICE __host__ __device__ + +#include +#include + +// Fallback to global namespace for functions unsupported on NVRTC Jit +#ifdef _LIBCUDACXX_COMPILER_NVRTC +#include +#endif + +namespace std { + +SPECFUN_HOST_DEVICE inline double abs(double num) { return cuda::std::abs(num); } + +SPECFUN_HOST_DEVICE inline double exp(double num) { return cuda::std::exp(num); } + +SPECFUN_HOST_DEVICE inline double log(double num) { return cuda::std::log(num); } + +SPECFUN_HOST_DEVICE inline double sqrt(double num) { return cuda::std::sqrt(num); } + +SPECFUN_HOST_DEVICE inline bool isnan(double num) { return cuda::std::isnan(num); } + +SPECFUN_HOST_DEVICE inline bool isfinite(double num) { return cuda::std::isfinite(num); } + +SPECFUN_HOST_DEVICE inline double pow(double x, double y) { return cuda::std::pow(x, y); } + +SPECFUN_HOST_DEVICE inline double sin(double x) { return cuda::std::sin(x); } + +SPECFUN_HOST_DEVICE inline double tan(double x) { return cuda::std::tan(x); } + +SPECFUN_HOST_DEVICE inline double sinh(double x) { return cuda::std::sinh(x); } + +SPECFUN_HOST_DEVICE inline double cosh(double x) { return cuda::std::cosh(x); } + +SPECFUN_HOST_DEVICE inline bool signbit(double x) { return cuda::std::signbit(x); } + +// Fallback to global namespace for functions unsupported on NVRTC +#ifndef _LIBCUDACXX_COMPILER_NVRTC +SPECFUN_HOST_DEVICE inline double ceil(double x) { return cuda::std::ceil(x); } +SPECFUN_HOST_DEVICE inline double floor(double x) { return cuda::std::floor(x); } +SPECFUN_HOST_DEVICE inline double trunc(double x) { return 
cuda::std::trunc(x); } +SPECFUN_HOST_DEVICE inline double fma(double x, double y, double z) { return cuda::std::fma(x, y, z); } +SPECFUN_HOST_DEVICE inline double copysign(double x, double y) { return cuda::std::copysign(x, y); } +SPECFUN_HOST_DEVICE inline double modf(double value, double *iptr) { return cuda::std::modf(value, iptr); } + +#else +SPECFUN_HOST_DEVICE inline double ceil(double x) { return ::ceil(x); } +SPECFUN_HOST_DEVICE inline double floor(double x) { return ::floor(x); } +SPECFUN_HOST_DEVICE inline double trunc(double x) { return ::trunc(x); } +SPECFUN_HOST_DEVICE inline double fma(double x, double y, double z) { return ::fma(x, y, z); } +SPECFUN_HOST_DEVICE inline double copysign(double x, double y) { return ::copysign(x, y); } +SPECFUN_HOST_DEVICE inline double modf(double value, double *iptr) { return ::modf(value, iptr); } +#endif + +template +using numeric_limits = cuda::std::numeric_limits; + +// Must use thrust for complex types in order to support CuPy +template +using complex = thrust::complex; + +template +SPECFUN_HOST_DEVICE T abs(const complex &z) { + return thrust::abs(z); +} + +template +SPECFUN_HOST_DEVICE complex exp(const complex &z) { + return thrust::exp(z); +} + +template +SPECFUN_HOST_DEVICE complex log(const complex &z) { + return thrust::log(z); +} + +template +SPECFUN_HOST_DEVICE T norm(const complex &z) { + return thrust::norm(z); +} + +template +SPECFUN_HOST_DEVICE complex sqrt(const complex &z) { + return thrust::sqrt(z); +} + +template +SPECFUN_HOST_DEVICE complex conj(const complex &z) { + return thrust::conj(z); +} + +} // namespace std + +#else +#define SPECFUN_HOST_DEVICE + +#include +#include +#include +#include + +#endif diff --git a/.venv/Lib/site-packages/scipy/special/special/digamma.h b/.venv/Lib/site-packages/scipy/special/special/digamma.h new file mode 100644 index 0000000000000000000000000000000000000000..7351a584b3a536aece37a493caf2df051cd21bef --- /dev/null +++ 
b/.venv/Lib/site-packages/scipy/special/special/digamma.h @@ -0,0 +1,198 @@ +/* Translated from Cython into C++ by SciPy developers in 2024. + * Original header comment appears below. + */ + +/* An implementation of the digamma function for complex arguments. + * + * Author: Josh Wilson + * + * Distributed under the same license as Scipy. + * + * Sources: + * [1] "The Digital Library of Mathematical Functions", dlmf.nist.gov + * + * [2] mpmath (version 0.19), http://mpmath.org + */ + +#pragma once + +#include "cephes/psi.h" +#include "cephes/zeta.h" +#include "config.h" +#include "error.h" +#include "trig.h" + +namespace special { +namespace detail { + // All of the following were computed with mpmath + // Location of the positive root + constexpr double digamma_posroot = 1.4616321449683623; + // Value of the positive root + constexpr double digamma_posrootval = -9.2412655217294275e-17; + // Location of the negative root + constexpr double digamma_negroot = -0.504083008264455409; + // Value of the negative root + constexpr double digamma_negrootval = 7.2897639029768949e-17; + + template + SPECFUN_HOST_DEVICE T digamma_zeta_series(T z, double root, double rootval) { + T res = rootval; + T coeff = -1.0; + + z = z - root; + T term; + for (int n = 1; n < 100; n++) { + coeff *= -z; + term = coeff * cephes::zeta(n + 1, root); + res += term; + if (std::abs(term) < std::numeric_limits::epsilon() * std::abs(res)) { + break; + } + } + return res; + } + + SPECFUN_HOST_DEVICE inline std::complex digamma_forward_recurrence(std::complex z, + std::complex psiz, int n) { + /* Compute digamma(z + n) using digamma(z) using the recurrence + * relation + * + * digamma(z + 1) = digamma(z) + 1/z. 
+ * + * See https://dlmf.nist.gov/5.5#E2 */ + std::complex res = psiz; + + for (int k = 0; k < n; k++) { + res += 1.0 / (z + static_cast(k)); + } + return res; + } + + SPECFUN_HOST_DEVICE inline std::complex digamma_backward_recurrence(std::complex z, + std::complex psiz, int n) { + /* Compute digamma(z - n) using digamma(z) and a recurrence relation. */ + std::complex res = psiz; + + for (int k = 1; k < n + 1; k++) { + res -= 1.0 / (z - static_cast(k)); + } + return res; + } + + SPECFUN_HOST_DEVICE inline std::complex digamma_asymptotic_series(std::complex z) { + /* Evaluate digamma using an asymptotic series. See + * + * https://dlmf.nist.gov/5.11#E2 */ + double bernoulli2k[] = { + 0.166666666666666667, -0.0333333333333333333, 0.0238095238095238095, -0.0333333333333333333, + 0.0757575757575757576, -0.253113553113553114, 1.16666666666666667, -7.09215686274509804, + 54.9711779448621554, -529.124242424242424, 6192.12318840579710, -86580.2531135531136, + 1425517.16666666667, -27298231.0678160920, 601580873.900642368, -15116315767.0921569}; + std::complex rzz = 1.0 / z / z; + std::complex zfac = 1.0; + std::complex term; + std::complex res; + + if (!(std::isfinite(z.real()) && std::isfinite(z.imag()))) { + /* Check for infinity (or nan) and return early. + * Result of division by complex infinity is implementation dependent. + * and has been observed to vary between C++ stdlib and CUDA stdlib. + */ + return std::log(z); + } + + res = std::log(z) - 0.5 / z; + + for (int k = 1; k < 17; k++) { + zfac *= rzz; + term = -bernoulli2k[k - 1] * zfac / (2 * static_cast(k)); + res += term; + if (std::abs(term) < std::numeric_limits::epsilon() * std::abs(res)) { + break; + } + } + return res; + } + +} // namespace detail + +SPECFUN_HOST_DEVICE inline double digamma(double z) { + /* Wrap Cephes' psi to take advantage of the series expansion around + * the smallest negative zero. 
+ */ + if (std::abs(z - detail::digamma_negroot) < 0.3) { + return detail::digamma_zeta_series(z, detail::digamma_negroot, detail::digamma_negrootval); + } + return cephes::psi(z); +} + +SPECFUN_HOST_DEVICE inline std::complex digamma(std::complex z) { + /* + * Compute the digamma function for complex arguments. The strategy + * is: + * + * - Around the two zeros closest to the origin (posroot and negroot) + * use a Taylor series with precomputed zero order coefficient. + * - If close to the origin, use a recurrence relation to step away + * from the origin. + * - If close to the negative real axis, use the reflection formula + * to move to the right halfplane. + * - If |z| is large (> 16), use the asymptotic series. + * - If |z| is small, use a recurrence relation to make |z| large + * enough to use the asymptotic series. + */ + double absz = std::abs(z); + std::complex res = 0; + /* Use the asymptotic series for z away from the negative real axis + * with abs(z) > smallabsz. */ + int smallabsz = 16; + /* Use the reflection principle for z with z.real < 0 that are within + * smallimag of the negative real axis. + * int smallimag = 6 # unused below except in a comment */ + + if (z.real() <= 0.0 && std::ceil(z.real()) == z) { + // Poles + set_error("digamma", SF_ERROR_SINGULAR, NULL); + return {std::numeric_limits::quiet_NaN(), std::numeric_limits::quiet_NaN()}; + } + if (std::abs(z - detail::digamma_negroot) < 0.3) { + // First negative root. + return detail::digamma_zeta_series(z, detail::digamma_negroot, detail::digamma_negrootval); + } + + if (z.real() < 0 and std::abs(z.imag()) < smallabsz) { + /* Reflection formula for digamma. See + * + *https://dlmf.nist.gov/5.5#E4 + */ + res = -M_PI * cospi(z) / sinpi(z); + z = 1.0 - z; + absz = std::abs(z); + } + + if (absz < 0.5) { + /* Use one step of the recurrence relation to step away from + * the pole. 
*/ + res = -1.0 / z; + z += 1.0; + absz = std::abs(z); + } + + if (std::abs(z - detail::digamma_posroot) < 0.5) { + res += detail::digamma_zeta_series(z, detail::digamma_posroot, detail::digamma_posrootval); + } else if (absz > smallabsz) { + res += detail::digamma_asymptotic_series(z); + } else if (z.real() >= 0.0) { + double n = std::trunc(smallabsz - absz) + 1; + std::complex init = detail::digamma_asymptotic_series(z + n); + res += detail::digamma_backward_recurrence(z + n, init, n); + } else { + // z.real() < 0, absz < smallabsz, and z.imag() > smallimag + double n = std::trunc(smallabsz - absz) - 1; + std::complex init = detail::digamma_asymptotic_series(z - n); + res += detail::digamma_forward_recurrence(z - n, init, n); + } + return res; +} + +} // namespace special diff --git a/.venv/Lib/site-packages/scipy/special/special/error.h b/.venv/Lib/site-packages/scipy/special/special/error.h new file mode 100644 index 0000000000000000000000000000000000000000..cb028fd1bddc837d5c04403ca9f2e69f3365db75 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/special/error.h @@ -0,0 +1,42 @@ +#pragma once + +// should be included from config.h, but that won't work until we've cleanly separated out the C and C++ parts of the +// code +#ifdef __CUDACC__ +#define SPECFUN_HOST_DEVICE __host__ __device__ +#else +#define SPECFUN_HOST_DEVICE +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + SF_ERROR_OK = 0, /* no error */ + SF_ERROR_SINGULAR, /* singularity encountered */ + SF_ERROR_UNDERFLOW, /* floating point underflow */ + SF_ERROR_OVERFLOW, /* floating point overflow */ + SF_ERROR_SLOW, /* too many iterations required */ + SF_ERROR_LOSS, /* loss of precision */ + SF_ERROR_NO_RESULT, /* no result obtained */ + SF_ERROR_DOMAIN, /* out of domain */ + SF_ERROR_ARG, /* invalid input parameter */ + SF_ERROR_OTHER, /* unclassified error */ + SF_ERROR__LAST +} sf_error_t; + +#ifdef __cplusplus +namespace special { + +#ifndef SP_SPECFUN_ERROR + 
SPECFUN_HOST_DEVICE inline void set_error(const char *func_name, sf_error_t code, const char *fmt, ...) { + // nothing + } +#else + void set_error(const char *func_name, sf_error_t code, const char *fmt, ...); +#endif +} // namespace special + +} // closes extern "C" +#endif diff --git a/.venv/Lib/site-packages/scipy/special/tests/__init__.py b/.venv/Lib/site-packages/scipy/special/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/scipy/special/tests/data/__init__.py b/.venv/Lib/site-packages/scipy/special/tests/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/.venv/Lib/site-packages/scipy/special/tests/test_wright_bessel.py b/.venv/Lib/site-packages/scipy/special/tests/test_wright_bessel.py new file mode 100644 index 0000000000000000000000000000000000000000..125067543ee8e6484aae8d4cd2711da8ac64e840 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/tests/test_wright_bessel.py @@ -0,0 +1,115 @@ +# Reference MPMATH implementation: +# +# import mpmath +# from mpmath import nsum +# +# def Wright_Series_MPMATH(a, b, z, dps=50, method='r+s+e', steps=[1000]): +# """Compute Wright' generalized Bessel function as Series. +# +# This uses mpmath for arbitrary precision. 
+# """ +# with mpmath.workdps(dps): +# res = nsum(lambda k: z**k/mpmath.fac(k) * mpmath.rgamma(a*k+b), +# [0, mpmath.inf], +# tol=dps, method=method, steps=steps +# ) +# +# return res + +import pytest +import numpy as np +from numpy.testing import assert_equal, assert_allclose + +import scipy.special as sc +from scipy.special import rgamma, wright_bessel + + +@pytest.mark.parametrize('a', [0, 1e-6, 0.1, 0.5, 1, 10]) +@pytest.mark.parametrize('b', [0, 1e-6, 0.1, 0.5, 1, 10]) +def test_wright_bessel_zero(a, b): + """Test at x = 0.""" + assert_equal(wright_bessel(a, b, 0.), rgamma(b)) + + +@pytest.mark.parametrize('b', [0, 1e-6, 0.1, 0.5, 1, 10]) +@pytest.mark.parametrize('x', [0, 1e-6, 0.1, 0.5, 1]) +def test_wright_bessel_iv(b, x): + """Test relation of wright_bessel and modified bessel function iv. + + iv(z) = (1/2*z)**v * Phi(1, v+1; 1/4*z**2). + See https://dlmf.nist.gov/10.46.E2 + """ + if x != 0: + v = b - 1 + wb = wright_bessel(1, v + 1, x**2 / 4.) + # Note: iv(v, x) has precision of less than 1e-12 for some cases + # e.g v=1-1e-6 and x=1e-06) + assert_allclose(np.power(x / 2., v) * wb, + sc.iv(v, x), + rtol=1e-11, atol=1e-11) + + +@pytest.mark.parametrize('a', [0, 1e-6, 0.1, 0.5, 1, 10]) +@pytest.mark.parametrize('b', [1, 1 + 1e-3, 2, 5, 10]) +@pytest.mark.parametrize('x', [0, 1e-6, 0.1, 0.5, 1, 5, 10, 100]) +def test_wright_functional(a, b, x): + """Test functional relation of wright_bessel. + + Phi(a, b-1, z) = a*z*Phi(a, b+a, z) + (b-1)*Phi(a, b, z) + + Note that d/dx Phi(a, b, x) = Phi(a, b-1, x) + See Eq. (22) of + B. Stankovic, On the Function of E. M. Wright, + Publ. de l' Institut Mathematique, Beograd, + Nouvelle S`er. 10 (1970), 113-124. 
+ """ + assert_allclose(wright_bessel(a, b - 1, x), + a * x * wright_bessel(a, b + a, x) + + (b - 1) * wright_bessel(a, b, x), + rtol=1e-8, atol=1e-8) + + +# grid of rows [a, b, x, value, accuracy] that do not reach 1e-11 accuracy +# see output of: +# cd scipy/scipy/_precompute +# python wright_bessel_data.py +grid_a_b_x_value_acc = np.array([ + [0.1, 100.0, 709.7827128933841, 8.026353022981087e+34, 2e-8], + [0.5, 10.0, 709.7827128933841, 2.680788404494657e+48, 9e-8], + [0.5, 10.0, 1000.0, 2.005901980702872e+64, 1e-8], + [0.5, 100.0, 1000.0, 3.4112367580445246e-117, 6e-8], + [1.0, 20.0, 100000.0, 1.7717158630699857e+225, 3e-11], + [1.0, 100.0, 100000.0, 1.0269334596230763e+22, np.nan], + [1.0000000000000222, 20.0, 100000.0, 1.7717158630001672e+225, 3e-11], + [1.0000000000000222, 100.0, 100000.0, 1.0269334595866202e+22, np.nan], + [1.5, 0.0, 500.0, 15648961196.432373, 3e-11], + [1.5, 2.220446049250313e-14, 500.0, 15648961196.431465, 3e-11], + [1.5, 1e-10, 500.0, 15648961192.344728, 3e-11], + [1.5, 1e-05, 500.0, 15648552437.334162, 3e-11], + [1.5, 0.1, 500.0, 12049870581.10317, 2e-11], + [1.5, 20.0, 100000.0, 7.81930438331405e+43, 3e-9], + [1.5, 100.0, 100000.0, 9.653370857459075e-130, np.nan], + ]) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + 'a, b, x, phi', + grid_a_b_x_value_acc[:, :4].tolist()) +def test_wright_data_grid_failures(a, b, x, phi): + """Test cases of test_data that do not reach relative accuracy of 1e-11""" + assert_allclose(wright_bessel(a, b, x), phi, rtol=1e-11) + + +@pytest.mark.parametrize( + 'a, b, x, phi, accuracy', + grid_a_b_x_value_acc.tolist()) +def test_wright_data_grid_less_accurate(a, b, x, phi, accuracy): + """Test cases of test_data that do not reach relative accuracy of 1e-11 + + Here we test for reduced accuracy or even nan. 
+ """ + if np.isnan(accuracy): + assert np.isnan(wright_bessel(a, b, x)) + else: + assert_allclose(wright_bessel(a, b, x), phi, rtol=accuracy) diff --git a/.venv/Lib/site-packages/scipy/special/tests/test_zeta.py b/.venv/Lib/site-packages/scipy/special/tests/test_zeta.py new file mode 100644 index 0000000000000000000000000000000000000000..a335ebf8fad2cd919e1e31a5684741f8a076af80 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/special/tests/test_zeta.py @@ -0,0 +1,49 @@ +import scipy.special as sc +import numpy as np +from numpy.testing import assert_equal, assert_allclose + + +def test_zeta(): + assert_allclose(sc.zeta(2,2), np.pi**2/6 - 1, rtol=1e-12) + + +def test_zetac(): + # Expected values in the following were computed using Wolfram + # Alpha's `Zeta[x] - 1` + x = [-2.1, 0.8, 0.9999, 9, 50, 75] + desired = [ + -0.9972705002153750, + -5.437538415895550, + -10000.42279161673, + 0.002008392826082214, + 8.881784210930816e-16, + 2.646977960169853e-23, + ] + assert_allclose(sc.zetac(x), desired, rtol=1e-12) + + +def test_zetac_special_cases(): + assert sc.zetac(np.inf) == 0 + assert np.isnan(sc.zetac(-np.inf)) + assert sc.zetac(0) == -1.5 + assert sc.zetac(1.0) == np.inf + + assert_equal(sc.zetac([-2, -50, -100]), -1) + + +def test_riemann_zeta_special_cases(): + assert np.isnan(sc.zeta(np.nan)) + assert sc.zeta(np.inf) == 1 + assert sc.zeta(0) == -0.5 + + # Riemann zeta is zero add negative even integers. 
+ assert_equal(sc.zeta([-2, -4, -6, -8, -10]), 0) + + assert_allclose(sc.zeta(2), np.pi**2/6, rtol=1e-12) + assert_allclose(sc.zeta(4), np.pi**4/90, rtol=1e-12) + + +def test_riemann_zeta_avoid_overflow(): + s = -260.00000000001 + desired = -5.6966307844402683127e+297 # Computed with Mpmath + assert_allclose(sc.zeta(s), desired, atol=0, rtol=5e-14) diff --git a/.venv/Lib/site-packages/scipy/stats/morestats.py b/.venv/Lib/site-packages/scipy/stats/morestats.py new file mode 100644 index 0000000000000000000000000000000000000000..2dc55c379623172da1a81035c11d9983a651114a --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/morestats.py @@ -0,0 +1,34 @@ +# This file is not meant for public use and will be removed in SciPy v2.0.0. +# Use the `scipy.stats` namespace for importing the functions +# included below. + +from scipy._lib.deprecation import _sub_module_deprecation + + +__all__ = [ # noqa: F822 + 'mvsdist', + 'bayes_mvs', 'kstat', 'kstatvar', 'probplot', 'ppcc_max', 'ppcc_plot', + 'boxcox_llf', 'boxcox', 'boxcox_normmax', 'boxcox_normplot', + 'shapiro', 'anderson', 'ansari', 'bartlett', 'levene', + 'fligner', 'mood', 'wilcoxon', 'median_test', + 'circmean', 'circvar', 'circstd', 'anderson_ksamp', + 'yeojohnson_llf', 'yeojohnson', 'yeojohnson_normmax', + 'yeojohnson_normplot', 'annotations', 'namedtuple', 'isscalar', 'log', + 'around', 'unique', 'arange', 'sort', 'amin', 'amax', 'atleast_1d', + 'array', 'compress', 'exp', 'ravel', 'count_nonzero', 'arctan2', + 'hypot', 'optimize', 'find_repeats', + 'chi2_contingency', 'distributions', 'rv_generic', 'Mean', + 'Variance', 'Std_dev', 'ShapiroResult', 'AndersonResult', + 'Anderson_ksampResult', 'AnsariResult', 'BartlettResult', + 'LeveneResult', 'FlignerResult', 'WilcoxonResult' +] + + +def __dir__(): + return __all__ + + +def __getattr__(name): + return _sub_module_deprecation(sub_package="stats", module="morestats", + private_modules=["_morestats"], all=__all__, + attribute=name) diff --git 
a/.venv/Lib/site-packages/scipy/stats/mstats.py b/.venv/Lib/site-packages/scipy/stats/mstats.py new file mode 100644 index 0000000000000000000000000000000000000000..f1fa00b7108f3c78b61e9587922a0a36698d379f --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/mstats.py @@ -0,0 +1,140 @@ +""" +=================================================================== +Statistical functions for masked arrays (:mod:`scipy.stats.mstats`) +=================================================================== + +.. currentmodule:: scipy.stats.mstats + +This module contains a large number of statistical functions that can +be used with masked arrays. + +Most of these functions are similar to those in `scipy.stats` but might +have small differences in the API or in the algorithm used. Since this +is a relatively new package, some API changes are still possible. + +Summary statistics +================== + +.. autosummary:: + :toctree: generated/ + + describe + gmean + hmean + kurtosis + mode + mquantiles + hdmedian + hdquantiles + hdquantiles_sd + idealfourths + plotting_positions + meppf + moment + skew + tmean + tvar + tmin + tmax + tsem + variation + find_repeats + sem + trimmed_mean + trimmed_mean_ci + trimmed_std + trimmed_var + +Frequency statistics +==================== + +.. autosummary:: + :toctree: generated/ + + scoreatpercentile + +Correlation functions +===================== + +.. autosummary:: + :toctree: generated/ + + f_oneway + pearsonr + spearmanr + pointbiserialr + kendalltau + kendalltau_seasonal + linregress + siegelslopes + theilslopes + sen_seasonal_slopes + +Statistical tests +================= + +.. autosummary:: + :toctree: generated/ + + ttest_1samp + ttest_onesamp + ttest_ind + ttest_rel + chisquare + kstest + ks_2samp + ks_1samp + ks_twosamp + mannwhitneyu + rankdata + kruskal + kruskalwallis + friedmanchisquare + brunnermunzel + skewtest + kurtosistest + normaltest + +Transformations +=============== + +.. 
autosummary:: + :toctree: generated/ + + obrientransform + trim + trima + trimmed_stde + trimr + trimtail + trimboth + winsorize + zmap + zscore + +Other +===== + +.. autosummary:: + :toctree: generated/ + + argstoarray + count_tied_groups + msign + compare_medians_ms + median_cihs + mjci + mquantiles_cimj + rsh + +""" +from . import _mstats_basic +from . import _mstats_extras +from ._mstats_basic import * # noqa: F403 +from ._mstats_extras import * # noqa: F403 +# Functions that support masked array input in stats but need to be kept in the +# mstats namespace for backwards compatibility: +from scipy.stats import gmean, hmean, zmap, zscore, chisquare + +__all__ = _mstats_basic.__all__ + _mstats_extras.__all__ +__all__ += ['gmean', 'hmean', 'zmap', 'zscore', 'chisquare'] diff --git a/.venv/Lib/site-packages/scipy/stats/mstats_basic.py b/.venv/Lib/site-packages/scipy/stats/mstats_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..f4bd503eebb42ad5e52adc710c92ee21ae6266bb --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/mstats_basic.py @@ -0,0 +1,50 @@ +# This file is not meant for public use and will be removed in SciPy v2.0.0. +# Use the `scipy.stats` namespace for importing the functions +# included below. 
+ +from scipy._lib.deprecation import _sub_module_deprecation + + +__all__ = [ # noqa: F822 + 'argstoarray', + 'count_tied_groups', + 'describe', + 'f_oneway', 'find_repeats','friedmanchisquare', + 'kendalltau','kendalltau_seasonal','kruskal','kruskalwallis', + 'ks_twosamp', 'ks_2samp', 'kurtosis', 'kurtosistest', + 'ks_1samp', 'kstest', + 'linregress', + 'mannwhitneyu', 'meppf','mode','moment','mquantiles','msign', + 'normaltest', + 'obrientransform', + 'pearsonr','plotting_positions','pointbiserialr', + 'rankdata', + 'scoreatpercentile','sem', + 'sen_seasonal_slopes','skew','skewtest','spearmanr', + 'siegelslopes', 'theilslopes', + 'tmax','tmean','tmin','trim','trimboth', + 'trimtail','trima','trimr','trimmed_mean','trimmed_std', + 'trimmed_stde','trimmed_var','tsem','ttest_1samp','ttest_onesamp', + 'ttest_ind','ttest_rel','tvar', + 'variation', + 'winsorize', + 'brunnermunzel', 'ma', 'masked', 'nomask', 'namedtuple', + 'distributions', 'stats_linregress', 'stats_LinregressResult', + 'stats_theilslopes', 'stats_siegelslopes', 'ModeResult', + 'PointbiserialrResult', + 'Ttest_1sampResult', 'Ttest_indResult', 'Ttest_relResult', + 'MannwhitneyuResult', 'KruskalResult', 'trimdoc', 'trim1', + 'DescribeResult', 'stde_median', 'SkewtestResult', 'KurtosistestResult', + 'NormaltestResult', 'F_onewayResult', 'FriedmanchisquareResult', + 'BrunnerMunzelResult' +] + + +def __dir__(): + return __all__ + + +def __getattr__(name): + return _sub_module_deprecation(sub_package="stats", module="mstats_basic", + private_modules=["_mstats_basic"], all=__all__, + attribute=name, correct_module="mstats") diff --git a/.venv/Lib/site-packages/scipy/stats/mstats_extras.py b/.venv/Lib/site-packages/scipy/stats/mstats_extras.py new file mode 100644 index 0000000000000000000000000000000000000000..1f92cf933a7f4a125a788365db1528a71411f5e1 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/mstats_extras.py @@ -0,0 +1,26 @@ +# This file is not meant for public use and will be removed in 
SciPy v2.0.0. +# Use the `scipy.stats` namespace for importing the functions +# included below. + +from scipy._lib.deprecation import _sub_module_deprecation + + +__all__ = [ # noqa: F822 + 'compare_medians_ms', + 'hdquantiles', 'hdmedian', 'hdquantiles_sd', + 'idealfourths', + 'median_cihs','mjci','mquantiles_cimj', + 'rsh', + 'trimmed_mean_ci', 'ma', 'MaskedArray', 'mstats', + 'norm', 'beta', 't', 'binom' +] + + +def __dir__(): + return __all__ + + +def __getattr__(name): + return _sub_module_deprecation(sub_package="stats", module="mstats_extras", + private_modules=["_mstats_extras"], all=__all__, + attribute=name, correct_module="mstats") diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py b/.venv/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py new file mode 100644 index 0000000000000000000000000000000000000000..d3e3d5cc9a19a35ee098308d3997d4fb534f1a63 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_axis_nan_policy.py @@ -0,0 +1,1188 @@ +# Many scipy.stats functions support `axis` and `nan_policy` parameters. +# When the two are combined, it can be tricky to get all the behavior just +# right. This file contains a suite of common tests for scipy.stats functions +# that support `axis` and `nan_policy` and additional tests for some associated +# functions in stats._util. 
+ +from itertools import product, combinations_with_replacement, permutations +import re +import pickle +import pytest + +import numpy as np +from numpy.testing import assert_allclose, assert_equal, suppress_warnings +from scipy import stats +from scipy.stats import norm # type: ignore[attr-defined] +from scipy.stats._axis_nan_policy import _masked_arrays_2_sentinel_arrays +from scipy._lib._util import AxisError + + +def unpack_ttest_result(res): + low, high = res.confidence_interval() + return (res.statistic, res.pvalue, res.df, res._standard_error, + res._estimate, low, high) + + +def _get_ttest_ci(ttest): + # get a function that returns the CI bounds of provided `ttest` + def ttest_ci(*args, **kwargs): + res = ttest(*args, **kwargs) + return res.confidence_interval() + return ttest_ci + + +axis_nan_policy_cases = [ + # function, args, kwds, number of samples, number of outputs, + # ... paired, unpacker function + # args, kwds typically aren't needed; just showing that they work + (stats.kruskal, tuple(), dict(), 3, 2, False, None), # 4 samples is slow + (stats.ranksums, ('less',), dict(), 2, 2, False, None), + (stats.mannwhitneyu, tuple(), {'method': 'asymptotic'}, 2, 2, False, None), + (stats.wilcoxon, ('pratt',), {'mode': 'auto'}, 2, 2, True, + lambda res: (res.statistic, res.pvalue)), + (stats.wilcoxon, tuple(), dict(), 1, 2, True, + lambda res: (res.statistic, res.pvalue)), + (stats.wilcoxon, tuple(), {'mode': 'approx'}, 1, 3, True, + lambda res: (res.statistic, res.pvalue, res.zstatistic)), + (stats.gmean, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.hmean, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.pmean, (1.42,), dict(), 1, 1, False, lambda x: (x,)), + (stats.sem, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.iqr, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.kurtosis, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.skew, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.kstat, tuple(), dict(), 1, 
1, False, lambda x: (x,)), + (stats.kstatvar, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.moment, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.moment, tuple(), dict(order=[1, 2]), 1, 2, False, None), + (stats.jarque_bera, tuple(), dict(), 1, 2, False, None), + (stats.ttest_1samp, (np.array([0]),), dict(), 1, 7, False, + unpack_ttest_result), + (stats.ttest_rel, tuple(), dict(), 2, 7, True, unpack_ttest_result), + (stats.ttest_ind, tuple(), dict(), 2, 7, False, unpack_ttest_result), + (_get_ttest_ci(stats.ttest_1samp), (0,), dict(), 1, 2, False, None), + (_get_ttest_ci(stats.ttest_rel), tuple(), dict(), 2, 2, True, None), + (_get_ttest_ci(stats.ttest_ind), tuple(), dict(), 2, 2, False, None), + (stats.mode, tuple(), dict(), 1, 2, True, lambda x: (x.mode, x.count)), + (stats.differential_entropy, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.variation, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.friedmanchisquare, tuple(), dict(), 3, 2, True, None), + (stats.brunnermunzel, tuple(), dict(), 2, 2, False, None), + (stats.mood, tuple(), {}, 2, 2, False, None), + (stats.shapiro, tuple(), {}, 1, 2, False, None), + (stats.ks_1samp, (norm().cdf,), dict(), 1, 4, False, + lambda res: (*res, res.statistic_location, res.statistic_sign)), + (stats.ks_2samp, tuple(), dict(), 2, 4, False, + lambda res: (*res, res.statistic_location, res.statistic_sign)), + (stats.kstest, (norm().cdf,), dict(), 1, 4, False, + lambda res: (*res, res.statistic_location, res.statistic_sign)), + (stats.kstest, tuple(), dict(), 2, 4, False, + lambda res: (*res, res.statistic_location, res.statistic_sign)), + (stats.levene, tuple(), {}, 2, 2, False, None), + (stats.fligner, tuple(), {'center': 'trimmed', 'proportiontocut': 0.01}, + 2, 2, False, None), + (stats.ansari, tuple(), {}, 2, 2, False, None), + (stats.entropy, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.entropy, tuple(), dict(), 2, 1, True, lambda x: (x,)), + (stats.skewtest, tuple(), 
dict(), 1, 2, False, None), + (stats.kurtosistest, tuple(), dict(), 1, 2, False, None), + (stats.normaltest, tuple(), dict(), 1, 2, False, None), + (stats.cramervonmises, ("norm",), dict(), 1, 2, False, + lambda res: (res.statistic, res.pvalue)), + (stats.cramervonmises_2samp, tuple(), dict(), 2, 2, False, + lambda res: (res.statistic, res.pvalue)), + (stats.epps_singleton_2samp, tuple(), dict(), 2, 2, False, None), + (stats.bartlett, tuple(), {}, 2, 2, False, None), + (stats.tmean, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.tvar, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.tmin, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.tmax, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.tstd, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.tsem, tuple(), {}, 1, 1, False, lambda x: (x,)), + (stats.circmean, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.circvar, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.circstd, tuple(), dict(), 1, 1, False, lambda x: (x,)), + (stats.f_oneway, tuple(), {}, 2, 2, False, None), + (stats.alexandergovern, tuple(), {}, 2, 2, False, + lambda res: (res.statistic, res.pvalue)), + (stats.combine_pvalues, tuple(), {}, 1, 2, False, None), +] + +# If the message is one of those expected, put nans in +# appropriate places of `statistics` and `pvalues` +too_small_messages = {"The input contains nan", # for nan_policy="raise" + "Degrees of freedom <= 0 for slice", + "x and y should have at least 5 elements", + "Data must be at least length 3", + "The sample must contain at least two", + "x and y must contain at least two", + "division by zero", + "Mean of empty slice", + "Data passed to ks_2samp must not be empty", + "Not enough test observations", + "Not enough other observations", + "Not enough observations.", + "At least one observation is required", + "zero-size array to reduction operation maximum", + "`x` and `y` must be of nonzero size.", + "The exact distribution of the Wilcoxon test", + "Data 
input must not be empty", + "Window length (0) must be positive and less", + "Window length (1) must be positive and less", + "Window length (2) must be positive and less", + "skewtest is not valid with less than", + "kurtosistest requires at least 5", + "attempt to get argmax of an empty sequence", + "No array values within given limits", + "Input sample size must be greater than one.",} + +# If the message is one of these, results of the function may be inaccurate, +# but NaNs are not to be placed +inaccuracy_messages = {"Precision loss occurred in moment calculation", + "Sample size too small for normal approximation."} + +# For some functions, nan_policy='propagate' should not just return NaNs +override_propagate_funcs = {stats.mode} + +# For some functions, empty arrays produce non-NaN results +empty_special_case_funcs = {stats.entropy} + +def _mixed_data_generator(n_samples, n_repetitions, axis, rng, + paired=False): + # generate random samples to check the response of hypothesis tests to + # samples with different (but broadcastable) shapes and various + # nan patterns (e.g. 
all nans, some nans, no nans) along axis-slices + + data = [] + for i in range(n_samples): + n_patterns = 6 # number of distinct nan patterns + n_obs = 20 if paired else 20 + i # observations per axis-slice + x = np.ones((n_repetitions, n_patterns, n_obs)) * np.nan + + for j in range(n_repetitions): + samples = x[j, :, :] + + # case 0: axis-slice with all nans (0 reals) + # cases 1-3: axis-slice with 1-3 reals (the rest nans) + # case 4: axis-slice with mostly (all but two) reals + # case 5: axis slice with all reals + for k, n_reals in enumerate([0, 1, 2, 3, n_obs-2, n_obs]): + # for cases 1-3, need paired nansw to be in the same place + indices = rng.permutation(n_obs)[:n_reals] + samples[k, indices] = rng.random(size=n_reals) + + # permute the axis-slices just to show that order doesn't matter + samples[:] = rng.permutation(samples, axis=0) + + # For multi-sample tests, we want to test broadcasting and check + # that nan policy works correctly for each nan pattern for each input. + # This takes care of both simultaneously. 
+ new_shape = [n_repetitions] + [1]*n_samples + [n_obs] + new_shape[1 + i] = 6 + x = x.reshape(new_shape) + + x = np.moveaxis(x, -1, axis) + data.append(x) + return data + + +def _homogeneous_data_generator(n_samples, n_repetitions, axis, rng, + paired=False, all_nans=True): + # generate random samples to check the response of hypothesis tests to + # samples with different (but broadcastable) shapes and homogeneous + # data (all nans or all finite) + data = [] + for i in range(n_samples): + n_obs = 20 if paired else 20 + i # observations per axis-slice + shape = [n_repetitions] + [1]*n_samples + [n_obs] + shape[1 + i] = 2 + x = np.ones(shape) * np.nan if all_nans else rng.random(shape) + x = np.moveaxis(x, -1, axis) + data.append(x) + return data + + +def nan_policy_1d(hypotest, data1d, unpacker, *args, n_outputs=2, + nan_policy='raise', paired=False, _no_deco=True, **kwds): + # Reference implementation for how `nan_policy` should work for 1d samples + + if nan_policy == 'raise': + for sample in data1d: + if np.any(np.isnan(sample)): + raise ValueError("The input contains nan values") + + elif (nan_policy == 'propagate' + and hypotest not in override_propagate_funcs): + # For all hypothesis tests tested, returning nans is the right thing. + # But many hypothesis tests don't propagate correctly (e.g. they treat + # np.nan the same as np.inf, which doesn't make sense when ranks are + # involved) so override that behavior here. 
+ for sample in data1d: + if np.any(np.isnan(sample)): + return np.full(n_outputs, np.nan) + + elif nan_policy == 'omit': + # manually omit nans (or pairs in which at least one element is nan) + if not paired: + data1d = [sample[~np.isnan(sample)] for sample in data1d] + else: + nan_mask = np.isnan(data1d[0]) + for sample in data1d[1:]: + nan_mask = np.logical_or(nan_mask, np.isnan(sample)) + data1d = [sample[~nan_mask] for sample in data1d] + + return unpacker(hypotest(*data1d, *args, _no_deco=_no_deco, **kwds)) + + +@pytest.mark.filterwarnings('ignore::RuntimeWarning') +@pytest.mark.filterwarnings('ignore::UserWarning') +@pytest.mark.parametrize(("hypotest", "args", "kwds", "n_samples", "n_outputs", + "paired", "unpacker"), axis_nan_policy_cases) +@pytest.mark.parametrize(("nan_policy"), ("propagate", "omit", "raise")) +@pytest.mark.parametrize(("axis"), (1,)) +@pytest.mark.parametrize(("data_generator"), ("mixed",)) +def test_axis_nan_policy_fast(hypotest, args, kwds, n_samples, n_outputs, + paired, unpacker, nan_policy, axis, + data_generator): + _axis_nan_policy_test(hypotest, args, kwds, n_samples, n_outputs, paired, + unpacker, nan_policy, axis, data_generator) + + +@pytest.mark.slow +@pytest.mark.filterwarnings('ignore::RuntimeWarning') +@pytest.mark.filterwarnings('ignore::UserWarning') +@pytest.mark.parametrize(("hypotest", "args", "kwds", "n_samples", "n_outputs", + "paired", "unpacker"), axis_nan_policy_cases) +@pytest.mark.parametrize(("nan_policy"), ("propagate", "omit", "raise")) +@pytest.mark.parametrize(("axis"), range(-3, 3)) +@pytest.mark.parametrize(("data_generator"), + ("all_nans", "all_finite", "mixed")) +def test_axis_nan_policy_full(hypotest, args, kwds, n_samples, n_outputs, + paired, unpacker, nan_policy, axis, + data_generator): + _axis_nan_policy_test(hypotest, args, kwds, n_samples, n_outputs, paired, + unpacker, nan_policy, axis, data_generator) + + +def _axis_nan_policy_test(hypotest, args, kwds, n_samples, n_outputs, paired, + 
unpacker, nan_policy, axis, data_generator): + # Tests the 1D and vectorized behavior of hypothesis tests against a + # reference implementation (nan_policy_1d with np.ndenumerate) + + # Some hypothesis tests return a non-iterable that needs an `unpacker` to + # extract the statistic and p-value. For those that don't: + if not unpacker: + def unpacker(res): + return res + + rng = np.random.default_rng(0) + + # Generate multi-dimensional test data with all important combinations + # of patterns of nans along `axis` + n_repetitions = 3 # number of repetitions of each pattern + data_gen_kwds = {'n_samples': n_samples, 'n_repetitions': n_repetitions, + 'axis': axis, 'rng': rng, 'paired': paired} + if data_generator == 'mixed': + inherent_size = 6 # number of distinct types of patterns + data = _mixed_data_generator(**data_gen_kwds) + elif data_generator == 'all_nans': + inherent_size = 2 # hard-coded in _homogeneous_data_generator + data_gen_kwds['all_nans'] = True + data = _homogeneous_data_generator(**data_gen_kwds) + elif data_generator == 'all_finite': + inherent_size = 2 # hard-coded in _homogeneous_data_generator + data_gen_kwds['all_nans'] = False + data = _homogeneous_data_generator(**data_gen_kwds) + + output_shape = [n_repetitions] + [inherent_size]*n_samples + + # To generate reference behavior to compare against, loop over the axis- + # slices in data. Make indexing easier by moving `axis` to the end and + # broadcasting all samples to the same shape. 
+ data_b = [np.moveaxis(sample, axis, -1) for sample in data] + data_b = [np.broadcast_to(sample, output_shape + [sample.shape[-1]]) + for sample in data_b] + statistics = np.zeros(output_shape) + pvalues = np.zeros(output_shape) + + for i, _ in np.ndenumerate(statistics): + data1d = [sample[i] for sample in data_b] + with np.errstate(divide='ignore', invalid='ignore'): + try: + res1d = nan_policy_1d(hypotest, data1d, unpacker, *args, + n_outputs=n_outputs, + nan_policy=nan_policy, + paired=paired, _no_deco=True, **kwds) + + # Eventually we'll check the results of a single, vectorized + # call of `hypotest` against the arrays `statistics` and + # `pvalues` populated using the reference `nan_policy_1d`. + # But while we're at it, check the results of a 1D call to + # `hypotest` against the reference `nan_policy_1d`. + res1db = unpacker(hypotest(*data1d, *args, + nan_policy=nan_policy, **kwds)) + assert_equal(res1db[0], res1d[0]) + if len(res1db) == 2: + assert_equal(res1db[1], res1d[1]) + + # When there is not enough data in 1D samples, many existing + # hypothesis tests raise errors instead of returning nans . + # For vectorized calls, we put nans in the corresponding elements + # of the output. 
+ except (RuntimeWarning, UserWarning, ValueError, + ZeroDivisionError) as e: + + # whatever it is, make sure same error is raised by both + # `nan_policy_1d` and `hypotest` + with pytest.raises(type(e), match=re.escape(str(e))): + nan_policy_1d(hypotest, data1d, unpacker, *args, + n_outputs=n_outputs, nan_policy=nan_policy, + paired=paired, _no_deco=True, **kwds) + with pytest.raises(type(e), match=re.escape(str(e))): + hypotest(*data1d, *args, nan_policy=nan_policy, **kwds) + + if any([str(e).startswith(message) + for message in too_small_messages]): + res1d = np.full(n_outputs, np.nan) + elif any([str(e).startswith(message) + for message in inaccuracy_messages]): + with suppress_warnings() as sup: + sup.filter(RuntimeWarning) + sup.filter(UserWarning) + res1d = nan_policy_1d(hypotest, data1d, unpacker, + *args, n_outputs=n_outputs, + nan_policy=nan_policy, + paired=paired, _no_deco=True, + **kwds) + else: + raise e + statistics[i] = res1d[0] + if len(res1d) == 2: + pvalues[i] = res1d[1] + + # Perform a vectorized call to the hypothesis test. + # If `nan_policy == 'raise'`, check that it raises the appropriate error. 
+ # If not, compare against the output against `statistics` and `pvalues` + if nan_policy == 'raise' and not data_generator == "all_finite": + message = 'The input contains nan values' + with pytest.raises(ValueError, match=message): + hypotest(*data, axis=axis, nan_policy=nan_policy, *args, **kwds) + + else: + with suppress_warnings() as sup, \ + np.errstate(divide='ignore', invalid='ignore'): + sup.filter(RuntimeWarning, "Precision loss occurred in moment") + sup.filter(UserWarning, "Sample size too small for normal " + "approximation.") + res = unpacker(hypotest(*data, axis=axis, nan_policy=nan_policy, + *args, **kwds)) + assert_allclose(res[0], statistics, rtol=1e-15) + assert_equal(res[0].dtype, statistics.dtype) + + if len(res) == 2: + assert_allclose(res[1], pvalues, rtol=1e-15) + assert_equal(res[1].dtype, pvalues.dtype) + + +@pytest.mark.filterwarnings('ignore::RuntimeWarning') +@pytest.mark.parametrize(("hypotest", "args", "kwds", "n_samples", "n_outputs", + "paired", "unpacker"), axis_nan_policy_cases) +@pytest.mark.parametrize(("nan_policy"), ("propagate", "omit", "raise")) +@pytest.mark.parametrize(("data_generator"), + ("all_nans", "all_finite", "mixed", "empty")) +def test_axis_nan_policy_axis_is_None(hypotest, args, kwds, n_samples, + n_outputs, paired, unpacker, nan_policy, + data_generator): + # check for correct behavior when `axis=None` + + if not unpacker: + def unpacker(res): + return res + + rng = np.random.default_rng(0) + + if data_generator == "empty": + data = [rng.random((2, 0)) for i in range(n_samples)] + else: + data = [rng.random((2, 20)) for i in range(n_samples)] + + if data_generator == "mixed": + masks = [rng.random((2, 20)) > 0.9 for i in range(n_samples)] + for sample, mask in zip(data, masks): + sample[mask] = np.nan + elif data_generator == "all_nans": + data = [sample * np.nan for sample in data] + + data_raveled = [sample.ravel() for sample in data] + + if nan_policy == 'raise' and data_generator not in {"all_finite", 
"empty"}: + message = 'The input contains nan values' + + # check for correct behavior whether or not data is 1d to begin with + with pytest.raises(ValueError, match=message): + hypotest(*data, axis=None, nan_policy=nan_policy, + *args, **kwds) + with pytest.raises(ValueError, match=message): + hypotest(*data_raveled, axis=None, nan_policy=nan_policy, + *args, **kwds) + + else: + # behavior of reference implementation with 1d input, hypotest with 1d + # input, and hypotest with Nd input should match, whether that means + # that outputs are equal or they raise the same exception + + ea_str, eb_str, ec_str = None, None, None + with np.errstate(divide='ignore', invalid='ignore'): + try: + res1da = nan_policy_1d(hypotest, data_raveled, unpacker, *args, + n_outputs=n_outputs, + nan_policy=nan_policy, paired=paired, + _no_deco=True, **kwds) + except (RuntimeWarning, ValueError, ZeroDivisionError) as ea: + ea_str = str(ea) + + try: + res1db = unpacker(hypotest(*data_raveled, *args, + nan_policy=nan_policy, **kwds)) + except (RuntimeWarning, ValueError, ZeroDivisionError) as eb: + eb_str = str(eb) + + try: + res1dc = unpacker(hypotest(*data, *args, axis=None, + nan_policy=nan_policy, **kwds)) + except (RuntimeWarning, ValueError, ZeroDivisionError) as ec: + ec_str = str(ec) + + if ea_str or eb_str or ec_str: + assert any([str(ea_str).startswith(message) + for message in too_small_messages]) + assert ea_str == eb_str == ec_str + else: + assert_equal(res1db, res1da) + assert_equal(res1dc, res1da) + for item in list(res1da) + list(res1db) + list(res1dc): + # Most functions naturally return NumPy numbers, which + # are drop-in replacements for the Python versions but with + # desirable attributes. Make sure this is consistent. 
+ assert np.issubdtype(item.dtype, np.number) + +# Test keepdims for: +# - single-output and multi-output functions (gmean and mannwhitneyu) +# - Axis negative, positive, None, and tuple +# - 1D with no NaNs +# - 1D with NaN propagation +# - Zero-sized output +@pytest.mark.parametrize("nan_policy", ("omit", "propagate")) +@pytest.mark.parametrize( + ("hypotest", "args", "kwds", "n_samples", "unpacker"), + ((stats.gmean, tuple(), dict(), 1, lambda x: (x,)), + (stats.mannwhitneyu, tuple(), {'method': 'asymptotic'}, 2, None)) +) +@pytest.mark.parametrize( + ("sample_shape", "axis_cases"), + (((2, 3, 3, 4), (None, 0, -1, (0, 2), (1, -1), (3, 1, 2, 0))), + ((10, ), (0, -1)), + ((20, 0), (0, 1))) +) +def test_keepdims(hypotest, args, kwds, n_samples, unpacker, + sample_shape, axis_cases, nan_policy): + # test if keepdims parameter works correctly + if not unpacker: + def unpacker(res): + return res + rng = np.random.default_rng(0) + data = [rng.random(sample_shape) for _ in range(n_samples)] + nan_data = [sample.copy() for sample in data] + nan_mask = [rng.random(sample_shape) < 0.2 for _ in range(n_samples)] + for sample, mask in zip(nan_data, nan_mask): + sample[mask] = np.nan + for axis in axis_cases: + expected_shape = list(sample_shape) + if axis is None: + expected_shape = np.ones(len(sample_shape)) + else: + if isinstance(axis, int): + expected_shape[axis] = 1 + else: + for ax in axis: + expected_shape[ax] = 1 + expected_shape = tuple(expected_shape) + res = unpacker(hypotest(*data, *args, axis=axis, keepdims=True, + **kwds)) + res_base = unpacker(hypotest(*data, *args, axis=axis, keepdims=False, + **kwds)) + nan_res = unpacker(hypotest(*nan_data, *args, axis=axis, + keepdims=True, nan_policy=nan_policy, + **kwds)) + nan_res_base = unpacker(hypotest(*nan_data, *args, axis=axis, + keepdims=False, + nan_policy=nan_policy, **kwds)) + for r, r_base, rn, rn_base in zip(res, res_base, nan_res, + nan_res_base): + assert r.shape == expected_shape + r = np.squeeze(r, 
axis=axis) + assert_equal(r, r_base) + assert rn.shape == expected_shape + rn = np.squeeze(rn, axis=axis) + assert_equal(rn, rn_base) + + +@pytest.mark.parametrize(("fun", "nsamp"), + [(stats.kstat, 1), + (stats.kstatvar, 1)]) +def test_hypotest_back_compat_no_axis(fun, nsamp): + m, n = 8, 9 + + rng = np.random.default_rng(0) + x = rng.random((nsamp, m, n)) + res = fun(*x) + res2 = fun(*x, _no_deco=True) + res3 = fun([xi.ravel() for xi in x]) + assert_equal(res, res2) + assert_equal(res, res3) + + +@pytest.mark.parametrize(("axis"), (0, 1, 2)) +def test_axis_nan_policy_decorated_positional_axis(axis): + # Test for correct behavior of function decorated with + # _axis_nan_policy_decorator whether `axis` is provided as positional or + # keyword argument + + shape = (8, 9, 10) + rng = np.random.default_rng(0) + x = rng.random(shape) + y = rng.random(shape) + res1 = stats.mannwhitneyu(x, y, True, 'two-sided', axis) + res2 = stats.mannwhitneyu(x, y, True, 'two-sided', axis=axis) + assert_equal(res1, res2) + + message = "mannwhitneyu() got multiple values for argument 'axis'" + with pytest.raises(TypeError, match=re.escape(message)): + stats.mannwhitneyu(x, y, True, 'two-sided', axis, axis=axis) + + +def test_axis_nan_policy_decorated_positional_args(): + # Test for correct behavior of function decorated with + # _axis_nan_policy_decorator when function accepts *args + + shape = (3, 8, 9, 10) + rng = np.random.default_rng(0) + x = rng.random(shape) + x[0, 0, 0, 0] = np.nan + stats.kruskal(*x) + + message = "kruskal() got an unexpected keyword argument 'samples'" + with pytest.raises(TypeError, match=re.escape(message)): + stats.kruskal(samples=x) + + with pytest.raises(TypeError, match=re.escape(message)): + stats.kruskal(*x, samples=x) + + +def test_axis_nan_policy_decorated_keyword_samples(): + # Test for correct behavior of function decorated with + # _axis_nan_policy_decorator whether samples are provided as positional or + # keyword arguments + + shape = (2, 8, 9, 
10) + rng = np.random.default_rng(0) + x = rng.random(shape) + x[0, 0, 0, 0] = np.nan + res1 = stats.mannwhitneyu(*x) + res2 = stats.mannwhitneyu(x=x[0], y=x[1]) + assert_equal(res1, res2) + + message = "mannwhitneyu() got multiple values for argument" + with pytest.raises(TypeError, match=re.escape(message)): + stats.mannwhitneyu(*x, x=x[0], y=x[1]) + + +@pytest.mark.parametrize(("hypotest", "args", "kwds", "n_samples", "n_outputs", + "paired", "unpacker"), axis_nan_policy_cases) +def test_axis_nan_policy_decorated_pickled(hypotest, args, kwds, n_samples, + n_outputs, paired, unpacker): + if "ttest_ci" in hypotest.__name__: + pytest.skip("Can't pickle functions defined within functions.") + + rng = np.random.default_rng(0) + + # Some hypothesis tests return a non-iterable that needs an `unpacker` to + # extract the statistic and p-value. For those that don't: + if not unpacker: + def unpacker(res): + return res + + data = rng.uniform(size=(n_samples, 2, 30)) + pickled_hypotest = pickle.dumps(hypotest) + unpickled_hypotest = pickle.loads(pickled_hypotest) + res1 = unpacker(hypotest(*data, *args, axis=-1, **kwds)) + res2 = unpacker(unpickled_hypotest(*data, *args, axis=-1, **kwds)) + assert_allclose(res1, res2, rtol=1e-12) + + +def test_check_empty_inputs(): + # Test that _check_empty_inputs is doing its job, at least for single- + # sample inputs. (Multi-sample functionality is tested below.) + # If the input sample is not empty, it should return None. + # If the input sample is empty, it should return an array of NaNs or an + # empty array of appropriate shape. np.mean is used as a reference for the + # output because, like the statistics calculated by these functions, + # it works along and "consumes" `axis` but preserves the other axes. 
+ for i in range(5): + for combo in combinations_with_replacement([0, 1, 2], i): + for axis in range(len(combo)): + samples = (np.zeros(combo),) + output = stats._axis_nan_policy._check_empty_inputs(samples, + axis) + if output is not None: + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning, "Mean of empty slice.") + sup.filter(RuntimeWarning, "invalid value encountered") + reference = samples[0].mean(axis=axis) + np.testing.assert_equal(output, reference) + + +def _check_arrays_broadcastable(arrays, axis): + # https://numpy.org/doc/stable/user/basics.broadcasting.html + # "When operating on two arrays, NumPy compares their shapes element-wise. + # It starts with the trailing (i.e. rightmost) dimensions and works its + # way left. + # Two dimensions are compatible when + # 1. they are equal, or + # 2. one of them is 1 + # ... + # Arrays do not need to have the same number of dimensions." + # (Clarification: if the arrays are compatible according to the criteria + # above and an array runs out of dimensions, it is still compatible.) + # Below, we follow the rules above except ignoring `axis` + + n_dims = max([arr.ndim for arr in arrays]) + if axis is not None: + # convert to negative axis + axis = (-n_dims + axis) if axis >= 0 else axis + + for dim in range(1, n_dims+1): # we'll index from -1 to -n_dims, inclusive + if -dim == axis: + continue # ignore lengths along `axis` + + dim_lengths = set() + for arr in arrays: + if dim <= arr.ndim and arr.shape[-dim] != 1: + dim_lengths.add(arr.shape[-dim]) + + if len(dim_lengths) > 1: + return False + return True + + +@pytest.mark.slow +@pytest.mark.parametrize(("hypotest", "args", "kwds", "n_samples", "n_outputs", + "paired", "unpacker"), axis_nan_policy_cases) +def test_empty(hypotest, args, kwds, n_samples, n_outputs, paired, unpacker): + # test for correct output shape when at least one input is empty + + if hypotest in override_propagate_funcs: + reason = "Doesn't follow the usual pattern. 
Tested separately." + pytest.skip(reason=reason) + + if unpacker is None: + unpacker = lambda res: (res[0], res[1]) # noqa: E731 + + def small_data_generator(n_samples, n_dims): + + def small_sample_generator(n_dims): + # return all possible "small" arrays in up to n_dim dimensions + for i in n_dims: + # "small" means with size along dimension either 0 or 1 + for combo in combinations_with_replacement([0, 1, 2], i): + yield np.zeros(combo) + + # yield all possible combinations of small samples + gens = [small_sample_generator(n_dims) for i in range(n_samples)] + yield from product(*gens) + + n_dims = [2, 3] + for samples in small_data_generator(n_samples, n_dims): + + # this test is only for arrays of zero size + if not any(sample.size == 0 for sample in samples): + continue + + max_axis = max(sample.ndim for sample in samples) + + # need to test for all valid values of `axis` parameter, too + for axis in range(-max_axis, max_axis): + + try: + # After broadcasting, all arrays are the same shape, so + # the shape of the output should be the same as a single- + # sample statistic. Use np.mean as a reference. 
+ concat = stats._stats_py._broadcast_concatenate(samples, axis) + with np.testing.suppress_warnings() as sup: + sup.filter(RuntimeWarning, "Mean of empty slice.") + sup.filter(RuntimeWarning, "invalid value encountered") + expected = np.mean(concat, axis=axis) * np.nan + + if hypotest in empty_special_case_funcs: + empty_val = hypotest(*([[]]*len(samples)), *args, **kwds) + mask = np.isnan(expected) + expected[mask] = empty_val + + with np.testing.suppress_warnings() as sup: + # generated by f_oneway for too_small inputs + sup.filter(stats.DegenerateDataWarning) + res = hypotest(*samples, *args, axis=axis, **kwds) + res = unpacker(res) + + for i in range(n_outputs): + assert_equal(res[i], expected) + + except ValueError: + # confirm that the arrays truly are not broadcastable + assert not _check_arrays_broadcastable(samples, + None if paired else axis) + + # confirm that _both_ `_broadcast_concatenate` and `hypotest` + # produce this information. + message = "Array shapes are incompatible for broadcasting." 
+ with pytest.raises(ValueError, match=message): + stats._stats_py._broadcast_concatenate(samples, axis, paired) + with pytest.raises(ValueError, match=message): + hypotest(*samples, *args, axis=axis, **kwds) + + +def test_masked_array_2_sentinel_array(): + # prepare arrays + np.random.seed(0) + A = np.random.rand(10, 11, 12) + B = np.random.rand(12) + mask = A < 0.5 + A = np.ma.masked_array(A, mask) + + # set arbitrary elements to special values + # (these values might have been considered for use as sentinel values) + max_float = np.finfo(np.float64).max + max_float2 = np.nextafter(max_float, -np.inf) + max_float3 = np.nextafter(max_float2, -np.inf) + A[3, 4, 1] = np.nan + A[4, 5, 2] = np.inf + A[5, 6, 3] = max_float + B[8] = np.nan + B[7] = np.inf + B[6] = max_float2 + + # convert masked A to array with sentinel value, don't modify B + out_arrays, sentinel = _masked_arrays_2_sentinel_arrays([A, B]) + A_out, B_out = out_arrays + + # check that good sentinel value was chosen (according to intended logic) + assert (sentinel != max_float) and (sentinel != max_float2) + assert sentinel == max_float3 + + # check that output arrays are as intended + A_reference = A.data + A_reference[A.mask] = sentinel + np.testing.assert_array_equal(A_out, A_reference) + assert B_out is B + + +def test_masked_dtype(): + # When _masked_arrays_2_sentinel_arrays was first added, it always + # upcast the arrays to np.float64. After gh16662, check expected promotion + # and that the expected sentinel is found. 
+ + # these are important because the max of the promoted dtype is the first + # candidate to be the sentinel value + max16 = np.iinfo(np.int16).max + max128c = np.finfo(np.complex128).max + + # a is a regular array, b has masked elements, and c has no masked elements + a = np.array([1, 2, max16], dtype=np.int16) + b = np.ma.array([1, 2, 1], dtype=np.int8, mask=[0, 1, 0]) + c = np.ma.array([1, 2, 1], dtype=np.complex128, mask=[0, 0, 0]) + + # check integer masked -> sentinel conversion + out_arrays, sentinel = _masked_arrays_2_sentinel_arrays([a, b]) + a_out, b_out = out_arrays + assert sentinel == max16-1 # not max16 because max16 was in the data + assert b_out.dtype == np.int16 # check expected promotion + assert_allclose(b_out, [b[0], sentinel, b[-1]]) # check sentinel placement + assert a_out is a # not a masked array, so left untouched + assert not isinstance(b_out, np.ma.MaskedArray) # b became regular array + + # similarly with complex + out_arrays, sentinel = _masked_arrays_2_sentinel_arrays([b, c]) + b_out, c_out = out_arrays + assert sentinel == max128c # max128c was not in the data + assert b_out.dtype == np.complex128 # b got promoted + assert_allclose(b_out, [b[0], sentinel, b[-1]]) # check sentinel placement + assert not isinstance(b_out, np.ma.MaskedArray) # b became regular array + assert not isinstance(c_out, np.ma.MaskedArray) # c became regular array + + # Also, check edge case when a sentinel value cannot be found in the data + min8, max8 = np.iinfo(np.int8).min, np.iinfo(np.int8).max + a = np.arange(min8, max8+1, dtype=np.int8) # use all possible values + mask1 = np.zeros_like(a, dtype=bool) + mask0 = np.zeros_like(a, dtype=bool) + + # a masked value can be used as the sentinel + mask1[1] = True + a1 = np.ma.array(a, mask=mask1) + out_arrays, sentinel = _masked_arrays_2_sentinel_arrays([a1]) + assert sentinel == min8+1 + + # unless it's the smallest possible; skipped for simiplicity (see code) + mask0[0] = True + a0 = np.ma.array(a, mask=mask0) 
+ message = "This function replaces masked elements with sentinel..." + with pytest.raises(ValueError, match=message): + _masked_arrays_2_sentinel_arrays([a0]) + + # test that dtype is preserved in functions + a = np.ma.array([1, 2, 3], mask=[0, 1, 0], dtype=np.float32) + assert stats.gmean(a).dtype == np.float32 + + +def test_masked_stat_1d(): + # basic test of _axis_nan_policy_factory with 1D masked sample + males = [19, 22, 16, 29, 24] + females = [20, 11, 17, 12] + res = stats.mannwhitneyu(males, females) + + # same result when extra nan is omitted + females2 = [20, 11, 17, np.nan, 12] + res2 = stats.mannwhitneyu(males, females2, nan_policy='omit') + np.testing.assert_array_equal(res2, res) + + # same result when extra element is masked + females3 = [20, 11, 17, 1000, 12] + mask3 = [False, False, False, True, False] + females3 = np.ma.masked_array(females3, mask=mask3) + res3 = stats.mannwhitneyu(males, females3) + np.testing.assert_array_equal(res3, res) + + # same result when extra nan is omitted and additional element is masked + females4 = [20, 11, 17, np.nan, 1000, 12] + mask4 = [False, False, False, False, True, False] + females4 = np.ma.masked_array(females4, mask=mask4) + res4 = stats.mannwhitneyu(males, females4, nan_policy='omit') + np.testing.assert_array_equal(res4, res) + + # same result when extra elements, including nan, are masked + females5 = [20, 11, 17, np.nan, 1000, 12] + mask5 = [False, False, False, True, True, False] + females5 = np.ma.masked_array(females5, mask=mask5) + res5 = stats.mannwhitneyu(males, females5, nan_policy='propagate') + res6 = stats.mannwhitneyu(males, females5, nan_policy='raise') + np.testing.assert_array_equal(res5, res) + np.testing.assert_array_equal(res6, res) + + +@pytest.mark.parametrize(("axis"), range(-3, 3)) +def test_masked_stat_3d(axis): + # basic test of _axis_nan_policy_factory with 3D masked sample + np.random.seed(0) + a = np.random.rand(3, 4, 5) + b = np.random.rand(4, 5) + c = np.random.rand(4, 1) + 
+ mask_a = a < 0.1 + mask_c = [False, False, False, True] + a_masked = np.ma.masked_array(a, mask=mask_a) + c_masked = np.ma.masked_array(c, mask=mask_c) + + a_nans = a.copy() + a_nans[mask_a] = np.nan + c_nans = c.copy() + c_nans[mask_c] = np.nan + + res = stats.kruskal(a_nans, b, c_nans, nan_policy='omit', axis=axis) + res2 = stats.kruskal(a_masked, b, c_masked, axis=axis) + np.testing.assert_array_equal(res, res2) + + +def test_mixed_mask_nan_1(): + # targeted test of _axis_nan_policy_factory with 2D masked sample: + # omitting samples with masks and nan_policy='omit' are equivalent + # also checks paired-sample sentinel value removal + m, n = 3, 20 + axis = -1 + + np.random.seed(0) + a = np.random.rand(m, n) + b = np.random.rand(m, n) + mask_a1 = np.random.rand(m, n) < 0.2 + mask_a2 = np.random.rand(m, n) < 0.1 + mask_b1 = np.random.rand(m, n) < 0.15 + mask_b2 = np.random.rand(m, n) < 0.15 + mask_a1[2, :] = True + + a_nans = a.copy() + b_nans = b.copy() + a_nans[mask_a1 | mask_a2] = np.nan + b_nans[mask_b1 | mask_b2] = np.nan + + a_masked1 = np.ma.masked_array(a, mask=mask_a1) + b_masked1 = np.ma.masked_array(b, mask=mask_b1) + a_masked1[mask_a2] = np.nan + b_masked1[mask_b2] = np.nan + + a_masked2 = np.ma.masked_array(a, mask=mask_a2) + b_masked2 = np.ma.masked_array(b, mask=mask_b2) + a_masked2[mask_a1] = np.nan + b_masked2[mask_b1] = np.nan + + a_masked3 = np.ma.masked_array(a, mask=(mask_a1 | mask_a2)) + b_masked3 = np.ma.masked_array(b, mask=(mask_b1 | mask_b2)) + + res = stats.wilcoxon(a_nans, b_nans, nan_policy='omit', axis=axis) + res1 = stats.wilcoxon(a_masked1, b_masked1, nan_policy='omit', axis=axis) + res2 = stats.wilcoxon(a_masked2, b_masked2, nan_policy='omit', axis=axis) + res3 = stats.wilcoxon(a_masked3, b_masked3, nan_policy='raise', axis=axis) + res4 = stats.wilcoxon(a_masked3, b_masked3, + nan_policy='propagate', axis=axis) + + np.testing.assert_array_equal(res1, res) + np.testing.assert_array_equal(res2, res) + 
np.testing.assert_array_equal(res3, res) + np.testing.assert_array_equal(res4, res) + + +def test_mixed_mask_nan_2(): + # targeted test of _axis_nan_policy_factory with 2D masked sample: + # check for expected interaction between masks and nans + + # Cases here are + # [mixed nan/mask, all nans, all masked, + # unmasked nan, masked nan, unmasked non-nan] + a = [[1, np.nan, 2], [np.nan, np.nan, np.nan], [1, 2, 3], + [1, np.nan, 3], [1, np.nan, 3], [1, 2, 3]] + mask = [[1, 0, 1], [0, 0, 0], [1, 1, 1], + [0, 0, 0], [0, 1, 0], [0, 0, 0]] + a_masked = np.ma.masked_array(a, mask=mask) + b = [[4, 5, 6]] + ref1 = stats.ranksums([1, 3], [4, 5, 6]) + ref2 = stats.ranksums([1, 2, 3], [4, 5, 6]) + + # nan_policy = 'omit' + # all elements are removed from first three rows + # middle element is removed from fourth and fifth rows + # no elements removed from last row + res = stats.ranksums(a_masked, b, nan_policy='omit', axis=-1) + stat_ref = [np.nan, np.nan, np.nan, + ref1.statistic, ref1.statistic, ref2.statistic] + p_ref = [np.nan, np.nan, np.nan, + ref1.pvalue, ref1.pvalue, ref2.pvalue] + np.testing.assert_array_equal(res.statistic, stat_ref) + np.testing.assert_array_equal(res.pvalue, p_ref) + + # nan_policy = 'propagate' + # nans propagate in first, second, and fourth row + # all elements are removed by mask from third row + # middle element is removed from fifth row + # no elements removed from last row + res = stats.ranksums(a_masked, b, nan_policy='propagate', axis=-1) + stat_ref = [np.nan, np.nan, np.nan, + np.nan, ref1.statistic, ref2.statistic] + p_ref = [np.nan, np.nan, np.nan, + np.nan, ref1.pvalue, ref2.pvalue] + np.testing.assert_array_equal(res.statistic, stat_ref) + np.testing.assert_array_equal(res.pvalue, p_ref) + + +def test_axis_None_vs_tuple(): + # `axis` `None` should be equivalent to tuple with all axes + shape = (3, 8, 9, 10) + rng = np.random.default_rng(0) + x = rng.random(shape) + res = stats.kruskal(*x, axis=None) + res2 = stats.kruskal(*x, axis=(0, 
1, 2)) + np.testing.assert_array_equal(res, res2) + + +def test_axis_None_vs_tuple_with_broadcasting(): + # `axis` `None` should be equivalent to tuple with all axes, + # which should be equivalent to raveling the arrays before passing them + rng = np.random.default_rng(0) + x = rng.random((5, 1)) + y = rng.random((1, 5)) + x2, y2 = np.broadcast_arrays(x, y) + + res0 = stats.mannwhitneyu(x.ravel(), y.ravel()) + res1 = stats.mannwhitneyu(x, y, axis=None) + res2 = stats.mannwhitneyu(x, y, axis=(0, 1)) + res3 = stats.mannwhitneyu(x2.ravel(), y2.ravel()) + + assert res1 == res0 + assert res2 == res0 + assert res3 != res0 + + +@pytest.mark.parametrize(("axis"), + list(permutations(range(-3, 3), 2)) + [(-4, 1)]) +def test_other_axis_tuples(axis): + # Check that _axis_nan_policy_factory treats all `axis` tuples as expected + rng = np.random.default_rng(0) + shape_x = (4, 5, 6) + shape_y = (1, 6) + x = rng.random(shape_x) + y = rng.random(shape_y) + axis_original = axis + + # convert axis elements to positive + axis = tuple([(i if i >= 0 else 3 + i) for i in axis]) + axis = sorted(axis) + + if len(set(axis)) != len(axis): + message = "`axis` must contain only distinct elements" + with pytest.raises(AxisError, match=re.escape(message)): + stats.mannwhitneyu(x, y, axis=axis_original) + return + + if axis[0] < 0 or axis[-1] > 2: + message = "`axis` is out of bounds for array of dimension 3" + with pytest.raises(AxisError, match=re.escape(message)): + stats.mannwhitneyu(x, y, axis=axis_original) + return + + res = stats.mannwhitneyu(x, y, axis=axis_original) + + # reference behavior + not_axis = {0, 1, 2} - set(axis) # which axis is not part of `axis` + not_axis = next(iter(not_axis)) # take it out of the set + + x2 = x + shape_y_broadcasted = [1, 1, 6] + shape_y_broadcasted[not_axis] = shape_x[not_axis] + y2 = np.broadcast_to(y, shape_y_broadcasted) + + m = x2.shape[not_axis] + x2 = np.moveaxis(x2, axis, (1, 2)) + y2 = np.moveaxis(y2, axis, (1, 2)) + x2 = np.reshape(x2, (m, 
-1)) + y2 = np.reshape(y2, (m, -1)) + res2 = stats.mannwhitneyu(x2, y2, axis=1) + + np.testing.assert_array_equal(res, res2) + + +@pytest.mark.parametrize( + ("weighted_fun_name, unpacker"), + [ + ("gmean", lambda x: x), + ("hmean", lambda x: x), + ("pmean", lambda x: x), + ("combine_pvalues", lambda x: (x.pvalue, x.statistic)), + ], +) +def test_mean_mixed_mask_nan_weights(weighted_fun_name, unpacker): + # targeted test of _axis_nan_policy_factory with 2D masked sample: + # omitting samples with masks and nan_policy='omit' are equivalent + # also checks paired-sample sentinel value removal + + if weighted_fun_name == 'pmean': + def weighted_fun(a, **kwargs): + return stats.pmean(a, p=0.42, **kwargs) + else: + weighted_fun = getattr(stats, weighted_fun_name) + + def func(*args, **kwargs): + return unpacker(weighted_fun(*args, **kwargs)) + + m, n = 3, 20 + axis = -1 + + rng = np.random.default_rng(6541968121) + a = rng.uniform(size=(m, n)) + b = rng.uniform(size=(m, n)) + mask_a1 = rng.uniform(size=(m, n)) < 0.2 + mask_a2 = rng.uniform(size=(m, n)) < 0.1 + mask_b1 = rng.uniform(size=(m, n)) < 0.15 + mask_b2 = rng.uniform(size=(m, n)) < 0.15 + mask_a1[2, :] = True + + a_nans = a.copy() + b_nans = b.copy() + a_nans[mask_a1 | mask_a2] = np.nan + b_nans[mask_b1 | mask_b2] = np.nan + + a_masked1 = np.ma.masked_array(a, mask=mask_a1) + b_masked1 = np.ma.masked_array(b, mask=mask_b1) + a_masked1[mask_a2] = np.nan + b_masked1[mask_b2] = np.nan + + a_masked2 = np.ma.masked_array(a, mask=mask_a2) + b_masked2 = np.ma.masked_array(b, mask=mask_b2) + a_masked2[mask_a1] = np.nan + b_masked2[mask_b1] = np.nan + + a_masked3 = np.ma.masked_array(a, mask=(mask_a1 | mask_a2)) + b_masked3 = np.ma.masked_array(b, mask=(mask_b1 | mask_b2)) + + mask_all = (mask_a1 | mask_a2 | mask_b1 | mask_b2) + a_masked4 = np.ma.masked_array(a, mask=mask_all) + b_masked4 = np.ma.masked_array(b, mask=mask_all) + + with np.testing.suppress_warnings() as sup: + message = 'invalid value encountered' + 
sup.filter(RuntimeWarning, message) + res = func(a_nans, weights=b_nans, nan_policy="omit", axis=axis) + res1 = func(a_masked1, weights=b_masked1, nan_policy="omit", axis=axis) + res2 = func(a_masked2, weights=b_masked2, nan_policy="omit", axis=axis) + res3 = func(a_masked3, weights=b_masked3, nan_policy="raise", axis=axis) + res4 = func(a_masked3, weights=b_masked3, nan_policy="propagate", axis=axis) + # Would test with a_masked3/b_masked3, but there is a bug in np.average + # that causes a bug in _no_deco mean with masked weights. Would use + # np.ma.average, but that causes other problems. See numpy/numpy#7330. + if weighted_fun_name in {"hmean"}: + weighted_fun_ma = getattr(stats.mstats, weighted_fun_name) + res5 = weighted_fun_ma(a_masked4, weights=b_masked4, + axis=axis, _no_deco=True) + + np.testing.assert_array_equal(res1, res) + np.testing.assert_array_equal(res2, res) + np.testing.assert_array_equal(res3, res) + np.testing.assert_array_equal(res4, res) + if weighted_fun_name in {"hmean"}: + # _no_deco mean returns masked array, last element was masked + np.testing.assert_allclose(res5.compressed(), res[~np.isnan(res)]) + + +def test_raise_invalid_args_g17713(): + # other cases are handled in: + # test_axis_nan_policy_decorated_positional_axis - multiple values for arg + # test_axis_nan_policy_decorated_positional_args - unexpected kwd arg + message = "got an unexpected keyword argument" + with pytest.raises(TypeError, match=message): + stats.gmean([1, 2, 3], invalid_arg=True) + + message = " got multiple values for argument" + with pytest.raises(TypeError, match=message): + stats.gmean([1, 2, 3], a=True) + + message = "missing 1 required positional argument" + with pytest.raises(TypeError, match=message): + stats.gmean() + + message = "takes from 1 to 4 positional arguments but 5 were given" + with pytest.raises(TypeError, match=message): + stats.gmean([1, 2, 3], 0, float, [1, 1, 1], 10) + + +@pytest.mark.parametrize('dtype', [np.int16, np.float32, 
np.complex128]) +def test_array_like_input(dtype): + # Check that `_axis_nan_policy`-decorated functions work with custom + # containers that are coercible to numeric arrays + + class ArrLike: + def __init__(self, x, dtype): + self._x = x + self._dtype = dtype + + def __array__(self, dtype=None, copy=None): + return np.asarray(x, dtype=self._dtype) + + x = [1]*2 + [3, 4, 5] + res = stats.mode(ArrLike(x, dtype=dtype)) + assert res.mode == 1 + assert res.count == 2 diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py b/.venv/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py new file mode 100644 index 0000000000000000000000000000000000000000..3a93a07e5422838d62aea9a6bc559901ee866d7a --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_binned_statistic.py @@ -0,0 +1,568 @@ +import numpy as np +from numpy.testing import assert_allclose +import pytest +from pytest import raises as assert_raises +from scipy.stats import (binned_statistic, binned_statistic_2d, + binned_statistic_dd) +from scipy._lib._util import check_random_state + +from .common_tests import check_named_results + + +class TestBinnedStatistic: + + @classmethod + def setup_class(cls): + rng = check_random_state(9865) + cls.x = rng.uniform(size=100) + cls.y = rng.uniform(size=100) + cls.v = rng.uniform(size=100) + cls.X = rng.uniform(size=(100, 3)) + cls.w = rng.uniform(size=100) + cls.u = rng.uniform(size=100) + 1e6 + + def test_1d_count(self): + x = self.x + v = self.v + + count1, edges1, bc = binned_statistic(x, v, 'count', bins=10) + count2, edges2 = np.histogram(x, bins=10) + + assert_allclose(count1, count2) + assert_allclose(edges1, edges2) + + def test_gh5927(self): + # smoke test for gh5927 - binned_statistic was using `is` for string + # comparison + x = self.x + v = self.v + statistics = ['mean', 'median', 'count', 'sum'] + for statistic in statistics: + binned_statistic(x, v, statistic, bins=10) + + def test_big_number_std(self): + # tests for 
numerical stability of std calculation + # see issue gh-10126 for more + x = self.x + u = self.u + stat1, edges1, bc = binned_statistic(x, u, 'std', bins=10) + stat2, edges2, bc = binned_statistic(x, u, np.std, bins=10) + + assert_allclose(stat1, stat2) + + def test_empty_bins_std(self): + # tests that std returns gives nan for empty bins + x = self.x + u = self.u + print(binned_statistic(x, u, 'count', bins=1000)) + stat1, edges1, bc = binned_statistic(x, u, 'std', bins=1000) + stat2, edges2, bc = binned_statistic(x, u, np.std, bins=1000) + + assert_allclose(stat1, stat2) + + def test_non_finite_inputs_and_int_bins(self): + # if either `values` or `sample` contain np.inf or np.nan throw + # see issue gh-9010 for more + x = self.x + u = self.u + orig = u[0] + u[0] = np.inf + assert_raises(ValueError, binned_statistic, u, x, 'std', bins=10) + # need to test for non-python specific ints, e.g. np.int8, np.int64 + assert_raises(ValueError, binned_statistic, u, x, 'std', + bins=np.int64(10)) + u[0] = np.nan + assert_raises(ValueError, binned_statistic, u, x, 'count', bins=10) + # replace original value, u belongs the class + u[0] = orig + + def test_1d_result_attributes(self): + x = self.x + v = self.v + + res = binned_statistic(x, v, 'count', bins=10) + attributes = ('statistic', 'bin_edges', 'binnumber') + check_named_results(res, attributes) + + def test_1d_sum(self): + x = self.x + v = self.v + + sum1, edges1, bc = binned_statistic(x, v, 'sum', bins=10) + sum2, edges2 = np.histogram(x, bins=10, weights=v) + + assert_allclose(sum1, sum2) + assert_allclose(edges1, edges2) + + def test_1d_mean(self): + x = self.x + v = self.v + + stat1, edges1, bc = binned_statistic(x, v, 'mean', bins=10) + stat2, edges2, bc = binned_statistic(x, v, np.mean, bins=10) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_1d_std(self): + x = self.x + v = self.v + + stat1, edges1, bc = binned_statistic(x, v, 'std', bins=10) + stat2, edges2, bc = 
binned_statistic(x, v, np.std, bins=10) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_1d_min(self): + x = self.x + v = self.v + + stat1, edges1, bc = binned_statistic(x, v, 'min', bins=10) + stat2, edges2, bc = binned_statistic(x, v, np.min, bins=10) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_1d_max(self): + x = self.x + v = self.v + + stat1, edges1, bc = binned_statistic(x, v, 'max', bins=10) + stat2, edges2, bc = binned_statistic(x, v, np.max, bins=10) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_1d_median(self): + x = self.x + v = self.v + + stat1, edges1, bc = binned_statistic(x, v, 'median', bins=10) + stat2, edges2, bc = binned_statistic(x, v, np.median, bins=10) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_1d_bincode(self): + x = self.x[:20] + v = self.v[:20] + + count1, edges1, bc = binned_statistic(x, v, 'count', bins=3) + bc2 = np.array([3, 2, 1, 3, 2, 3, 3, 3, 3, 1, 1, 3, 3, 1, 2, 3, 1, + 1, 2, 1]) + + bcount = [(bc == i).sum() for i in np.unique(bc)] + + assert_allclose(bc, bc2) + assert_allclose(bcount, count1) + + def test_1d_range_keyword(self): + # Regression test for gh-3063, range can be (min, max) or [(min, max)] + np.random.seed(9865) + x = np.arange(30) + data = np.random.random(30) + + mean, bins, _ = binned_statistic(x[:15], data[:15]) + mean_range, bins_range, _ = binned_statistic(x, data, range=[(0, 14)]) + mean_range2, bins_range2, _ = binned_statistic(x, data, range=(0, 14)) + + assert_allclose(mean, mean_range) + assert_allclose(bins, bins_range) + assert_allclose(mean, mean_range2) + assert_allclose(bins, bins_range2) + + def test_1d_multi_values(self): + x = self.x + v = self.v + w = self.w + + stat1v, edges1v, bc1v = binned_statistic(x, v, 'mean', bins=10) + stat1w, edges1w, bc1w = binned_statistic(x, w, 'mean', bins=10) + stat2, edges2, bc2 = binned_statistic(x, [v, w], 'mean', bins=10) + + 
assert_allclose(stat2[0], stat1v) + assert_allclose(stat2[1], stat1w) + assert_allclose(edges1v, edges2) + assert_allclose(bc1v, bc2) + + def test_2d_count(self): + x = self.x + y = self.y + v = self.v + + count1, binx1, biny1, bc = binned_statistic_2d( + x, y, v, 'count', bins=5) + count2, binx2, biny2 = np.histogram2d(x, y, bins=5) + + assert_allclose(count1, count2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_result_attributes(self): + x = self.x + y = self.y + v = self.v + + res = binned_statistic_2d(x, y, v, 'count', bins=5) + attributes = ('statistic', 'x_edge', 'y_edge', 'binnumber') + check_named_results(res, attributes) + + def test_2d_sum(self): + x = self.x + y = self.y + v = self.v + + sum1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'sum', bins=5) + sum2, binx2, biny2 = np.histogram2d(x, y, bins=5, weights=v) + + assert_allclose(sum1, sum2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_mean(self): + x = self.x + y = self.y + v = self.v + + stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'mean', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5) + + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_mean_unicode(self): + x = self.x + y = self.y + v = self.v + stat1, binx1, biny1, bc = binned_statistic_2d( + x, y, v, 'mean', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.mean, bins=5) + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_std(self): + x = self.x + y = self.y + v = self.v + + stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'std', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.std, bins=5) + + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_min(self): + x = self.x + y = self.y + v = self.v + + stat1, 
binx1, biny1, bc = binned_statistic_2d(x, y, v, 'min', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.min, bins=5) + + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_max(self): + x = self.x + y = self.y + v = self.v + + stat1, binx1, biny1, bc = binned_statistic_2d(x, y, v, 'max', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d(x, y, v, np.max, bins=5) + + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_median(self): + x = self.x + y = self.y + v = self.v + + stat1, binx1, biny1, bc = binned_statistic_2d( + x, y, v, 'median', bins=5) + stat2, binx2, biny2, bc = binned_statistic_2d( + x, y, v, np.median, bins=5) + + assert_allclose(stat1, stat2) + assert_allclose(binx1, binx2) + assert_allclose(biny1, biny2) + + def test_2d_bincode(self): + x = self.x[:20] + y = self.y[:20] + v = self.v[:20] + + count1, binx1, biny1, bc = binned_statistic_2d( + x, y, v, 'count', bins=3) + bc2 = np.array([17, 11, 6, 16, 11, 17, 18, 17, 17, 7, 6, 18, 16, + 6, 11, 16, 6, 6, 11, 8]) + + bcount = [(bc == i).sum() for i in np.unique(bc)] + + assert_allclose(bc, bc2) + count1adj = count1[count1.nonzero()] + assert_allclose(bcount, count1adj) + + def test_2d_multi_values(self): + x = self.x + y = self.y + v = self.v + w = self.w + + stat1v, binx1v, biny1v, bc1v = binned_statistic_2d( + x, y, v, 'mean', bins=8) + stat1w, binx1w, biny1w, bc1w = binned_statistic_2d( + x, y, w, 'mean', bins=8) + stat2, binx2, biny2, bc2 = binned_statistic_2d( + x, y, [v, w], 'mean', bins=8) + + assert_allclose(stat2[0], stat1v) + assert_allclose(stat2[1], stat1w) + assert_allclose(binx1v, binx2) + assert_allclose(biny1w, biny2) + assert_allclose(bc1v, bc2) + + def test_2d_binnumbers_unraveled(self): + x = self.x + y = self.y + v = self.v + + stat, edgesx, bcx = binned_statistic(x, v, 'mean', bins=20) + stat, edgesy, bcy = binned_statistic(y, v, 'mean', 
bins=10) + + stat2, edgesx2, edgesy2, bc2 = binned_statistic_2d( + x, y, v, 'mean', bins=(20, 10), expand_binnumbers=True) + + bcx3 = np.searchsorted(edgesx, x, side='right') + bcy3 = np.searchsorted(edgesy, y, side='right') + + # `numpy.searchsorted` is non-inclusive on right-edge, compensate + bcx3[x == x.max()] -= 1 + bcy3[y == y.max()] -= 1 + + assert_allclose(bcx, bc2[0]) + assert_allclose(bcy, bc2[1]) + assert_allclose(bcx3, bc2[0]) + assert_allclose(bcy3, bc2[1]) + + def test_dd_count(self): + X = self.X + v = self.v + + count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3) + count2, edges2 = np.histogramdd(X, bins=3) + + assert_allclose(count1, count2) + assert_allclose(edges1, edges2) + + def test_dd_result_attributes(self): + X = self.X + v = self.v + + res = binned_statistic_dd(X, v, 'count', bins=3) + attributes = ('statistic', 'bin_edges', 'binnumber') + check_named_results(res, attributes) + + def test_dd_sum(self): + X = self.X + v = self.v + + sum1, edges1, bc = binned_statistic_dd(X, v, 'sum', bins=3) + sum2, edges2 = np.histogramdd(X, bins=3, weights=v) + sum3, edges3, bc = binned_statistic_dd(X, v, np.sum, bins=3) + + assert_allclose(sum1, sum2) + assert_allclose(edges1, edges2) + assert_allclose(sum1, sum3) + assert_allclose(edges1, edges3) + + def test_dd_mean(self): + X = self.X + v = self.v + + stat1, edges1, bc = binned_statistic_dd(X, v, 'mean', bins=3) + stat2, edges2, bc = binned_statistic_dd(X, v, np.mean, bins=3) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_dd_std(self): + X = self.X + v = self.v + + stat1, edges1, bc = binned_statistic_dd(X, v, 'std', bins=3) + stat2, edges2, bc = binned_statistic_dd(X, v, np.std, bins=3) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_dd_min(self): + X = self.X + v = self.v + + stat1, edges1, bc = binned_statistic_dd(X, v, 'min', bins=3) + stat2, edges2, bc = binned_statistic_dd(X, v, np.min, bins=3) + + 
assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_dd_max(self): + X = self.X + v = self.v + + stat1, edges1, bc = binned_statistic_dd(X, v, 'max', bins=3) + stat2, edges2, bc = binned_statistic_dd(X, v, np.max, bins=3) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_dd_median(self): + X = self.X + v = self.v + + stat1, edges1, bc = binned_statistic_dd(X, v, 'median', bins=3) + stat2, edges2, bc = binned_statistic_dd(X, v, np.median, bins=3) + + assert_allclose(stat1, stat2) + assert_allclose(edges1, edges2) + + def test_dd_bincode(self): + X = self.X[:20] + v = self.v[:20] + + count1, edges1, bc = binned_statistic_dd(X, v, 'count', bins=3) + bc2 = np.array([63, 33, 86, 83, 88, 67, 57, 33, 42, 41, 82, 83, 92, + 32, 36, 91, 43, 87, 81, 81]) + + bcount = [(bc == i).sum() for i in np.unique(bc)] + + assert_allclose(bc, bc2) + count1adj = count1[count1.nonzero()] + assert_allclose(bcount, count1adj) + + def test_dd_multi_values(self): + X = self.X + v = self.v + w = self.w + + for stat in ["count", "sum", "mean", "std", "min", "max", "median", + np.std]: + stat1v, edges1v, bc1v = binned_statistic_dd(X, v, stat, bins=8) + stat1w, edges1w, bc1w = binned_statistic_dd(X, w, stat, bins=8) + stat2, edges2, bc2 = binned_statistic_dd(X, [v, w], stat, bins=8) + assert_allclose(stat2[0], stat1v) + assert_allclose(stat2[1], stat1w) + assert_allclose(edges1v, edges2) + assert_allclose(edges1w, edges2) + assert_allclose(bc1v, bc2) + + def test_dd_binnumbers_unraveled(self): + X = self.X + v = self.v + + stat, edgesx, bcx = binned_statistic(X[:, 0], v, 'mean', bins=15) + stat, edgesy, bcy = binned_statistic(X[:, 1], v, 'mean', bins=20) + stat, edgesz, bcz = binned_statistic(X[:, 2], v, 'mean', bins=10) + + stat2, edges2, bc2 = binned_statistic_dd( + X, v, 'mean', bins=(15, 20, 10), expand_binnumbers=True) + + assert_allclose(bcx, bc2[0]) + assert_allclose(bcy, bc2[1]) + assert_allclose(bcz, bc2[2]) + + def 
test_dd_binned_statistic_result(self): + # NOTE: tests the reuse of bin_edges from previous call + x = np.random.random((10000, 3)) + v = np.random.random(10000) + bins = np.linspace(0, 1, 10) + bins = (bins, bins, bins) + + result = binned_statistic_dd(x, v, 'mean', bins=bins) + stat = result.statistic + + result = binned_statistic_dd(x, v, 'mean', + binned_statistic_result=result) + stat2 = result.statistic + + assert_allclose(stat, stat2) + + def test_dd_zero_dedges(self): + x = np.random.random((10000, 3)) + v = np.random.random(10000) + bins = np.linspace(0, 1, 10) + bins = np.append(bins, 1) + bins = (bins, bins, bins) + with assert_raises(ValueError, match='difference is numerically 0'): + binned_statistic_dd(x, v, 'mean', bins=bins) + + def test_dd_range_errors(self): + # Test that descriptive exceptions are raised as appropriate for bad + # values of the `range` argument. (See gh-12996) + with assert_raises(ValueError, + match='In range, start must be <= stop'): + binned_statistic_dd([self.y], self.v, + range=[[1, 0]]) + with assert_raises( + ValueError, + match='In dimension 1 of range, start must be <= stop'): + binned_statistic_dd([self.x, self.y], self.v, + range=[[1, 0], [0, 1]]) + with assert_raises( + ValueError, + match='In dimension 2 of range, start must be <= stop'): + binned_statistic_dd([self.x, self.y], self.v, + range=[[0, 1], [1, 0]]) + with assert_raises( + ValueError, + match='range given for 1 dimensions; 2 required'): + binned_statistic_dd([self.x, self.y], self.v, + range=[[0, 1]]) + + def test_binned_statistic_float32(self): + X = np.array([0, 0.42358226], dtype=np.float32) + stat, _, _ = binned_statistic(X, None, 'count', bins=5) + assert_allclose(stat, np.array([1, 0, 0, 0, 1], dtype=np.float64)) + + def test_gh14332(self): + # Test the wrong output when the `sample` is close to bin edge + x = [] + size = 20 + for i in range(size): + x += [1-0.1**i] + + bins = np.linspace(0,1,11) + sum1, edges1, bc = binned_statistic_dd(x, 
np.ones(len(x)), + bins=[bins], statistic='sum') + sum2, edges2 = np.histogram(x, bins=bins) + + assert_allclose(sum1, sum2) + assert_allclose(edges1[0], edges2) + + @pytest.mark.parametrize("dtype", [np.float64, np.complex128]) + @pytest.mark.parametrize("statistic", [np.mean, np.median, np.sum, np.std, + np.min, np.max, 'count', + lambda x: (x**2).sum(), + lambda x: (x**2).sum() * 1j]) + def test_dd_all(self, dtype, statistic): + def ref_statistic(x): + return len(x) if statistic == 'count' else statistic(x) + + rng = np.random.default_rng(3704743126639371) + n = 10 + x = rng.random(size=n) + i = x >= 0.5 + v = rng.random(size=n) + if dtype is np.complex128: + v = v + rng.random(size=n)*1j + + stat, _, _ = binned_statistic_dd(x, v, statistic, bins=2) + ref = np.array([ref_statistic(v[~i]), ref_statistic(v[i])]) + assert_allclose(stat, ref) + assert stat.dtype == np.result_type(ref.dtype, np.float64) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_boost_ufuncs.py b/.venv/Lib/site-packages/scipy/stats/tests/test_boost_ufuncs.py new file mode 100644 index 0000000000000000000000000000000000000000..b644b04f7c72158bb79655ea9b5a26d56c523962 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_boost_ufuncs.py @@ -0,0 +1,47 @@ +import pytest +import numpy as np +from numpy.testing import assert_allclose +from scipy.stats import _boost + + +type_char_to_type_tol = {'f': (np.float32, 32*np.finfo(np.float32).eps), + 'd': (np.float64, 32*np.finfo(np.float64).eps)} + + +# Each item in this list is +# (func, args, expected_value) +# All the values can be represented exactly, even with np.float32. +# +# This is not an exhaustive test data set of all the functions! +# It is a spot check of several functions, primarily for +# checking that the different data types are handled correctly. 
+test_data = [ + (_boost._beta_cdf, (0.5, 2, 3), 0.6875), + (_boost._beta_ppf, (0.6875, 2, 3), 0.5), + (_boost._beta_pdf, (0.5, 2, 3), 1.5), + (_boost._beta_pdf, (0, 1, 5), 5.0), + (_boost._beta_pdf, (1, 5, 1), 5.0), + (_boost._beta_sf, (0.5, 2, 1), 0.75), + (_boost._beta_isf, (0.75, 2, 1), 0.5), + (_boost._binom_cdf, (1, 3, 0.5), 0.5), + (_boost._binom_pdf, (1, 4, 0.5), 0.25), + (_boost._hypergeom_cdf, (2, 3, 5, 6), 0.5), + (_boost._nbinom_cdf, (1, 4, 0.25), 0.015625), + (_boost._ncf_mean, (10, 12, 2.5), 1.5), +] + + +@pytest.mark.parametrize('func, args, expected', test_data) +def test_stats_boost_ufunc(func, args, expected): + type_sigs = func.types + type_chars = [sig.split('->')[-1] for sig in type_sigs] + for type_char in type_chars: + typ, rtol = type_char_to_type_tol[type_char] + args = [typ(arg) for arg in args] + # Harmless overflow warnings are a "feature" of some wrappers on some + # platforms. This test is about dtype and accuracy, so let's avoid false + # test failures cause by these warnings. See gh-17432. + with np.errstate(over='ignore'): + value = func(*args) + assert isinstance(value, typ) + assert_allclose(value, expected, rtol=rtol) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_censored_data.py b/.venv/Lib/site-packages/scipy/stats/tests/test_censored_data.py new file mode 100644 index 0000000000000000000000000000000000000000..1bf3d2db41ab821e1575b503b3e7c7566673fbe8 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_censored_data.py @@ -0,0 +1,152 @@ +# Tests for the CensoredData class. 
+ +import pytest +import numpy as np +from numpy.testing import assert_equal, assert_array_equal +from scipy.stats import CensoredData + + +class TestCensoredData: + + def test_basic(self): + uncensored = [1] + left = [0] + right = [2, 5] + interval = [[2, 3]] + data = CensoredData(uncensored, left=left, right=right, + interval=interval) + assert_equal(data._uncensored, uncensored) + assert_equal(data._left, left) + assert_equal(data._right, right) + assert_equal(data._interval, interval) + + udata = data._uncensor() + assert_equal(udata, np.concatenate((uncensored, left, right, + np.mean(interval, axis=1)))) + + def test_right_censored(self): + x = np.array([0, 3, 2.5]) + is_censored = np.array([0, 1, 0], dtype=bool) + data = CensoredData.right_censored(x, is_censored) + assert_equal(data._uncensored, x[~is_censored]) + assert_equal(data._right, x[is_censored]) + assert_equal(data._left, []) + assert_equal(data._interval, np.empty((0, 2))) + + def test_left_censored(self): + x = np.array([0, 3, 2.5]) + is_censored = np.array([0, 1, 0], dtype=bool) + data = CensoredData.left_censored(x, is_censored) + assert_equal(data._uncensored, x[~is_censored]) + assert_equal(data._left, x[is_censored]) + assert_equal(data._right, []) + assert_equal(data._interval, np.empty((0, 2))) + + def test_interval_censored_basic(self): + a = [0.5, 2.0, 3.0, 5.5] + b = [1.0, 2.5, 3.5, 7.0] + data = CensoredData.interval_censored(low=a, high=b) + assert_array_equal(data._interval, np.array(list(zip(a, b)))) + assert data._uncensored.shape == (0,) + assert data._left.shape == (0,) + assert data._right.shape == (0,) + + def test_interval_censored_mixed(self): + # This is actually a mix of uncensored, left-censored, right-censored + # and interval-censored data. Check that when the `interval_censored` + # class method is used, the data is correctly separated into the + # appropriate arrays. 
+ a = [0.5, -np.inf, -13.0, 2.0, 1.0, 10.0, -1.0] + b = [0.5, 2500.0, np.inf, 3.0, 1.0, 11.0, np.inf] + data = CensoredData.interval_censored(low=a, high=b) + assert_array_equal(data._interval, [[2.0, 3.0], [10.0, 11.0]]) + assert_array_equal(data._uncensored, [0.5, 1.0]) + assert_array_equal(data._left, [2500.0]) + assert_array_equal(data._right, [-13.0, -1.0]) + + def test_interval_to_other_types(self): + # The interval parameter can represent uncensored and + # left- or right-censored data. Test the conversion of such + # an example to the canonical form in which the different + # types have been split into the separate arrays. + interval = np.array([[0, 1], # interval-censored + [2, 2], # not censored + [3, 3], # not censored + [9, np.inf], # right-censored + [8, np.inf], # right-censored + [-np.inf, 0], # left-censored + [1, 2]]) # interval-censored + data = CensoredData(interval=interval) + assert_equal(data._uncensored, [2, 3]) + assert_equal(data._left, [0]) + assert_equal(data._right, [9, 8]) + assert_equal(data._interval, [[0, 1], [1, 2]]) + + def test_empty_arrays(self): + data = CensoredData(uncensored=[], left=[], right=[], interval=[]) + assert data._uncensored.shape == (0,) + assert data._left.shape == (0,) + assert data._right.shape == (0,) + assert data._interval.shape == (0, 2) + assert len(data) == 0 + + def test_invalid_constructor_args(self): + with pytest.raises(ValueError, match='must be a one-dimensional'): + CensoredData(uncensored=[[1, 2, 3]]) + with pytest.raises(ValueError, match='must be a one-dimensional'): + CensoredData(left=[[1, 2, 3]]) + with pytest.raises(ValueError, match='must be a one-dimensional'): + CensoredData(right=[[1, 2, 3]]) + with pytest.raises(ValueError, match='must be a two-dimensional'): + CensoredData(interval=[[1, 2, 3]]) + + with pytest.raises(ValueError, match='must not contain nan'): + CensoredData(uncensored=[1, np.nan, 2]) + with pytest.raises(ValueError, match='must not contain nan'): + 
CensoredData(left=[1, np.nan, 2]) + with pytest.raises(ValueError, match='must not contain nan'): + CensoredData(right=[1, np.nan, 2]) + with pytest.raises(ValueError, match='must not contain nan'): + CensoredData(interval=[[1, np.nan], [2, 3]]) + + with pytest.raises(ValueError, + match='both values must not be infinite'): + CensoredData(interval=[[1, 3], [2, 9], [np.inf, np.inf]]) + + with pytest.raises(ValueError, + match='left value must not exceed the right'): + CensoredData(interval=[[1, 0], [2, 2]]) + + @pytest.mark.parametrize('func', [CensoredData.left_censored, + CensoredData.right_censored]) + def test_invalid_left_right_censored_args(self, func): + with pytest.raises(ValueError, + match='`x` must be one-dimensional'): + func([[1, 2, 3]], [0, 1, 1]) + with pytest.raises(ValueError, + match='`censored` must be one-dimensional'): + func([1, 2, 3], [[0, 1, 1]]) + with pytest.raises(ValueError, match='`x` must not contain'): + func([1, 2, np.nan], [0, 1, 1]) + with pytest.raises(ValueError, match='must have the same length'): + func([1, 2, 3], [0, 0, 1, 1]) + + def test_invalid_censored_args(self): + with pytest.raises(ValueError, + match='`low` must be a one-dimensional'): + CensoredData.interval_censored(low=[[3]], high=[4, 5]) + with pytest.raises(ValueError, + match='`high` must be a one-dimensional'): + CensoredData.interval_censored(low=[3], high=[[4, 5]]) + with pytest.raises(ValueError, match='`low` must not contain'): + CensoredData.interval_censored([1, 2, np.nan], [0, 1, 1]) + with pytest.raises(ValueError, match='must have the same length'): + CensoredData.interval_censored([1, 2, 3], [0, 0, 1, 1]) + + def test_count_censored(self): + x = [1, 2, 3] + # data1 has no censored data. 
+ data1 = CensoredData(x) + assert data1.num_censored() == 0 + data2 = CensoredData(uncensored=[2.5], left=[10], interval=[[0, 1]]) + assert data2.num_censored() == 2 diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_contingency.py b/.venv/Lib/site-packages/scipy/stats/tests/test_contingency.py new file mode 100644 index 0000000000000000000000000000000000000000..a652f035709ee2f7307e9a51d000c5f6a15626bb --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_contingency.py @@ -0,0 +1,241 @@ +import numpy as np +from numpy.testing import (assert_equal, assert_array_equal, + assert_array_almost_equal, assert_approx_equal, + assert_allclose) +import pytest +from pytest import raises as assert_raises +from scipy.special import xlogy +from scipy.stats.contingency import (margins, expected_freq, + chi2_contingency, association) + + +def test_margins(): + a = np.array([1]) + m = margins(a) + assert_equal(len(m), 1) + m0 = m[0] + assert_array_equal(m0, np.array([1])) + + a = np.array([[1]]) + m0, m1 = margins(a) + expected0 = np.array([[1]]) + expected1 = np.array([[1]]) + assert_array_equal(m0, expected0) + assert_array_equal(m1, expected1) + + a = np.arange(12).reshape(2, 6) + m0, m1 = margins(a) + expected0 = np.array([[15], [51]]) + expected1 = np.array([[6, 8, 10, 12, 14, 16]]) + assert_array_equal(m0, expected0) + assert_array_equal(m1, expected1) + + a = np.arange(24).reshape(2, 3, 4) + m0, m1, m2 = margins(a) + expected0 = np.array([[[66]], [[210]]]) + expected1 = np.array([[[60], [92], [124]]]) + expected2 = np.array([[[60, 66, 72, 78]]]) + assert_array_equal(m0, expected0) + assert_array_equal(m1, expected1) + assert_array_equal(m2, expected2) + + +def test_expected_freq(): + assert_array_equal(expected_freq([1]), np.array([1.0])) + + observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]]) + e = expected_freq(observed) + assert_array_equal(e, np.ones_like(observed)) + + observed = np.array([[10, 10, 20], [20, 20, 20]]) + e 
= expected_freq(observed) + correct = np.array([[12., 12., 16.], [18., 18., 24.]]) + assert_array_almost_equal(e, correct) + + +def test_chi2_contingency_trivial(): + # Some very simple tests for chi2_contingency. + + # A trivial case + obs = np.array([[1, 2], [1, 2]]) + chi2, p, dof, expected = chi2_contingency(obs, correction=False) + assert_equal(chi2, 0.0) + assert_equal(p, 1.0) + assert_equal(dof, 1) + assert_array_equal(obs, expected) + + # A *really* trivial case: 1-D data. + obs = np.array([1, 2, 3]) + chi2, p, dof, expected = chi2_contingency(obs, correction=False) + assert_equal(chi2, 0.0) + assert_equal(p, 1.0) + assert_equal(dof, 0) + assert_array_equal(obs, expected) + + +def test_chi2_contingency_R(): + # Some test cases that were computed independently, using R. + + # Rcode = \ + # """ + # # Data vector. + # data <- c( + # 12, 34, 23, 4, 47, 11, + # 35, 31, 11, 34, 10, 18, + # 12, 32, 9, 18, 13, 19, + # 12, 12, 14, 9, 33, 25 + # ) + # + # # Create factor tags:r=rows, c=columns, t=tiers + # r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4"))) + # c <- factor(gl(3, 1, 2*3*4, labels=c("c1", "c2", "c3"))) + # t <- factor(gl(2, 3, 2*3*4, labels=c("t1", "t2"))) + # + # # 3-way Chi squared test of independence + # s = summary(xtabs(data~r+c+t)) + # print(s) + # """ + # Routput = \ + # """ + # Call: xtabs(formula = data ~ r + c + t) + # Number of cases in table: 478 + # Number of factors: 3 + # Test for independence of all factors: + # Chisq = 102.17, df = 17, p-value = 3.514e-14 + # """ + obs = np.array( + [[[12, 34, 23], + [35, 31, 11], + [12, 32, 9], + [12, 12, 14]], + [[4, 47, 11], + [34, 10, 18], + [18, 13, 19], + [9, 33, 25]]]) + chi2, p, dof, expected = chi2_contingency(obs) + assert_approx_equal(chi2, 102.17, significant=5) + assert_approx_equal(p, 3.514e-14, significant=4) + assert_equal(dof, 17) + + # Rcode = \ + # """ + # # Data vector. 
+ # data <- c( + # # + # 12, 17, + # 11, 16, + # # + # 11, 12, + # 15, 16, + # # + # 23, 15, + # 30, 22, + # # + # 14, 17, + # 15, 16 + # ) + # + # # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers + # r <- factor(gl(2, 2, 2*2*2*2, labels=c("r1", "r2"))) + # c <- factor(gl(2, 1, 2*2*2*2, labels=c("c1", "c2"))) + # d <- factor(gl(2, 4, 2*2*2*2, labels=c("d1", "d2"))) + # t <- factor(gl(2, 8, 2*2*2*2, labels=c("t1", "t2"))) + # + # # 4-way Chi squared test of independence + # s = summary(xtabs(data~r+c+d+t)) + # print(s) + # """ + # Routput = \ + # """ + # Call: xtabs(formula = data ~ r + c + d + t) + # Number of cases in table: 262 + # Number of factors: 4 + # Test for independence of all factors: + # Chisq = 8.758, df = 11, p-value = 0.6442 + # """ + obs = np.array( + [[[[12, 17], + [11, 16]], + [[11, 12], + [15, 16]]], + [[[23, 15], + [30, 22]], + [[14, 17], + [15, 16]]]]) + chi2, p, dof, expected = chi2_contingency(obs) + assert_approx_equal(chi2, 8.758, significant=4) + assert_approx_equal(p, 0.6442, significant=4) + assert_equal(dof, 11) + + +def test_chi2_contingency_g(): + c = np.array([[15, 60], [15, 90]]) + g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', + correction=False) + assert_allclose(g, 2*xlogy(c, c/e).sum()) + + g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood', + correction=True) + c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]]) + assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum()) + + c = np.array([[10, 12, 10], [12, 10, 10]]) + g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood') + assert_allclose(g, 2*xlogy(c, c/e).sum()) + + +def test_chi2_contingency_bad_args(): + # Test that "bad" inputs raise a ValueError. + + # Negative value in the array of observed frequencies. + obs = np.array([[-1, 10], [1, 2]]) + assert_raises(ValueError, chi2_contingency, obs) + + # The zeros in this will result in zeros in the array + # of expected frequencies. 
+ obs = np.array([[0, 1], [0, 1]]) + assert_raises(ValueError, chi2_contingency, obs) + + # A degenerate case: `observed` has size 0. + obs = np.empty((0, 8)) + assert_raises(ValueError, chi2_contingency, obs) + + +def test_chi2_contingency_yates_gh13875(): + # Magnitude of Yates' continuity correction should not exceed difference + # between expected and observed value of the statistic; see gh-13875 + observed = np.array([[1573, 3], [4, 0]]) + p = chi2_contingency(observed)[1] + assert_allclose(p, 1, rtol=1e-12) + + +@pytest.mark.parametrize("correction", [False, True]) +def test_result(correction): + obs = np.array([[1, 2], [1, 2]]) + res = chi2_contingency(obs, correction=correction) + assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res) + + +def test_bad_association_args(): + # Invalid Test Statistic + assert_raises(ValueError, association, [[1, 2], [3, 4]], "X") + # Invalid array shape + assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer") + # chi2_contingency exception + assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer') + # Invalid Array Item Data Type + assert_raises(ValueError, association, + np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer') + + +@pytest.mark.parametrize('stat, expected', + [('cramer', 0.09222412010290792), + ('tschuprow', 0.0775509319944633), + ('pearson', 0.12932925727138758)]) +def test_assoc(stat, expected): + # 2d Array + obs1 = np.array([[12, 13, 14, 15, 16], + [17, 16, 18, 19, 11], + [9, 15, 14, 12, 11]]) + a = association(observed=obs1, method=stat) + assert_allclose(a, expected) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py b/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..0ca5a80c677b24d286087c5e3f5d33b3b9b493ac --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_basic.py @@ -0,0 +1,1016 @@ +import sys +import numpy as np 
+import numpy.testing as npt +import pytest +from pytest import raises as assert_raises +from scipy.integrate import IntegrationWarning +import itertools + +from scipy import stats +from .common_tests import (check_normalization, check_moment, + check_mean_expect, + check_var_expect, check_skew_expect, + check_kurt_expect, check_entropy, + check_private_entropy, check_entropy_vect_scale, + check_edge_support, check_named_args, + check_random_state_property, + check_meth_dtype, check_ppf_dtype, + check_cmplx_deriv, + check_pickling, check_rvs_broadcast, + check_freezing, check_munp_expect,) +from scipy.stats._distr_params import distcont +from scipy.stats._distn_infrastructure import rv_continuous_frozen + +""" +Test all continuous distributions. + +Parameters were chosen for those distributions that pass the +Kolmogorov-Smirnov test. This provides safe parameters for each +distributions so that we can perform further testing of class methods. + +These tests currently check only/mostly for serious errors and exceptions, +not for numerically exact results. 
+""" + +# Note that you need to add new distributions you want tested +# to _distr_params + +DECIMAL = 5 # specify the precision of the tests # increased from 0 to 5 +_IS_32BIT = (sys.maxsize < 2**32) + +# For skipping test_cont_basic +distslow = ['recipinvgauss', 'vonmises', 'kappa4', 'vonmises_line', + 'gausshyper', 'norminvgauss', 'geninvgauss', 'genhyperbolic', + 'truncnorm', 'truncweibull_min'] + +# distxslow are sorted by speed (very slow to slow) +distxslow = ['studentized_range', 'kstwo', 'ksone', 'wrapcauchy', 'genexpon'] + +# For skipping test_moments, which is already marked slow +distxslow_test_moments = ['studentized_range', 'vonmises', 'vonmises_line', + 'ksone', 'kstwo', 'recipinvgauss', 'genexpon'] + +# skip check_fit_args (test is slow) +skip_fit_test_mle = ['exponpow', 'exponweib', 'gausshyper', 'genexpon', + 'halfgennorm', 'gompertz', 'johnsonsb', 'johnsonsu', + 'kappa4', 'ksone', 'kstwo', 'kstwobign', 'mielke', 'ncf', + 'nct', 'powerlognorm', 'powernorm', 'recipinvgauss', + 'trapezoid', 'vonmises', 'vonmises_line', 'levy_stable', + 'rv_histogram_instance', 'studentized_range'] + +# these were really slow in `test_fit`.py. 
+# note that this list is used to skip both fit_test and fit_fix tests +slow_fit_test_mm = ['argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon', + 'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb', + 'kappa4', 'kstwobign', 'recipinvgauss', + 'trapezoid', 'truncexpon', 'vonmises', 'vonmises_line', + 'studentized_range'] +# pearson3 fails due to something weird +# the first list fails due to non-finite distribution moments encountered +# most of the rest fail due to integration warnings +# pearson3 is overridden as not implemented due to gh-11746 +fail_fit_test_mm = (['alpha', 'betaprime', 'bradford', 'burr', 'burr12', + 'cauchy', 'crystalball', 'f', 'fisk', 'foldcauchy', + 'genextreme', 'genpareto', 'halfcauchy', 'invgamma', + 'jf_skew_t', 'kappa3', 'levy', 'levy_l', 'loglaplace', + 'lomax', 'mielke', 'nakagami', 'ncf', 'skewcauchy', 't', + 'tukeylambda', 'invweibull', 'rel_breitwigner'] + + ['genhyperbolic', 'johnsonsu', 'ksone', 'kstwo', + 'nct', 'pareto', 'powernorm', 'powerlognorm'] + + ['pearson3']) + +skip_fit_test = {"MLE": skip_fit_test_mle, + "MM": slow_fit_test_mm + fail_fit_test_mm} + +# skip check_fit_args_fix (test is slow) +skip_fit_fix_test_mle = ['burr', 'exponpow', 'exponweib', 'gausshyper', + 'genexpon', 'halfgennorm', 'gompertz', 'johnsonsb', + 'johnsonsu', 'kappa4', 'ksone', 'kstwo', 'kstwobign', + 'levy_stable', 'mielke', 'ncf', 'ncx2', + 'powerlognorm', 'powernorm', 'rdist', 'recipinvgauss', + 'trapezoid', 'truncpareto', 'vonmises', 'vonmises_line', + 'studentized_range'] +# the first list fails due to non-finite distribution moments encountered +# most of the rest fail due to integration warnings +# pearson3 is overridden as not implemented due to gh-11746 +fail_fit_fix_test_mm = (['alpha', 'betaprime', 'burr', 'burr12', 'cauchy', + 'crystalball', 'f', 'fisk', 'foldcauchy', + 'genextreme', 'genpareto', 'halfcauchy', 'invgamma', + 'jf_skew_t', 'kappa3', 'levy', 'levy_l', 'loglaplace', + 'lomax', 'mielke', 'nakagami', 'ncf', 
'nct', + 'skewcauchy', 't', 'truncpareto', 'invweibull'] + + ['genhyperbolic', 'johnsonsu', 'ksone', 'kstwo', + 'pareto', 'powernorm', 'powerlognorm'] + + ['pearson3']) +skip_fit_fix_test = {"MLE": skip_fit_fix_test_mle, + "MM": slow_fit_test_mm + fail_fit_fix_test_mm} + +# These distributions fail the complex derivative test below. +# Here 'fail' mean produce wrong results and/or raise exceptions, depending +# on the implementation details of corresponding special functions. +# cf https://github.com/scipy/scipy/pull/4979 for a discussion. +fails_cmplx = {'argus', 'beta', 'betaprime', 'chi', 'chi2', 'cosine', + 'dgamma', 'dweibull', 'erlang', 'f', 'foldcauchy', 'gamma', + 'gausshyper', 'gengamma', 'genhyperbolic', + 'geninvgauss', 'gennorm', 'genpareto', + 'halfcauchy', 'halfgennorm', 'invgamma', 'jf_skew_t', + 'ksone', 'kstwo', 'kstwobign', 'levy_l', 'loggamma', + 'logistic', 'loguniform', 'maxwell', 'nakagami', + 'ncf', 'nct', 'ncx2', 'norminvgauss', 'pearson3', + 'powerlaw', 'rdist', 'reciprocal', 'rice', + 'skewnorm', 't', 'truncweibull_min', + 'tukeylambda', 'vonmises', 'vonmises_line', + 'rv_histogram_instance', 'truncnorm', 'studentized_range', + 'johnsonsb', 'halflogistic', 'rel_breitwigner'} + + +# rv_histogram instances, with uniform and non-uniform bins; +# stored as (dist, arg) tuples for cases_test_cont_basic +# and cases_test_moments. 
+histogram_test_instances = [] +case1 = {'a': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, + 6, 6, 6, 7, 7, 7, 8, 8, 9], 'bins': 8} # equal width bins +case2 = {'a': [1, 1], 'bins': [0, 1, 10]} # unequal width bins +for case, density in itertools.product([case1, case2], [True, False]): + _hist = np.histogram(**case, density=density) + _rv_hist = stats.rv_histogram(_hist, density=density) + histogram_test_instances.append((_rv_hist, tuple())) + + +def cases_test_cont_basic(): + for distname, arg in distcont[:] + histogram_test_instances: + if distname == 'levy_stable': + continue + elif distname in distslow: + yield pytest.param(distname, arg, marks=pytest.mark.slow) + elif distname in distxslow: + yield pytest.param(distname, arg, marks=pytest.mark.xslow) + else: + yield distname, arg + + +@pytest.mark.parametrize('distname,arg', cases_test_cont_basic()) +@pytest.mark.parametrize('sn, n_fit_samples', [(500, 200)]) +def test_cont_basic(distname, arg, sn, n_fit_samples): + # this test skips slow distributions + + try: + distfn = getattr(stats, distname) + except TypeError: + distfn = distname + distname = 'rv_histogram_instance' + + rng = np.random.RandomState(765456) + rvs = distfn.rvs(size=sn, *arg, random_state=rng) + m, v = distfn.stats(*arg) + + if distname not in {'laplace_asymmetric'}: + check_sample_meanvar_(m, v, rvs) + check_cdf_ppf(distfn, arg, distname) + check_sf_isf(distfn, arg, distname) + check_cdf_sf(distfn, arg, distname) + check_ppf_isf(distfn, arg, distname) + check_pdf(distfn, arg, distname) + check_pdf_logpdf(distfn, arg, distname) + check_pdf_logpdf_at_endpoints(distfn, arg, distname) + check_cdf_logcdf(distfn, arg, distname) + check_sf_logsf(distfn, arg, distname) + check_ppf_broadcast(distfn, arg, distname) + + alpha = 0.01 + if distname == 'rv_histogram_instance': + check_distribution_rvs(distfn.cdf, arg, alpha, rvs) + elif distname != 'geninvgauss': + # skip kstest for geninvgauss since cdf is too slow; see test for + # rv generation in 
TestGenInvGauss in test_distributions.py + check_distribution_rvs(distname, arg, alpha, rvs) + + locscale_defaults = (0, 1) + meths = [distfn.pdf, distfn.logpdf, distfn.cdf, distfn.logcdf, + distfn.logsf] + # make sure arguments are within support + spec_x = {'weibull_max': -0.5, 'levy_l': -0.5, + 'pareto': 1.5, 'truncpareto': 3.2, 'tukeylambda': 0.3, + 'rv_histogram_instance': 5.0} + x = spec_x.get(distname, 0.5) + if distname == 'invweibull': + arg = (1,) + elif distname == 'ksone': + arg = (3,) + + check_named_args(distfn, x, arg, locscale_defaults, meths) + check_random_state_property(distfn, arg) + + if distname in ['rel_breitwigner'] and _IS_32BIT: + # gh18414 + pytest.skip("fails on Linux 32-bit") + else: + check_pickling(distfn, arg) + check_freezing(distfn, arg) + + # Entropy + if distname not in ['kstwobign', 'kstwo', 'ncf']: + check_entropy(distfn, arg, distname) + + if distfn.numargs == 0: + check_vecentropy(distfn, arg) + + if (distfn.__class__._entropy != stats.rv_continuous._entropy + and distname != 'vonmises'): + check_private_entropy(distfn, arg, stats.rv_continuous) + + with npt.suppress_warnings() as sup: + sup.filter(IntegrationWarning, "The occurrence of roundoff error") + sup.filter(IntegrationWarning, "Extremely bad integrand") + sup.filter(RuntimeWarning, "invalid value") + check_entropy_vect_scale(distfn, arg) + + check_retrieving_support(distfn, arg) + check_edge_support(distfn, arg) + + check_meth_dtype(distfn, arg, meths) + check_ppf_dtype(distfn, arg) + + if distname not in fails_cmplx: + check_cmplx_deriv(distfn, arg) + + if distname != 'truncnorm': + check_ppf_private(distfn, arg, distname) + + for method in ["MLE", "MM"]: + if distname not in skip_fit_test[method]: + check_fit_args(distfn, arg, rvs[:n_fit_samples], method) + + if distname not in skip_fit_fix_test[method]: + check_fit_args_fix(distfn, arg, rvs[:n_fit_samples], method) + + +@pytest.mark.parametrize('distname,arg', cases_test_cont_basic()) +def 
test_rvs_scalar(distname, arg): + # rvs should return a scalar when given scalar arguments (gh-12428) + try: + distfn = getattr(stats, distname) + except TypeError: + distfn = distname + distname = 'rv_histogram_instance' + + assert np.isscalar(distfn.rvs(*arg)) + assert np.isscalar(distfn.rvs(*arg, size=())) + assert np.isscalar(distfn.rvs(*arg, size=None)) + + +def test_levy_stable_random_state_property(): + # levy_stable only implements rvs(), so it is skipped in the + # main loop in test_cont_basic(). Here we apply just the test + # check_random_state_property to levy_stable. + check_random_state_property(stats.levy_stable, (0.5, 0.1)) + + +def cases_test_moments(): + fail_normalization = set() + fail_higher = {'ncf'} + fail_moment = {'johnsonsu'} # generic `munp` is inaccurate for johnsonsu + + for distname, arg in distcont[:] + histogram_test_instances: + if distname == 'levy_stable': + continue + + if distname in distxslow_test_moments: + yield pytest.param(distname, arg, True, True, True, True, + marks=pytest.mark.xslow(reason="too slow")) + continue + + cond1 = distname not in fail_normalization + cond2 = distname not in fail_higher + cond3 = distname not in fail_moment + + marks = list() + # Currently unused, `marks` can be used to add a timeout to a test of + # a specific distribution. 
For example, this shows how a timeout could + # be added for the 'skewnorm' distribution: + # + # marks = list() + # if distname == 'skewnorm': + # marks.append(pytest.mark.timeout(300)) + + yield pytest.param(distname, arg, cond1, cond2, cond3, + False, marks=marks) + + if not cond1 or not cond2 or not cond3: + # Run the distributions that have issues twice, once skipping the + # not_ok parts, once with the not_ok parts but marked as knownfail + yield pytest.param(distname, arg, True, True, True, True, + marks=[pytest.mark.xfail] + marks) + + +@pytest.mark.slow +@pytest.mark.parametrize('distname,arg,normalization_ok,higher_ok,moment_ok,' + 'is_xfailing', + cases_test_moments()) +def test_moments(distname, arg, normalization_ok, higher_ok, moment_ok, + is_xfailing): + try: + distfn = getattr(stats, distname) + except TypeError: + distfn = distname + distname = 'rv_histogram_instance' + + with npt.suppress_warnings() as sup: + sup.filter(IntegrationWarning, + "The integral is probably divergent, or slowly convergent.") + sup.filter(IntegrationWarning, + "The maximum number of subdivisions.") + sup.filter(IntegrationWarning, + "The algorithm does not converge.") + + if is_xfailing: + sup.filter(IntegrationWarning) + + m, v, s, k = distfn.stats(*arg, moments='mvsk') + + with np.errstate(all="ignore"): + if normalization_ok: + check_normalization(distfn, arg, distname) + + if higher_ok: + check_mean_expect(distfn, arg, m, distname) + check_skew_expect(distfn, arg, m, v, s, distname) + check_var_expect(distfn, arg, m, v, distname) + check_kurt_expect(distfn, arg, m, v, k, distname) + check_munp_expect(distfn, arg, distname) + + check_loc_scale(distfn, arg, m, v, distname) + + if moment_ok: + check_moment(distfn, arg, m, v, distname) + + +@pytest.mark.parametrize('dist,shape_args', distcont) +def test_rvs_broadcast(dist, shape_args): + if dist in ['gausshyper', 'studentized_range']: + pytest.skip("too slow") + + if dist in ['rel_breitwigner'] and _IS_32BIT: + # gh18414 
+ pytest.skip("fails on Linux 32-bit") + + # If shape_only is True, it means the _rvs method of the + # distribution uses more than one random number to generate a random + # variate. That means the result of using rvs with broadcasting or + # with a nontrivial size will not necessarily be the same as using the + # numpy.vectorize'd version of rvs(), so we can only compare the shapes + # of the results, not the values. + # Whether or not a distribution is in the following list is an + # implementation detail of the distribution, not a requirement. If + # the implementation the rvs() method of a distribution changes, this + # test might also have to be changed. + shape_only = dist in ['argus', 'betaprime', 'dgamma', 'dweibull', + 'exponnorm', 'genhyperbolic', 'geninvgauss', + 'levy_stable', 'nct', 'norminvgauss', 'rice', + 'skewnorm', 'semicircular', 'gennorm', 'loggamma'] + + distfunc = getattr(stats, dist) + loc = np.zeros(2) + scale = np.ones((3, 1)) + nargs = distfunc.numargs + allargs = [] + bshape = [3, 2] + # Generate shape parameter arguments... + for k in range(nargs): + shp = (k + 4,) + (1,)*(k + 2) + allargs.append(shape_args[k]*np.ones(shp)) + bshape.insert(0, k + 4) + allargs.extend([loc, scale]) + # bshape holds the expected shape when loc, scale, and the shape + # parameters are all broadcast together. + + check_rvs_broadcast(distfunc, dist, allargs, bshape, shape_only, 'd') + + +# Expected values of the SF, CDF, PDF were computed using +# mpmath with mpmath.mp.dps = 50 and output at 20: +# +# def ks(x, n): +# x = mpmath.mpf(x) +# logp = -mpmath.power(6.0*n*x+1.0, 2)/18.0/n +# sf, cdf = mpmath.exp(logp), -mpmath.expm1(logp) +# pdf = (6.0*n*x+1.0) * 2 * sf/3 +# print(mpmath.nstr(sf, 20), mpmath.nstr(cdf, 20), mpmath.nstr(pdf, 20)) +# +# Tests use 1/n < x < 1-1/n and n > 1e6 to use the asymptotic computation. +# Larger x has a smaller sf. 
+@pytest.mark.parametrize('x,n,sf,cdf,pdf,rtol', + [(2.0e-5, 1000000000, + 0.44932297307934442379, 0.55067702692065557621, + 35946.137394996276407, 5e-15), + (2.0e-9, 1000000000, + 0.99999999061111115519, 9.3888888448132728224e-9, + 8.6666665852962971765, 5e-14), + (5.0e-4, 1000000000, + 7.1222019433090374624e-218, 1.0, + 1.4244408634752704094e-211, 5e-14)]) +def test_gh17775_regression(x, n, sf, cdf, pdf, rtol): + # Regression test for gh-17775. In scipy 1.9.3 and earlier, + # these test would fail. + # + # KS one asymptotic sf ~ e^(-(6nx+1)^2 / 18n) + # Given a large 32-bit integer n, 6n will overflow in the c implementation. + # Example of broken behaviour: + # ksone.sf(2.0e-5, 1000000000) == 0.9374359693473666 + ks = stats.ksone + vals = np.array([ks.sf(x, n), ks.cdf(x, n), ks.pdf(x, n)]) + expected = np.array([sf, cdf, pdf]) + npt.assert_allclose(vals, expected, rtol=rtol) + # The sf+cdf must sum to 1.0. + npt.assert_equal(vals[0] + vals[1], 1.0) + # Check inverting the (potentially very small) sf (uses a lower tolerance) + npt.assert_allclose([ks.isf(sf, n)], [x], rtol=1e-8) + + +def test_rvs_gh2069_regression(): + # Regression tests for gh-2069. In scipy 0.17 and earlier, + # these tests would fail. 
+ # + # A typical example of the broken behavior: + # >>> norm.rvs(loc=np.zeros(5), scale=np.ones(5)) + # array([-2.49613705, -2.49613705, -2.49613705, -2.49613705, -2.49613705]) + rng = np.random.RandomState(123) + vals = stats.norm.rvs(loc=np.zeros(5), scale=1, random_state=rng) + d = np.diff(vals) + npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!") + vals = stats.norm.rvs(loc=0, scale=np.ones(5), random_state=rng) + d = np.diff(vals) + npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!") + vals = stats.norm.rvs(loc=np.zeros(5), scale=np.ones(5), random_state=rng) + d = np.diff(vals) + npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!") + vals = stats.norm.rvs(loc=np.array([[0], [0]]), scale=np.ones(5), + random_state=rng) + d = np.diff(vals.ravel()) + npt.assert_(np.all(d != 0), "All the values are equal, but they shouldn't be!") + + assert_raises(ValueError, stats.norm.rvs, [[0, 0], [0, 0]], + [[1, 1], [1, 1]], 1) + assert_raises(ValueError, stats.gamma.rvs, [2, 3, 4, 5], 0, 1, (2, 2)) + assert_raises(ValueError, stats.gamma.rvs, [1, 1, 1, 1], [0, 0, 0, 0], + [[1], [2]], (4,)) + + +def test_nomodify_gh9900_regression(): + # Regression test for gh-9990 + # Prior to gh-9990, calls to stats.truncnorm._cdf() use what ever was + # set inside the stats.truncnorm instance during stats.truncnorm.cdf(). + # This could cause issues with multi-threaded code. + # Since then, the calls to cdf() are not permitted to modify the global + # stats.truncnorm instance. + tn = stats.truncnorm + # Use the right-half truncated normal + # Check that the cdf and _cdf return the same result. 
+ npt.assert_almost_equal(tn.cdf(1, 0, np.inf), + 0.6826894921370859) + npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), + 0.6826894921370859) + + # Now use the left-half truncated normal + npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), + 0.31731050786291415) + npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), + 0.31731050786291415) + + # Check that the right-half truncated normal _cdf hasn't changed + npt.assert_almost_equal(tn._cdf([1], [0], [np.inf]), + 0.6826894921370859) # Not 1.6826894921370859 + npt.assert_almost_equal(tn.cdf(1, 0, np.inf), + 0.6826894921370859) + + # Check that the left-half truncated normal _cdf hasn't changed + npt.assert_almost_equal(tn._cdf([-1], [-np.inf], [0]), + 0.31731050786291415) # Not -0.6826894921370859 + npt.assert_almost_equal(tn.cdf(1, -np.inf, 0), + 1) # Not 1.6826894921370859 + npt.assert_almost_equal(tn.cdf(-1, -np.inf, 0), + 0.31731050786291415) # Not -0.6826894921370859 + + +def test_broadcast_gh9990_regression(): + # Regression test for gh-9990 + # The x-value 7 only lies within the support of 4 of the supplied + # distributions. Prior to 9990, one array passed to + # stats.reciprocal._cdf would have 4 elements, but an array + # previously stored by stats.reciprocal_argcheck() would have 6, leading + # to a broadcast error. 
+ a = np.array([1, 2, 3, 4, 5, 6]) + b = np.array([8, 16, 1, 32, 1, 48]) + ans = [stats.reciprocal.cdf(7, _a, _b) for _a, _b in zip(a,b)] + npt.assert_array_almost_equal(stats.reciprocal.cdf(7, a, b), ans) + + ans = [stats.reciprocal.cdf(1, _a, _b) for _a, _b in zip(a,b)] + npt.assert_array_almost_equal(stats.reciprocal.cdf(1, a, b), ans) + + ans = [stats.reciprocal.cdf(_a, _a, _b) for _a, _b in zip(a,b)] + npt.assert_array_almost_equal(stats.reciprocal.cdf(a, a, b), ans) + + ans = [stats.reciprocal.cdf(_b, _a, _b) for _a, _b in zip(a,b)] + npt.assert_array_almost_equal(stats.reciprocal.cdf(b, a, b), ans) + + +def test_broadcast_gh7933_regression(): + # Check broadcast works + stats.truncnorm.logpdf( + np.array([3.0, 2.0, 1.0]), + a=(1.5 - np.array([6.0, 5.0, 4.0])) / 3.0, + b=np.inf, + loc=np.array([6.0, 5.0, 4.0]), + scale=3.0 + ) + + +def test_gh2002_regression(): + # Add a check that broadcast works in situations where only some + # x-values are compatible with some of the shape arguments. + x = np.r_[-2:2:101j] + a = np.r_[-np.ones(50), np.ones(51)] + expected = [stats.truncnorm.pdf(_x, _a, np.inf) for _x, _a in zip(x, a)] + ans = stats.truncnorm.pdf(x, a, np.inf) + npt.assert_array_almost_equal(ans, expected) + + +def test_gh1320_regression(): + # Check that the first example from gh-1320 now works. + c = 2.62 + stats.genextreme.ppf(0.5, np.array([[c], [c + 0.5]])) + # The other examples in gh-1320 appear to have stopped working + # some time ago. 
+ # ans = stats.genextreme.moment(2, np.array([c, c + 0.5])) + # expected = np.array([25.50105963, 115.11191437]) + # stats.genextreme.moment(5, np.array([[c], [c + 0.5]])) + # stats.genextreme.moment(5, np.array([c, c + 0.5])) + + +def test_method_of_moments(): + # example from https://en.wikipedia.org/wiki/Method_of_moments_(statistics) + np.random.seed(1234) + x = [0, 0, 0, 0, 1] + a = 1/5 - 2*np.sqrt(3)/5 + b = 1/5 + 2*np.sqrt(3)/5 + # force use of method of moments (uniform.fit is overridden) + loc, scale = super(type(stats.uniform), stats.uniform).fit(x, method="MM") + npt.assert_almost_equal(loc, a, decimal=4) + npt.assert_almost_equal(loc+scale, b, decimal=4) + + +def check_sample_meanvar_(popmean, popvar, sample): + if np.isfinite(popmean): + check_sample_mean(sample, popmean) + if np.isfinite(popvar): + check_sample_var(sample, popvar) + + +def check_sample_mean(sample, popmean): + # Checks for unlikely difference between sample mean and population mean + prob = stats.ttest_1samp(sample, popmean).pvalue + assert prob > 0.01 + + +def check_sample_var(sample, popvar): + # check that population mean lies within the CI bootstrapped from the + # sample. 
This used to be a chi-squared test for variance, but there were + # too many false positives + res = stats.bootstrap( + (sample,), + lambda x, axis: x.var(ddof=1, axis=axis), + confidence_level=0.995, + ) + conf = res.confidence_interval + low, high = conf.low, conf.high + assert low <= popvar <= high + + +def check_cdf_ppf(distfn, arg, msg): + values = [0.001, 0.5, 0.999] + npt.assert_almost_equal(distfn.cdf(distfn.ppf(values, *arg), *arg), + values, decimal=DECIMAL, err_msg=msg + + ' - cdf-ppf roundtrip') + + +def check_sf_isf(distfn, arg, msg): + npt.assert_almost_equal(distfn.sf(distfn.isf([0.1, 0.5, 0.9], *arg), *arg), + [0.1, 0.5, 0.9], decimal=DECIMAL, err_msg=msg + + ' - sf-isf roundtrip') + + +def check_cdf_sf(distfn, arg, msg): + npt.assert_almost_equal(distfn.cdf([0.1, 0.9], *arg), + 1.0 - distfn.sf([0.1, 0.9], *arg), + decimal=DECIMAL, err_msg=msg + + ' - cdf-sf relationship') + + +def check_ppf_isf(distfn, arg, msg): + p = np.array([0.1, 0.9]) + npt.assert_almost_equal(distfn.isf(p, *arg), distfn.ppf(1-p, *arg), + decimal=DECIMAL, err_msg=msg + + ' - ppf-isf relationship') + + +def check_pdf(distfn, arg, msg): + # compares pdf at median with numerical derivative of cdf + median = distfn.ppf(0.5, *arg) + eps = 1e-6 + pdfv = distfn.pdf(median, *arg) + if (pdfv < 1e-4) or (pdfv > 1e4): + # avoid checking a case where pdf is close to zero or + # huge (singularity) + median = median + 0.1 + pdfv = distfn.pdf(median, *arg) + cdfdiff = (distfn.cdf(median + eps, *arg) - + distfn.cdf(median - eps, *arg))/eps/2.0 + # replace with better diff and better test (more points), + # actually, this works pretty well + msg += ' - cdf-pdf relationship' + npt.assert_almost_equal(pdfv, cdfdiff, decimal=DECIMAL, err_msg=msg) + + +def check_pdf_logpdf(distfn, args, msg): + # compares pdf at several points with the log of the pdf + points = np.array([0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]) + vals = distfn.ppf(points, *args) + vals = vals[np.isfinite(vals)] + pdf = distfn.pdf(vals, 
*args) + logpdf = distfn.logpdf(vals, *args) + pdf = pdf[(pdf != 0) & np.isfinite(pdf)] + logpdf = logpdf[np.isfinite(logpdf)] + msg += " - logpdf-log(pdf) relationship" + npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg) + + +def check_pdf_logpdf_at_endpoints(distfn, args, msg): + # compares pdf with the log of the pdf at the (finite) end points + points = np.array([0, 1]) + vals = distfn.ppf(points, *args) + vals = vals[np.isfinite(vals)] + pdf = distfn.pdf(vals, *args) + logpdf = distfn.logpdf(vals, *args) + pdf = pdf[(pdf != 0) & np.isfinite(pdf)] + logpdf = logpdf[np.isfinite(logpdf)] + msg += " - logpdf-log(pdf) relationship" + npt.assert_almost_equal(np.log(pdf), logpdf, decimal=7, err_msg=msg) + + +def check_sf_logsf(distfn, args, msg): + # compares sf at several points with the log of the sf + points = np.array([0.0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]) + vals = distfn.ppf(points, *args) + vals = vals[np.isfinite(vals)] + sf = distfn.sf(vals, *args) + logsf = distfn.logsf(vals, *args) + sf = sf[sf != 0] + logsf = logsf[np.isfinite(logsf)] + msg += " - logsf-log(sf) relationship" + npt.assert_almost_equal(np.log(sf), logsf, decimal=7, err_msg=msg) + + +def check_cdf_logcdf(distfn, args, msg): + # compares cdf at several points with the log of the cdf + points = np.array([0, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]) + vals = distfn.ppf(points, *args) + vals = vals[np.isfinite(vals)] + cdf = distfn.cdf(vals, *args) + logcdf = distfn.logcdf(vals, *args) + cdf = cdf[cdf != 0] + logcdf = logcdf[np.isfinite(logcdf)] + msg += " - logcdf-log(cdf) relationship" + npt.assert_almost_equal(np.log(cdf), logcdf, decimal=7, err_msg=msg) + + +def check_ppf_broadcast(distfn, arg, msg): + # compares ppf for multiple argsets. 
+ num_repeats = 5 + args = [] * num_repeats + if arg: + args = [np.array([_] * num_repeats) for _ in arg] + + median = distfn.ppf(0.5, *arg) + medians = distfn.ppf(0.5, *args) + msg += " - ppf multiple" + npt.assert_almost_equal(medians, [median] * num_repeats, decimal=7, err_msg=msg) + + +def check_distribution_rvs(dist, args, alpha, rvs): + # dist is either a cdf function or name of a distribution in scipy.stats. + # args are the args for scipy.stats.dist(*args) + # alpha is a significance level, ~0.01 + # rvs is array_like of random variables + # test from scipy.stats.tests + # this version reuses existing random variables + D, pval = stats.kstest(rvs, dist, args=args, N=1000) + if (pval < alpha): + # The rvs passed in failed the K-S test, which _could_ happen + # but is unlikely if alpha is small enough. + # Repeat the test with a new sample of rvs. + # Generate 1000 rvs, perform a K-S test that the new sample of rvs + # are distributed according to the distribution. + D, pval = stats.kstest(dist, dist, args=args, N=1000) + npt.assert_(pval > alpha, "D = " + str(D) + "; pval = " + str(pval) + + "; alpha = " + str(alpha) + "\nargs = " + str(args)) + + +def check_vecentropy(distfn, args): + npt.assert_equal(distfn.vecentropy(*args), distfn._entropy(*args)) + + +def check_loc_scale(distfn, arg, m, v, msg): + # Make `loc` and `scale` arrays to catch bugs like gh-13580 where + # `loc` and `scale` arrays improperly broadcast with shapes. 
+ loc, scale = np.array([10.0, 20.0]), np.array([10.0, 20.0]) + mt, vt = distfn.stats(*arg, loc=loc, scale=scale) + npt.assert_allclose(m*scale + loc, mt) + npt.assert_allclose(v*scale*scale, vt) + + +def check_ppf_private(distfn, arg, msg): + # fails by design for truncnorm self.nb not defined + ppfs = distfn._ppf(np.array([0.1, 0.5, 0.9]), *arg) + npt.assert_(not np.any(np.isnan(ppfs)), msg + 'ppf private is nan') + + +def check_retrieving_support(distfn, args): + loc, scale = 1, 2 + supp = distfn.support(*args) + supp_loc_scale = distfn.support(*args, loc=loc, scale=scale) + npt.assert_almost_equal(np.array(supp)*scale + loc, + np.array(supp_loc_scale)) + + +def check_fit_args(distfn, arg, rvs, method): + with np.errstate(all='ignore'), npt.suppress_warnings() as sup: + sup.filter(category=RuntimeWarning, + message="The shape parameter of the erlang") + sup.filter(category=RuntimeWarning, + message="floating point number truncated") + vals = distfn.fit(rvs, method=method) + vals2 = distfn.fit(rvs, optimizer='powell', method=method) + # Only check the length of the return; accuracy tested in test_fit.py + npt.assert_(len(vals) == 2+len(arg)) + npt.assert_(len(vals2) == 2+len(arg)) + + +def check_fit_args_fix(distfn, arg, rvs, method): + with np.errstate(all='ignore'), npt.suppress_warnings() as sup: + sup.filter(category=RuntimeWarning, + message="The shape parameter of the erlang") + + vals = distfn.fit(rvs, floc=0, method=method) + vals2 = distfn.fit(rvs, fscale=1, method=method) + npt.assert_(len(vals) == 2+len(arg)) + npt.assert_(vals[-2] == 0) + npt.assert_(vals2[-1] == 1) + npt.assert_(len(vals2) == 2+len(arg)) + if len(arg) > 0: + vals3 = distfn.fit(rvs, f0=arg[0], method=method) + npt.assert_(len(vals3) == 2+len(arg)) + npt.assert_(vals3[0] == arg[0]) + if len(arg) > 1: + vals4 = distfn.fit(rvs, f1=arg[1], method=method) + npt.assert_(len(vals4) == 2+len(arg)) + npt.assert_(vals4[1] == arg[1]) + if len(arg) > 2: + vals5 = distfn.fit(rvs, f2=arg[2], 
method=method) + npt.assert_(len(vals5) == 2+len(arg)) + npt.assert_(vals5[2] == arg[2]) + + +@pytest.mark.parametrize('method', ['pdf', 'logpdf', 'cdf', 'logcdf', + 'sf', 'logsf', 'ppf', 'isf']) +@pytest.mark.parametrize('distname, args', distcont) +def test_methods_with_lists(method, distname, args): + # Test that the continuous distributions can accept Python lists + # as arguments. + dist = getattr(stats, distname) + f = getattr(dist, method) + if distname == 'invweibull' and method.startswith('log'): + x = [1.5, 2] + else: + x = [0.1, 0.2] + + shape2 = [[a]*2 for a in args] + loc = [0, 0.1] + scale = [1, 1.01] + result = f(x, *shape2, loc=loc, scale=scale) + npt.assert_allclose(result, + [f(*v) for v in zip(x, *shape2, loc, scale)], + rtol=1e-14, atol=5e-14) + + +def test_burr_fisk_moment_gh13234_regression(): + vals0 = stats.burr.moment(1, 5, 4) + assert isinstance(vals0, float) + + vals1 = stats.fisk.moment(1, 8) + assert isinstance(vals1, float) + + +def test_moments_with_array_gh12192_regression(): + # array loc and scalar scale + vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=1) + expected0 = np.array([1., 2., 3.]) + npt.assert_equal(vals0, expected0) + + # array loc and invalid scalar scale + vals1 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=-1) + expected1 = np.array([np.nan, np.nan, np.nan]) + npt.assert_equal(vals1, expected1) + + # array loc and array scale with invalid entries + vals2 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), + scale=[-3, 1, 0]) + expected2 = np.array([np.nan, 2., np.nan]) + npt.assert_equal(vals2, expected2) + + # (loc == 0) & (scale < 0) + vals3 = stats.norm.moment(order=2, loc=0, scale=-4) + expected3 = np.nan + npt.assert_equal(vals3, expected3) + assert isinstance(vals3, expected3.__class__) + + # array loc with 0 entries and scale with invalid entries + vals4 = stats.norm.moment(order=2, loc=[1, 0, 2], scale=[3, -4, -5]) + expected4 = np.array([10., np.nan, np.nan]) + 
npt.assert_equal(vals4, expected4) + + # all(loc == 0) & (array scale with invalid entries) + vals5 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[5., -2, 100.]) + expected5 = np.array([25., np.nan, 10000.]) + npt.assert_equal(vals5, expected5) + + # all( (loc == 0) & (scale < 0) ) + vals6 = stats.norm.moment(order=2, loc=[0, 0, 0], scale=[-5., -2, -100.]) + expected6 = np.array([np.nan, np.nan, np.nan]) + npt.assert_equal(vals6, expected6) + + # scalar args, loc, and scale + vals7 = stats.chi.moment(order=2, df=1, loc=0, scale=0) + expected7 = np.nan + npt.assert_equal(vals7, expected7) + assert isinstance(vals7, expected7.__class__) + + # array args, scalar loc, and scalar scale + vals8 = stats.chi.moment(order=2, df=[1, 2, 3], loc=0, scale=0) + expected8 = np.array([np.nan, np.nan, np.nan]) + npt.assert_equal(vals8, expected8) + + # array args, array loc, and array scale + vals9 = stats.chi.moment(order=2, df=[1, 2, 3], loc=[1., 0., 2.], + scale=[1., -3., 0.]) + expected9 = np.array([3.59576912, np.nan, np.nan]) + npt.assert_allclose(vals9, expected9, rtol=1e-8) + + # (n > 4), all(loc != 0), and all(scale != 0) + vals10 = stats.norm.moment(5, [1., 2.], [1., 2.]) + expected10 = np.array([26., 832.]) + npt.assert_allclose(vals10, expected10, rtol=1e-13) + + # test broadcasting and more + a = [-1.1, 0, 1, 2.2, np.pi] + b = [-1.1, 0, 1, 2.2, np.pi] + loc = [-1.1, 0, np.sqrt(2)] + scale = [-2.1, 0, 1, 2.2, np.pi] + + a = np.array(a).reshape((-1, 1, 1, 1)) + b = np.array(b).reshape((-1, 1, 1)) + loc = np.array(loc).reshape((-1, 1)) + scale = np.array(scale) + + vals11 = stats.beta.moment(order=2, a=a, b=b, loc=loc, scale=scale) + + a, b, loc, scale = np.broadcast_arrays(a, b, loc, scale) + + for i in np.ndenumerate(a): + with np.errstate(invalid='ignore', divide='ignore'): + i = i[0] # just get the index + # check against same function with scalar input + expected = stats.beta.moment(order=2, a=a[i], b=b[i], + loc=loc[i], scale=scale[i]) + 
np.testing.assert_equal(vals11[i], expected) + + +def test_broadcasting_in_moments_gh12192_regression(): + vals0 = stats.norm.moment(order=1, loc=np.array([1, 2, 3]), scale=[[1]]) + expected0 = np.array([[1., 2., 3.]]) + npt.assert_equal(vals0, expected0) + assert vals0.shape == expected0.shape + + vals1 = stats.norm.moment(order=1, loc=np.array([[1], [2], [3]]), + scale=[1, 2, 3]) + expected1 = np.array([[1., 1., 1.], [2., 2., 2.], [3., 3., 3.]]) + npt.assert_equal(vals1, expected1) + assert vals1.shape == expected1.shape + + vals2 = stats.chi.moment(order=1, df=[1., 2., 3.], loc=0., scale=1.) + expected2 = np.array([0.79788456, 1.25331414, 1.59576912]) + npt.assert_allclose(vals2, expected2, rtol=1e-8) + assert vals2.shape == expected2.shape + + vals3 = stats.chi.moment(order=1, df=[[1.], [2.], [3.]], loc=[0., 1., 2.], + scale=[-1., 0., 3.]) + expected3 = np.array([[np.nan, np.nan, 4.39365368], + [np.nan, np.nan, 5.75994241], + [np.nan, np.nan, 6.78730736]]) + npt.assert_allclose(vals3, expected3, rtol=1e-8) + assert vals3.shape == expected3.shape + + +def test_kappa3_array_gh13582(): + # https://github.com/scipy/scipy/pull/15140#issuecomment-994958241 + shapes = [0.5, 1.5, 2.5, 3.5, 4.5] + moments = 'mvsk' + res = np.array([[stats.kappa3.stats(shape, moments=moment) + for shape in shapes] for moment in moments]) + res2 = np.array(stats.kappa3.stats(shapes, moments=moments)) + npt.assert_allclose(res, res2) + + +@pytest.mark.xslow +def test_kappa4_array_gh13582(): + h = np.array([-0.5, 2.5, 3.5, 4.5, -3]) + k = np.array([-0.5, 1, -1.5, 0, 3.5]) + moments = 'mvsk' + res = np.array([[stats.kappa4.stats(h[i], k[i], moments=moment) + for i in range(5)] for moment in moments]) + res2 = np.array(stats.kappa4.stats(h, k, moments=moments)) + npt.assert_allclose(res, res2) + + # https://github.com/scipy/scipy/pull/15250#discussion_r775112913 + h = np.array([-1, -1/4, -1/4, 1, -1, 0]) + k = np.array([1, 1, 1/2, -1/3, -1, 0]) + res = np.array([[stats.kappa4.stats(h[i], 
k[i], moments=moment) + for i in range(6)] for moment in moments]) + res2 = np.array(stats.kappa4.stats(h, k, moments=moments)) + npt.assert_allclose(res, res2) + + # https://github.com/scipy/scipy/pull/15250#discussion_r775115021 + h = np.array([-1, -0.5, 1]) + k = np.array([-1, -0.5, 0, 1])[:, None] + res2 = np.array(stats.kappa4.stats(h, k, moments=moments)) + assert res2.shape == (4, 4, 3) + + +def test_frozen_attributes(): + # gh-14827 reported that all frozen distributions had both pmf and pdf + # attributes; continuous should have pdf and discrete should have pmf. + message = "'rv_continuous_frozen' object has no attribute" + with pytest.raises(AttributeError, match=message): + stats.norm().pmf + with pytest.raises(AttributeError, match=message): + stats.norm().logpmf + stats.norm.pmf = "herring" + frozen_norm = stats.norm() + assert isinstance(frozen_norm, rv_continuous_frozen) + delattr(stats.norm, 'pmf') + + +def test_skewnorm_pdf_gh16038(): + rng = np.random.default_rng(0) + x, a = -np.inf, 0 + npt.assert_equal(stats.skewnorm.pdf(x, a), stats.norm.pdf(x)) + x, a = rng.random(size=(3, 3)), rng.random(size=(3, 3)) + mask = rng.random(size=(3, 3)) < 0.5 + a[mask] = 0 + x_norm = x[mask] + res = stats.skewnorm.pdf(x, a) + npt.assert_equal(res[mask], stats.norm.pdf(x_norm)) + npt.assert_equal(res[~mask], stats.skewnorm.pdf(x[~mask], a[~mask])) + + +# for scalar input, these functions should return scalar output +scalar_out = [['rvs', []], ['pdf', [0]], ['logpdf', [0]], ['cdf', [0]], + ['logcdf', [0]], ['sf', [0]], ['logsf', [0]], ['ppf', [0]], + ['isf', [0]], ['moment', [1]], ['entropy', []], ['expect', []], + ['median', []], ['mean', []], ['std', []], ['var', []]] +scalars_out = [['interval', [0.95]], ['support', []], ['stats', ['mv']]] + + +@pytest.mark.parametrize('case', scalar_out + scalars_out) +def test_scalar_for_scalar(case): + # Some rv_continuous functions returned 0d array instead of NumPy scalar + # Guard against regression + method_name, args = 
case + method = getattr(stats.norm(), method_name) + res = method(*args) + if case in scalar_out: + assert isinstance(res, np.number) + else: + assert isinstance(res[0], np.number) + assert isinstance(res[1], np.number) + + +def test_scalar_for_scalar2(): + # test methods that are not attributes of frozen distributions + res = stats.norm.fit([1, 2, 3]) + assert isinstance(res[0], np.number) + assert isinstance(res[1], np.number) + res = stats.norm.fit_loc_scale([1, 2, 3]) + assert isinstance(res[0], np.number) + assert isinstance(res[1], np.number) + res = stats.norm.nnlf((0, 1), [1, 2, 3]) + assert isinstance(res, np.number) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_fit_censored.py b/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_fit_censored.py new file mode 100644 index 0000000000000000000000000000000000000000..1567b56ffeed900435a69210bed9b5b26251ae1f --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_continuous_fit_censored.py @@ -0,0 +1,683 @@ +# Tests for fitting specific distributions to censored data. + +import numpy as np +from numpy.testing import assert_allclose + +from scipy.optimize import fmin +from scipy.stats import (CensoredData, beta, cauchy, chi2, expon, gamma, + gumbel_l, gumbel_r, invgauss, invweibull, laplace, + logistic, lognorm, nct, ncx2, norm, weibull_max, + weibull_min) + + +# In some tests, we'll use this optimizer for improved accuracy. +def optimizer(func, x0, args=(), disp=0): + return fmin(func, x0, args=args, disp=disp, xtol=1e-12, ftol=1e-12) + + +def test_beta(): + """ + Test fitting beta shape parameters to interval-censored data. 
+ + Calculation in R: + + > library(fitdistrplus) + > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80), + + right=c(0.20, 0.55, 0.90, 0.95)) + > result = fitdistcens(data, 'beta', control=list(reltol=1e-14)) + + > result + Fitting of the distribution ' beta ' on censored data by maximum likelihood + Parameters: + estimate + shape1 1.419941 + shape2 1.027066 + > result$sd + shape1 shape2 + 0.9914177 0.6866565 + """ + data = CensoredData(interval=[[0.10, 0.20], + [0.50, 0.55], + [0.75, 0.90], + [0.80, 0.95]]) + + # For this test, fit only the shape parameters; loc and scale are fixed. + a, b, loc, scale = beta.fit(data, floc=0, fscale=1, optimizer=optimizer) + + assert_allclose(a, 1.419941, rtol=5e-6) + assert_allclose(b, 1.027066, rtol=5e-6) + assert loc == 0 + assert scale == 1 + + +def test_cauchy_right_censored(): + """ + Test fitting the Cauchy distribution to right-censored data. + + Calculation in R, with two values not censored [1, 10] and + one right-censored value [30]. + + > library(fitdistrplus) + > data <- data.frame(left=c(1, 10, 30), right=c(1, 10, NA)) + > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14)) + > result + Fitting of the distribution ' cauchy ' on censored data by maximum + likelihood + Parameters: + estimate + location 7.100001 + scale 7.455866 + """ + data = CensoredData(uncensored=[1, 10], right=[30]) + loc, scale = cauchy.fit(data, optimizer=optimizer) + assert_allclose(loc, 7.10001, rtol=5e-6) + assert_allclose(scale, 7.455866, rtol=5e-6) + + +def test_cauchy_mixed(): + """ + Test fitting the Cauchy distribution to data with mixed censoring. + + Calculation in R, with: + * two values not censored [1, 10], + * one left-censored [1], + * one right-censored [30], and + * one interval-censored [[4, 8]]. 
+ + > library(fitdistrplus) + > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA)) + > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14)) + > result + Fitting of the distribution ' cauchy ' on censored data by maximum + likelihood + Parameters: + estimate + location 4.605150 + scale 5.900852 + """ + data = CensoredData(uncensored=[1, 10], left=[1], right=[30], + interval=[[4, 8]]) + loc, scale = cauchy.fit(data, optimizer=optimizer) + assert_allclose(loc, 4.605150, rtol=5e-6) + assert_allclose(scale, 5.900852, rtol=5e-6) + + +def test_chi2_mixed(): + """ + Test fitting just the shape parameter (df) of chi2 to mixed data. + + Calculation in R, with: + * two values not censored [1, 10], + * one left-censored [1], + * one right-censored [30], and + * one interval-censored [[4, 8]]. + + > library(fitdistrplus) + > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA)) + > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14)) + > result + Fitting of the distribution ' chisq ' on censored data by maximum + likelihood + Parameters: + estimate + df 5.060329 + """ + data = CensoredData(uncensored=[1, 10], left=[1], right=[30], + interval=[[4, 8]]) + df, loc, scale = chi2.fit(data, floc=0, fscale=1, optimizer=optimizer) + assert_allclose(df, 5.060329, rtol=5e-6) + assert loc == 0 + assert scale == 1 + + +def test_expon_right_censored(): + """ + For the exponential distribution with loc=0, the exact solution for + fitting n uncensored points x[0]...x[n-1] and m right-censored points + x[n]..x[n+m-1] is + + scale = sum(x)/n + + That is, divide the sum of all the values (not censored and + right-censored) by the number of uncensored values. (See, for example, + https://en.wikipedia.org/wiki/Censoring_(statistics)#Likelihood.) + + The second derivative of the log-likelihood function is + + n/scale**2 - 2*sum(x)/scale**3 + + from which the estimate of the standard error can be computed. 
+ + ----- + + Calculation in R, for reference only. The R results are not + used in the test. + + > library(fitdistrplus) + > dexps <- function(x, scale) { + + return(dexp(x, 1/scale)) + + } + > pexps <- function(q, scale) { + + return(pexp(q, 1/scale)) + + } + > left <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15, + + 16, 16, 20, 20, 21, 22) + > right <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15, + + NA, NA, NA, NA, NA, NA) + > result = fitdistcens(data, 'exps', start=list(scale=mean(data$left)), + + control=list(reltol=1e-14)) + > result + Fitting of the distribution ' exps ' on censored data by maximum likelihood + Parameters: + estimate + scale 19.85 + > result$sd + scale + 6.277119 + """ + # This data has 10 uncensored values and 6 right-censored values. + obs = [1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15, 16, 16, 20, 20, 21, 22] + cens = [False]*10 + [True]*6 + data = CensoredData.right_censored(obs, cens) + + loc, scale = expon.fit(data, floc=0, optimizer=optimizer) + + assert loc == 0 + # Use the analytical solution to compute the expected value. This + # is the sum of the observed values divided by the number of uncensored + # values. + n = len(data) - data.num_censored() + total = data._uncensored.sum() + data._right.sum() + expected = total / n + assert_allclose(scale, expected, 1e-8) + + +def test_gamma_right_censored(): + """ + Fit gamma shape and scale to data with one right-censored value. + + Calculation in R: + + > library(fitdistrplus) + > data <- data.frame(left=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0), + + right=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, NA)) + > result = fitdistcens(data, 'gamma', start=list(shape=1, scale=10), + + control=list(reltol=1e-13)) + > result + Fitting of the distribution ' gamma ' on censored data by maximum + likelihood + Parameters: + estimate + shape 1.447623 + scale 8.360197 + > result$sd + shape scale + 0.7053086 5.1016531 + """ + # The last value is right-censored. 
+ x = CensoredData.right_censored([2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, + 25.0], + [0]*7 + [1]) + + a, loc, scale = gamma.fit(x, floc=0, optimizer=optimizer) + + assert_allclose(a, 1.447623, rtol=5e-6) + assert loc == 0 + assert_allclose(scale, 8.360197, rtol=5e-6) + + +def test_gumbel(): + """ + Fit gumbel_l and gumbel_r to censored data. + + This R calculation should match gumbel_r. + + > library(evd) + > library(fitdistrplus) + > data = data.frame(left=c(0, 2, 3, 9, 10, 10), + + right=c(1, 2, 3, 9, NA, NA)) + > result = fitdistcens(data, 'gumbel', + + control=list(reltol=1e-14), + + start=list(loc=4, scale=5)) + > result + Fitting of the distribution ' gumbel ' on censored data by maximum + likelihood + Parameters: + estimate + loc 4.487853 + scale 4.843640 + """ + # First value is interval-censored. Last two are right-censored. + uncensored = np.array([2, 3, 9]) + right = np.array([10, 10]) + interval = np.array([[0, 1]]) + data = CensoredData(uncensored, right=right, interval=interval) + loc, scale = gumbel_r.fit(data, optimizer=optimizer) + assert_allclose(loc, 4.487853, rtol=5e-6) + assert_allclose(scale, 4.843640, rtol=5e-6) + + # Negate the data and reverse the intervals, and test with gumbel_l. + data2 = CensoredData(-uncensored, left=-right, + interval=-interval[:, ::-1]) + # Fitting gumbel_l to data2 should give the same result as above, but + # with loc negated. + loc2, scale2 = gumbel_l.fit(data2, optimizer=optimizer) + assert_allclose(loc2, -4.487853, rtol=5e-6) + assert_allclose(scale2, 4.843640, rtol=5e-6) + + +def test_invgauss(): + """ + Fit just the shape parameter of invgauss to data with one value + left-censored and one value right-censored. + + Calculation in R; using a fixed dispersion parameter amounts to fixing + the scale to be 1. 
+ + > library(statmod) + > library(fitdistrplus) + > left <- c(NA, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386, + + 0.4822340, 0.3478597, 3, 0.7191797, 1.5810902, 0.4442299) + > right <- c(0.15, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386, + + 0.4822340, 0.3478597, NA, 0.7191797, 1.5810902, 0.4442299) + > data <- data.frame(left=left, right=right) + > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12), + + fix.arg=list(dispersion=1), start=list(mean=3)) + > result + Fitting of the distribution ' invgauss ' on censored data by maximum + likelihood + Parameters: + estimate + mean 0.853469 + Fixed parameters: + value + dispersion 1 + > result$sd + mean + 0.247636 + + Here's the R calculation with the dispersion as a free parameter to + be fit. + + > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12), + + start=list(mean=3, dispersion=1)) + > result + Fitting of the distribution ' invgauss ' on censored data by maximum + likelihood + Parameters: + estimate + mean 0.8699819 + dispersion 1.2261362 + + The parametrization of the inverse Gaussian distribution in the + `statmod` package is not the same as in SciPy (see + https://arxiv.org/abs/1603.06687 + for details). The translation from R to SciPy is + + scale = 1/dispersion + mu = mean * dispersion + + > 1/result$estimate['dispersion'] # 1/dispersion + dispersion + 0.8155701 + > result$estimate['mean'] * result$estimate['dispersion'] + mean + 1.066716 + + Those last two values are the SciPy scale and shape parameters. + """ + # One point is left-censored, and one is right-censored. + x = [0.4813096, 0.5571880, 0.5132463, 0.3801414, + 0.5904386, 0.4822340, 0.3478597, 0.7191797, + 1.5810902, 0.4442299] + data = CensoredData(uncensored=x, left=[0.15], right=[3]) + + # Fit only the shape parameter. 
+ mu, loc, scale = invgauss.fit(data, floc=0, fscale=1, optimizer=optimizer) + + assert_allclose(mu, 0.853469, rtol=5e-5) + assert loc == 0 + assert scale == 1 + + # Fit the shape and scale. + mu, loc, scale = invgauss.fit(data, floc=0, optimizer=optimizer) + + assert_allclose(mu, 1.066716, rtol=5e-5) + assert loc == 0 + assert_allclose(scale, 0.8155701, rtol=5e-5) + + +def test_invweibull(): + """ + Fit invweibull to censored data. + + Here is the calculation in R. The 'frechet' distribution from the evd + package matches SciPy's invweibull distribution. The `loc` parameter + is fixed at 0. + + > library(evd) + > library(fitdistrplus) + > data = data.frame(left=c(0, 2, 3, 9, 10, 10), + + right=c(1, 2, 3, 9, NA, NA)) + > result = fitdistcens(data, 'frechet', + + control=list(reltol=1e-14), + + start=list(loc=4, scale=5)) + > result + Fitting of the distribution ' frechet ' on censored data by maximum + likelihood + Parameters: + estimate + scale 2.7902200 + shape 0.6379845 + Fixed parameters: + value + loc 0 + """ + # In the R data, the first value is interval-censored, and the last + # two are right-censored. The rest are not censored. + data = CensoredData(uncensored=[2, 3, 9], right=[10, 10], + interval=[[0, 1]]) + c, loc, scale = invweibull.fit(data, floc=0, optimizer=optimizer) + assert_allclose(c, 0.6379845, rtol=5e-6) + assert loc == 0 + assert_allclose(scale, 2.7902200, rtol=5e-6) + + +def test_laplace(): + """ + Fir the Laplace distribution to left- and right-censored data. 
+ + Calculation in R: + + > library(fitdistrplus) + > dlaplace <- function(x, location=0, scale=1) { + + return(0.5*exp(-abs((x - location)/scale))/scale) + + } + > plaplace <- function(q, location=0, scale=1) { + + z <- (q - location)/scale + + s <- sign(z) + + f <- -s*0.5*exp(-abs(z)) + (s+1)/2 + + return(f) + + } + > left <- c(NA, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684, + + -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372, + + 25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862, + + 32.8921, 9.0448, -27.4591, NA, 19.5083, -9.7199) + > right <- c(-50.0, -41.564, NA, 15.7384, NA, 10.0452, -2.0684, + + -19.5399, NA, 9.0005, 27.1227, 4.3113, -3.7372, + + 25.3111, 14.7987, 34.0887, NA, 42.8496, 18.5862, + + 32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199) + > data <- data.frame(left=left, right=right) + > result <- fitdistcens(data, 'laplace', start=list(location=10, scale=10), + + control=list(reltol=1e-13)) + > result + Fitting of the distribution ' laplace ' on censored data by maximum + likelihood + Parameters: + estimate + location 14.79870 + scale 30.93601 + > result$sd + location scale + 0.1758864 7.0972125 + """ + # The value -50 is left-censored, and the value 50 is right-censored. + obs = np.array([-50.0, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684, + -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372, + 25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862, + 32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199]) + x = obs[(obs != -50.0) & (obs != 50)] + left = obs[obs == -50.0] + right = obs[obs == 50.0] + data = CensoredData(uncensored=x, left=left, right=right) + loc, scale = laplace.fit(data, loc=10, scale=10, optimizer=optimizer) + assert_allclose(loc, 14.79870, rtol=5e-6) + assert_allclose(scale, 30.93601, rtol=5e-6) + + +def test_logistic(): + """ + Fit the logistic distribution to left-censored data. 
+ + Calculation in R: + > library(fitdistrplus) + > left = c(13.5401, 37.4235, 11.906 , 13.998 , NA , 0.4023, NA , + + 10.9044, 21.0629, 9.6985, NA , 12.9016, 39.164 , 34.6396, + + NA , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949, + + 3.4041, NA , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977, + + 16.3391, 36.0541) + > right = c(13.5401, 37.4235, 11.906 , 13.998 , 0. , 0.4023, 0. , + + 10.9044, 21.0629, 9.6985, 0. , 12.9016, 39.164 , 34.6396, + + 0. , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306, 8.4949, + + 3.4041, 0. , 7.2828, 37.1265, 6.5969, 17.6868, 17.4977, + + 16.3391, 36.0541) + > data = data.frame(left=left, right=right) + > result = fitdistcens(data, 'logis', control=list(reltol=1e-14)) + > result + Fitting of the distribution ' logis ' on censored data by maximum + likelihood + Parameters: + estimate + location 14.633459 + scale 9.232736 + > result$sd + location scale + 2.931505 1.546879 + """ + # Values that are zero are left-censored; the true values are less than 0. + x = np.array([13.5401, 37.4235, 11.906, 13.998, 0.0, 0.4023, 0.0, 10.9044, + 21.0629, 9.6985, 0.0, 12.9016, 39.164, 34.6396, 0.0, 20.3665, + 16.5889, 18.0952, 45.3818, 35.3306, 8.4949, 3.4041, 0.0, + 7.2828, 37.1265, 6.5969, 17.6868, 17.4977, 16.3391, + 36.0541]) + data = CensoredData.left_censored(x, censored=(x == 0)) + loc, scale = logistic.fit(data, optimizer=optimizer) + assert_allclose(loc, 14.633459, rtol=5e-7) + assert_allclose(scale, 9.232736, rtol=5e-6) + + +def test_lognorm(): + """ + Ref: https://math.montana.edu/jobo/st528/documents/relc.pdf + + The data is the locomotive control time to failure example that starts + on page 8. That's the 8th page in the PDF; the page number shown in + the text is 270). + The document includes SAS output for the data. + """ + # These are the uncensored measurements. There are also 59 right-censored + # measurements where the lower bound is 135. 
+ miles_to_fail = [22.5, 37.5, 46.0, 48.5, 51.5, 53.0, 54.5, 57.5, 66.5, + 68.0, 69.5, 76.5, 77.0, 78.5, 80.0, 81.5, 82.0, 83.0, + 84.0, 91.5, 93.5, 102.5, 107.0, 108.5, 112.5, 113.5, + 116.0, 117.0, 118.5, 119.0, 120.0, 122.5, 123.0, 127.5, + 131.0, 132.5, 134.0] + + data = CensoredData.right_censored(miles_to_fail + [135]*59, + [0]*len(miles_to_fail) + [1]*59) + sigma, loc, scale = lognorm.fit(data, floc=0) + + assert loc == 0 + # Convert the lognorm parameters to the mu and sigma of the underlying + # normal distribution. + mu = np.log(scale) + # The expected results are from the 17th page of the PDF document + # (labeled page 279), in the SAS output on the right side of the page. + assert_allclose(mu, 5.1169, rtol=5e-4) + assert_allclose(sigma, 0.7055, rtol=5e-3) + + +def test_nct(): + """ + Test fitting the noncentral t distribution to censored data. + + Calculation in R: + + > library(fitdistrplus) + > data <- data.frame(left=c(1, 2, 3, 5, 8, 10, 25, 25), + + right=c(1, 2, 3, 5, 8, 10, NA, NA)) + > result = fitdistcens(data, 't', control=list(reltol=1e-14), + + start=list(df=1, ncp=2)) + > result + Fitting of the distribution ' t ' on censored data by maximum likelihood + Parameters: + estimate + df 0.5432336 + ncp 2.8893565 + + """ + data = CensoredData.right_censored([1, 2, 3, 5, 8, 10, 25, 25], + [0, 0, 0, 0, 0, 0, 1, 1]) + # Fit just the shape parameter df and nc; loc and scale are fixed. + with np.errstate(over='ignore'): # remove context when gh-14901 is closed + df, nc, loc, scale = nct.fit(data, floc=0, fscale=1, + optimizer=optimizer) + assert_allclose(df, 0.5432336, rtol=5e-6) + assert_allclose(nc, 2.8893565, rtol=5e-6) + assert loc == 0 + assert scale == 1 + + +def test_ncx2(): + """ + Test fitting the shape parameters (df, ncp) of ncx2 to mixed data. + + Calculation in R, with + * 5 not censored values [2.7, 0.2, 6.5, 0.4, 0.1], + * 1 interval-censored value [[0.6, 1.0]], and + * 2 right-censored values [8, 8]. 
+ + > library(fitdistrplus) + > data <- data.frame(left=c(2.7, 0.2, 6.5, 0.4, 0.1, 0.6, 8, 8), + + right=c(2.7, 0.2, 6.5, 0.4, 0.1, 1.0, NA, NA)) + > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14), + + start=list(df=1, ncp=2)) + > result + Fitting of the distribution ' chisq ' on censored data by maximum + likelihood + Parameters: + estimate + df 1.052871 + ncp 2.362934 + """ + data = CensoredData(uncensored=[2.7, 0.2, 6.5, 0.4, 0.1], right=[8, 8], + interval=[[0.6, 1.0]]) + with np.errstate(over='ignore'): # remove context when gh-14901 is closed + df, ncp, loc, scale = ncx2.fit(data, floc=0, fscale=1, + optimizer=optimizer) + assert_allclose(df, 1.052871, rtol=5e-6) + assert_allclose(ncp, 2.362934, rtol=5e-6) + assert loc == 0 + assert scale == 1 + + +def test_norm(): + """ + Test fitting the normal distribution to interval-censored data. + + Calculation in R: + + > library(fitdistrplus) + > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80), + + right=c(0.20, 0.55, 0.90, 0.95)) + > result = fitdistcens(data, 'norm', control=list(reltol=1e-14)) + + > result + Fitting of the distribution ' norm ' on censored data by maximum likelihood + Parameters: + estimate + mean 0.5919990 + sd 0.2868042 + > result$sd + mean sd + 0.1444432 0.1029451 + """ + data = CensoredData(interval=[[0.10, 0.20], + [0.50, 0.55], + [0.75, 0.90], + [0.80, 0.95]]) + + loc, scale = norm.fit(data, optimizer=optimizer) + + assert_allclose(loc, 0.5919990, rtol=5e-6) + assert_allclose(scale, 0.2868042, rtol=5e-6) + + +def test_weibull_censored1(): + # Ref: http://www.ams.sunysb.edu/~zhu/ams588/Lecture_3_likelihood.pdf + + # Survival times; '*' indicates right-censored. + s = "3,5,6*,8,10*,11*,15,20*,22,23,27*,29,32,35,40,26,28,33*,21,24*" + + times, cens = zip(*[(float(t[0]), len(t) == 2) + for t in [w.split('*') for w in s.split(',')]]) + data = CensoredData.right_censored(times, cens) + + c, loc, scale = weibull_min.fit(data, floc=0) + + # Expected values are from the reference. 
+ assert_allclose(c, 2.149, rtol=1e-3) + assert loc == 0 + assert_allclose(scale, 28.99, rtol=1e-3) + + # Flip the sign of the data, and make the censored values + # left-censored. We should get the same parameters when we fit + # weibull_max to the flipped data. + data2 = CensoredData.left_censored(-np.array(times), cens) + + c2, loc2, scale2 = weibull_max.fit(data2, floc=0) + + assert_allclose(c2, 2.149, rtol=1e-3) + assert loc2 == 0 + assert_allclose(scale2, 28.99, rtol=1e-3) + + +def test_weibull_min_sas1(): + # Data and SAS results from + # https://support.sas.com/documentation/cdl/en/qcug/63922/HTML/default/ + # viewer.htm#qcug_reliability_sect004.htm + + text = """ + 450 0 460 1 1150 0 1150 0 1560 1 + 1600 0 1660 1 1850 1 1850 1 1850 1 + 1850 1 1850 1 2030 1 2030 1 2030 1 + 2070 0 2070 0 2080 0 2200 1 3000 1 + 3000 1 3000 1 3000 1 3100 0 3200 1 + 3450 0 3750 1 3750 1 4150 1 4150 1 + 4150 1 4150 1 4300 1 4300 1 4300 1 + 4300 1 4600 0 4850 1 4850 1 4850 1 + 4850 1 5000 1 5000 1 5000 1 6100 1 + 6100 0 6100 1 6100 1 6300 1 6450 1 + 6450 1 6700 1 7450 1 7800 1 7800 1 + 8100 1 8100 1 8200 1 8500 1 8500 1 + 8500 1 8750 1 8750 0 8750 1 9400 1 + 9900 1 10100 1 10100 1 10100 1 11500 1 + """ + + life, cens = np.array([int(w) for w in text.split()]).reshape(-1, 2).T + life = life/1000.0 + + data = CensoredData.right_censored(life, cens) + + c, loc, scale = weibull_min.fit(data, floc=0, optimizer=optimizer) + assert_allclose(c, 1.0584, rtol=1e-4) + assert_allclose(scale, 26.2968, rtol=1e-5) + assert loc == 0 + + +def test_weibull_min_sas2(): + # http://support.sas.com/documentation/cdl/en/ormpug/67517/HTML/default/ + # viewer.htm#ormpug_nlpsolver_examples06.htm + + # The last two values are right-censored. 
+ days = np.array([143, 164, 188, 188, 190, 192, 206, 209, 213, 216, 220, + 227, 230, 234, 246, 265, 304, 216, 244]) + + data = CensoredData.right_censored(days, [0]*(len(days) - 2) + [1]*2) + + c, loc, scale = weibull_min.fit(data, 1, loc=100, scale=100, + optimizer=optimizer) + + assert_allclose(c, 2.7112, rtol=5e-4) + assert_allclose(loc, 122.03, rtol=5e-4) + assert_allclose(scale, 108.37, rtol=5e-4) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_crosstab.py b/.venv/Lib/site-packages/scipy/stats/tests/test_crosstab.py new file mode 100644 index 0000000000000000000000000000000000000000..9ec173e2b9a749909014cf9417f64057d13277a2 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_crosstab.py @@ -0,0 +1,115 @@ +import pytest +import numpy as np +from numpy.testing import assert_array_equal, assert_equal +from scipy.stats.contingency import crosstab + + +@pytest.mark.parametrize('sparse', [False, True]) +def test_crosstab_basic(sparse): + a = [0, 0, 9, 9, 0, 0, 9] + b = [2, 1, 3, 1, 2, 3, 3] + expected_avals = [0, 9] + expected_bvals = [1, 2, 3] + expected_count = np.array([[1, 2, 1], + [1, 0, 2]]) + (avals, bvals), count = crosstab(a, b, sparse=sparse) + assert_array_equal(avals, expected_avals) + assert_array_equal(bvals, expected_bvals) + if sparse: + assert_array_equal(count.A, expected_count) + else: + assert_array_equal(count, expected_count) + + +def test_crosstab_basic_1d(): + # Verify that a single input sequence works as expected. + x = [1, 2, 3, 1, 2, 3, 3] + expected_xvals = [1, 2, 3] + expected_count = np.array([2, 2, 3]) + (xvals,), count = crosstab(x) + assert_array_equal(xvals, expected_xvals) + assert_array_equal(count, expected_count) + + +def test_crosstab_basic_3d(): + # Verify the function for three input sequences. 
+ a = 'a' + b = 'b' + x = [0, 0, 9, 9, 0, 0, 9, 9] + y = [a, a, a, a, b, b, b, a] + z = [1, 2, 3, 1, 2, 3, 3, 1] + expected_xvals = [0, 9] + expected_yvals = [a, b] + expected_zvals = [1, 2, 3] + expected_count = np.array([[[1, 1, 0], + [0, 1, 1]], + [[2, 0, 1], + [0, 0, 1]]]) + (xvals, yvals, zvals), count = crosstab(x, y, z) + assert_array_equal(xvals, expected_xvals) + assert_array_equal(yvals, expected_yvals) + assert_array_equal(zvals, expected_zvals) + assert_array_equal(count, expected_count) + + +@pytest.mark.parametrize('sparse', [False, True]) +def test_crosstab_levels(sparse): + a = [0, 0, 9, 9, 0, 0, 9] + b = [1, 2, 3, 1, 2, 3, 3] + expected_avals = [0, 9] + expected_bvals = [0, 1, 2, 3] + expected_count = np.array([[0, 1, 2, 1], + [0, 1, 0, 2]]) + (avals, bvals), count = crosstab(a, b, levels=[None, [0, 1, 2, 3]], + sparse=sparse) + assert_array_equal(avals, expected_avals) + assert_array_equal(bvals, expected_bvals) + if sparse: + assert_array_equal(count.A, expected_count) + else: + assert_array_equal(count, expected_count) + + +@pytest.mark.parametrize('sparse', [False, True]) +def test_crosstab_extra_levels(sparse): + # The pair of values (-1, 3) will be ignored, because we explicitly + # request the counted `a` values to be [0, 9]. 
+ a = [0, 0, 9, 9, 0, 0, 9, -1] + b = [1, 2, 3, 1, 2, 3, 3, 3] + expected_avals = [0, 9] + expected_bvals = [0, 1, 2, 3] + expected_count = np.array([[0, 1, 2, 1], + [0, 1, 0, 2]]) + (avals, bvals), count = crosstab(a, b, levels=[[0, 9], [0, 1, 2, 3]], + sparse=sparse) + assert_array_equal(avals, expected_avals) + assert_array_equal(bvals, expected_bvals) + if sparse: + assert_array_equal(count.A, expected_count) + else: + assert_array_equal(count, expected_count) + + +def test_validation_at_least_one(): + with pytest.raises(TypeError, match='At least one'): + crosstab() + + +def test_validation_same_lengths(): + with pytest.raises(ValueError, match='must have the same length'): + crosstab([1, 2], [1, 2, 3, 4]) + + +def test_validation_sparse_only_two_args(): + with pytest.raises(ValueError, match='only two input sequences'): + crosstab([0, 1, 1], [8, 8, 9], [1, 3, 3], sparse=True) + + +def test_validation_len_levels_matches_args(): + with pytest.raises(ValueError, match='number of input sequences'): + crosstab([0, 1, 1], [8, 8, 9], levels=([0, 1, 2, 3],)) + + +def test_result(): + res = crosstab([0, 1], [1, 2]) + assert_equal((res.elements, res.count), res) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py b/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..25361b9b28206351a8f6acce3b1f3dc3c664b48b --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_basic.py @@ -0,0 +1,548 @@ +import numpy.testing as npt +from numpy.testing import assert_allclose + +import numpy as np +import pytest + +from scipy import stats +from .common_tests import (check_normalization, check_moment, + check_mean_expect, + check_var_expect, check_skew_expect, + check_kurt_expect, check_entropy, + check_private_entropy, check_edge_support, + check_named_args, check_random_state_property, + check_pickling, check_rvs_broadcast, + check_freezing,) +from 
scipy.stats._distr_params import distdiscrete, invdistdiscrete +from scipy.stats._distn_infrastructure import rv_discrete_frozen + +vals = ([1, 2, 3, 4], [0.1, 0.2, 0.3, 0.4]) +distdiscrete += [[stats.rv_discrete(values=vals), ()]] + +# For these distributions, test_discrete_basic only runs with test mode full +distslow = {'zipfian', 'nhypergeom'} + + +def cases_test_discrete_basic(): + seen = set() + for distname, arg in distdiscrete: + if distname in distslow: + yield pytest.param(distname, arg, distname, marks=pytest.mark.slow) + else: + yield distname, arg, distname not in seen + seen.add(distname) + + +@pytest.mark.parametrize('distname,arg,first_case', cases_test_discrete_basic()) +def test_discrete_basic(distname, arg, first_case): + try: + distfn = getattr(stats, distname) + except TypeError: + distfn = distname + distname = 'sample distribution' + np.random.seed(9765456) + rvs = distfn.rvs(size=2000, *arg) + supp = np.unique(rvs) + m, v = distfn.stats(*arg) + check_cdf_ppf(distfn, arg, supp, distname + ' cdf_ppf') + + check_pmf_cdf(distfn, arg, distname) + check_oth(distfn, arg, supp, distname + ' oth') + check_edge_support(distfn, arg) + + alpha = 0.01 + check_discrete_chisquare(distfn, arg, rvs, alpha, + distname + ' chisquare') + + if first_case: + locscale_defaults = (0,) + meths = [distfn.pmf, distfn.logpmf, distfn.cdf, distfn.logcdf, + distfn.logsf] + # make sure arguments are within support + # for some distributions, this needs to be overridden + spec_k = {'randint': 11, 'hypergeom': 4, 'bernoulli': 0, + 'nchypergeom_wallenius': 6} + k = spec_k.get(distname, 1) + check_named_args(distfn, k, arg, locscale_defaults, meths) + if distname != 'sample distribution': + check_scale_docstring(distfn) + check_random_state_property(distfn, arg) + check_pickling(distfn, arg) + check_freezing(distfn, arg) + + # Entropy + check_entropy(distfn, arg, distname) + if distfn.__class__._entropy != stats.rv_discrete._entropy: + check_private_entropy(distfn, arg, 
stats.rv_discrete) + + +@pytest.mark.parametrize('distname,arg', distdiscrete) +def test_moments(distname, arg): + try: + distfn = getattr(stats, distname) + except TypeError: + distfn = distname + distname = 'sample distribution' + m, v, s, k = distfn.stats(*arg, moments='mvsk') + check_normalization(distfn, arg, distname) + + # compare `stats` and `moment` methods + check_moment(distfn, arg, m, v, distname) + check_mean_expect(distfn, arg, m, distname) + check_var_expect(distfn, arg, m, v, distname) + check_skew_expect(distfn, arg, m, v, s, distname) + with np.testing.suppress_warnings() as sup: + if distname in ['zipf', 'betanbinom']: + sup.filter(RuntimeWarning) + check_kurt_expect(distfn, arg, m, v, k, distname) + + # frozen distr moments + check_moment_frozen(distfn, arg, m, 1) + check_moment_frozen(distfn, arg, v+m*m, 2) + + +@pytest.mark.parametrize('dist,shape_args', distdiscrete) +def test_rvs_broadcast(dist, shape_args): + # If shape_only is True, it means the _rvs method of the + # distribution uses more than one random number to generate a random + # variate. That means the result of using rvs with broadcasting or + # with a nontrivial size will not necessarily be the same as using the + # numpy.vectorize'd version of rvs(), so we can only compare the shapes + # of the results, not the values. + # Whether or not a distribution is in the following list is an + # implementation detail of the distribution, not a requirement. If + # the implementation the rvs() method of a distribution changes, this + # test might also have to be changed. + shape_only = dist in ['betabinom', 'betanbinom', 'skellam', 'yulesimon', + 'dlaplace', 'nchypergeom_fisher', + 'nchypergeom_wallenius'] + + try: + distfunc = getattr(stats, dist) + except TypeError: + distfunc = dist + dist = f'rv_discrete(values=({dist.xk!r}, {dist.pk!r}))' + loc = np.zeros(2) + nargs = distfunc.numargs + allargs = [] + bshape = [] + # Generate shape parameter arguments... 
+ for k in range(nargs): + shp = (k + 3,) + (1,)*(k + 1) + param_val = shape_args[k] + allargs.append(np.full(shp, param_val)) + bshape.insert(0, shp[0]) + allargs.append(loc) + bshape.append(loc.size) + # bshape holds the expected shape when loc, scale, and the shape + # parameters are all broadcast together. + check_rvs_broadcast( + distfunc, dist, allargs, bshape, shape_only, [np.dtype(int)] + ) + + +@pytest.mark.parametrize('dist,args', distdiscrete) +def test_ppf_with_loc(dist, args): + try: + distfn = getattr(stats, dist) + except TypeError: + distfn = dist + #check with a negative, no and positive relocation. + np.random.seed(1942349) + re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)] + _a, _b = distfn.support(*args) + for loc in re_locs: + npt.assert_array_equal( + [_a-1+loc, _b+loc], + [distfn.ppf(0.0, *args, loc=loc), distfn.ppf(1.0, *args, loc=loc)] + ) + + +@pytest.mark.parametrize('dist, args', distdiscrete) +def test_isf_with_loc(dist, args): + try: + distfn = getattr(stats, dist) + except TypeError: + distfn = dist + # check with a negative, no and positive relocation. + np.random.seed(1942349) + re_locs = [np.random.randint(-10, -1), 0, np.random.randint(1, 10)] + _a, _b = distfn.support(*args) + for loc in re_locs: + expected = _b + loc, _a - 1 + loc + res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc) + npt.assert_array_equal(expected, res) + # test broadcasting behaviour + re_locs = [np.random.randint(-10, -1, size=(5, 3)), + np.zeros((5, 3)), + np.random.randint(1, 10, size=(5, 3))] + _a, _b = distfn.support(*args) + for loc in re_locs: + expected = _b + loc, _a - 1 + loc + res = distfn.isf(0., *args, loc=loc), distfn.isf(1., *args, loc=loc) + npt.assert_array_equal(expected, res) + + +def check_cdf_ppf(distfn, arg, supp, msg): + # supp is assumed to be an array of integers in the support of distfn + # (but not necessarily all the integers in the support). 
+ # This test assumes that the PMF of any value in the support of the + # distribution is greater than 1e-8. + + # cdf is a step function, and ppf(q) = min{k : cdf(k) >= q, k integer} + cdf_supp = distfn.cdf(supp, *arg) + # In very rare cases, the finite precision calculation of ppf(cdf(supp)) + # can produce an array in which an element is off by one. We nudge the + # CDF values down by 15 ULPs help to avoid this. + cdf_supp0 = cdf_supp - 15*np.spacing(cdf_supp) + npt.assert_array_equal(distfn.ppf(cdf_supp0, *arg), + supp, msg + '-roundtrip') + # Repeat the same calculation, but with the CDF values decreased by 1e-8. + npt.assert_array_equal(distfn.ppf(distfn.cdf(supp, *arg) - 1e-8, *arg), + supp, msg + '-roundtrip') + + if not hasattr(distfn, 'xk'): + _a, _b = distfn.support(*arg) + supp1 = supp[supp < _b] + npt.assert_array_equal(distfn.ppf(distfn.cdf(supp1, *arg) + 1e-8, *arg), + supp1 + distfn.inc, msg + ' ppf-cdf-next') + + +def check_pmf_cdf(distfn, arg, distname): + if hasattr(distfn, 'xk'): + index = distfn.xk + else: + startind = int(distfn.ppf(0.01, *arg) - 1) + index = list(range(startind, startind + 10)) + cdfs = distfn.cdf(index, *arg) + pmfs_cum = distfn.pmf(index, *arg).cumsum() + + atol, rtol = 1e-10, 1e-10 + if distname == 'skellam': # ncx2 accuracy + atol, rtol = 1e-5, 1e-5 + npt.assert_allclose(cdfs - cdfs[0], pmfs_cum - pmfs_cum[0], + atol=atol, rtol=rtol) + + # also check that pmf at non-integral k is zero + k = np.asarray(index) + k_shifted = k[:-1] + np.diff(k)/2 + npt.assert_equal(distfn.pmf(k_shifted, *arg), 0) + + # better check frozen distributions, and also when loc != 0 + loc = 0.5 + dist = distfn(loc=loc, *arg) + npt.assert_allclose(dist.pmf(k[1:] + loc), np.diff(dist.cdf(k + loc))) + npt.assert_equal(dist.pmf(k_shifted + loc), 0) + + +def check_moment_frozen(distfn, arg, m, k): + npt.assert_allclose(distfn(*arg).moment(k), m, + atol=1e-10, rtol=1e-10) + + +def check_oth(distfn, arg, supp, msg): + # checking other methods of distfn + 
npt.assert_allclose(distfn.sf(supp, *arg), 1. - distfn.cdf(supp, *arg), + atol=1e-10, rtol=1e-10) + + q = np.linspace(0.01, 0.99, 20) + npt.assert_allclose(distfn.isf(q, *arg), distfn.ppf(1. - q, *arg), + atol=1e-10, rtol=1e-10) + + median_sf = distfn.isf(0.5, *arg) + npt.assert_(distfn.sf(median_sf - 1, *arg) > 0.5) + npt.assert_(distfn.cdf(median_sf + 1, *arg) > 0.5) + + +def check_discrete_chisquare(distfn, arg, rvs, alpha, msg): + """Perform chisquare test for random sample of a discrete distribution + + Parameters + ---------- + distname : string + name of distribution function + arg : sequence + parameters of distribution + alpha : float + significance level, threshold for p-value + + Returns + ------- + result : bool + 0 if test passes, 1 if test fails + + """ + wsupp = 0.05 + + # construct intervals with minimum mass `wsupp`. + # intervals are left-half-open as in a cdf difference + _a, _b = distfn.support(*arg) + lo = int(max(_a, -1000)) + high = int(min(_b, 1000)) + 1 + distsupport = range(lo, high) + last = 0 + distsupp = [lo] + distmass = [] + for ii in distsupport: + current = distfn.cdf(ii, *arg) + if current - last >= wsupp - 1e-14: + distsupp.append(ii) + distmass.append(current - last) + last = current + if current > (1 - wsupp): + break + if distsupp[-1] < _b: + distsupp.append(_b) + distmass.append(1 - last) + distsupp = np.array(distsupp) + distmass = np.array(distmass) + + # convert intervals to right-half-open as required by histogram + histsupp = distsupp + 1e-8 + histsupp[0] = _a + + # find sample frequencies and perform chisquare test + freq, hsupp = np.histogram(rvs, histsupp) + chis, pval = stats.chisquare(np.array(freq), len(rvs)*distmass) + + npt.assert_( + pval > alpha, + f'chisquare - test for {msg} at arg = {str(arg)} with pval = {str(pval)}' + ) + + +def check_scale_docstring(distfn): + if distfn.__doc__ is not None: + # Docstrings can be stripped if interpreter is run with -OO + npt.assert_('scale' not in distfn.__doc__) + + 
+@pytest.mark.parametrize('method', ['pmf', 'logpmf', 'cdf', 'logcdf', + 'sf', 'logsf', 'ppf', 'isf']) +@pytest.mark.parametrize('distname, args', distdiscrete) +def test_methods_with_lists(method, distname, args): + # Test that the discrete distributions can accept Python lists + # as arguments. + try: + dist = getattr(stats, distname) + except TypeError: + return + if method in ['ppf', 'isf']: + z = [0.1, 0.2] + else: + z = [0, 1] + p2 = [[p]*2 for p in args] + loc = [0, 1] + result = dist.pmf(z, *p2, loc=loc) + npt.assert_allclose(result, + [dist.pmf(*v) for v in zip(z, *p2, loc)], + rtol=1e-15, atol=1e-15) + + +@pytest.mark.parametrize('distname, args', invdistdiscrete) +def test_cdf_gh13280_regression(distname, args): + # Test for nan output when shape parameters are invalid + dist = getattr(stats, distname) + x = np.arange(-2, 15) + vals = dist.cdf(x, *args) + expected = np.nan + npt.assert_equal(vals, expected) + + +def cases_test_discrete_integer_shapes(): + # distributions parameters that are only allowed to be integral when + # fitting, but are allowed to be real as input to PDF, etc. 
+ integrality_exceptions = {'nbinom': {'n'}, 'betanbinom': {'n'}} + + seen = set() + for distname, shapes in distdiscrete: + if distname in seen: + continue + seen.add(distname) + + try: + dist = getattr(stats, distname) + except TypeError: + continue + + shape_info = dist._shape_info() + + for i, shape in enumerate(shape_info): + if (shape.name in integrality_exceptions.get(distname, set()) or + not shape.integrality): + continue + + yield distname, shape.name, shapes + + +@pytest.mark.parametrize('distname, shapename, shapes', + cases_test_discrete_integer_shapes()) +def test_integer_shapes(distname, shapename, shapes): + dist = getattr(stats, distname) + shape_info = dist._shape_info() + shape_names = [shape.name for shape in shape_info] + i = shape_names.index(shapename) # this element of params must be integral + + shapes_copy = list(shapes) + + valid_shape = shapes[i] + invalid_shape = valid_shape - 0.5 # arbitrary non-integral value + new_valid_shape = valid_shape - 1 + shapes_copy[i] = [[valid_shape], [invalid_shape], [new_valid_shape]] + + a, b = dist.support(*shapes) + x = np.round(np.linspace(a, b, 5)) + + pmf = dist.pmf(x, *shapes_copy) + assert not np.any(np.isnan(pmf[0, :])) + assert np.all(np.isnan(pmf[1, :])) + assert not np.any(np.isnan(pmf[2, :])) + + +def test_frozen_attributes(): + # gh-14827 reported that all frozen distributions had both pmf and pdf + # attributes; continuous should have pdf and discrete should have pmf. 
+ message = "'rv_discrete_frozen' object has no attribute" + with pytest.raises(AttributeError, match=message): + stats.binom(10, 0.5).pdf + with pytest.raises(AttributeError, match=message): + stats.binom(10, 0.5).logpdf + stats.binom.pdf = "herring" + frozen_binom = stats.binom(10, 0.5) + assert isinstance(frozen_binom, rv_discrete_frozen) + delattr(stats.binom, 'pdf') + + +@pytest.mark.parametrize('distname, shapes', distdiscrete) +def test_interval(distname, shapes): + # gh-11026 reported that `interval` returns incorrect values when + # `confidence=1`. The values were not incorrect, but it was not intuitive + # that the left end of the interval should extend beyond the support of the + # distribution. Confirm that this is the behavior for all distributions. + if isinstance(distname, str): + dist = getattr(stats, distname) + else: + dist = distname + a, b = dist.support(*shapes) + npt.assert_equal(dist.ppf([0, 1], *shapes), (a-1, b)) + npt.assert_equal(dist.isf([1, 0], *shapes), (a-1, b)) + npt.assert_equal(dist.interval(1, *shapes), (a-1, b)) + + +@pytest.mark.xfail_on_32bit("Sensible to machine precision") +def test_rv_sample(): + # Thoroughly test rv_sample and check that gh-3758 is resolved + + # Generate a random discrete distribution + rng = np.random.default_rng(98430143469) + xk = np.sort(rng.random(10) * 10) + pk = rng.random(10) + pk /= np.sum(pk) + dist = stats.rv_discrete(values=(xk, pk)) + + # Generate points to the left and right of xk + xk_left = (np.array([0] + xk[:-1].tolist()) + xk)/2 + xk_right = (np.array(xk[1:].tolist() + [xk[-1]+1]) + xk)/2 + + # Generate points to the left and right of cdf + cdf2 = np.cumsum(pk) + cdf2_left = (np.array([0] + cdf2[:-1].tolist()) + cdf2)/2 + cdf2_right = (np.array(cdf2[1:].tolist() + [1]) + cdf2)/2 + + # support - leftmost and rightmost xk + a, b = dist.support() + assert_allclose(a, xk[0]) + assert_allclose(b, xk[-1]) + + # pmf - supported only on the xk + assert_allclose(dist.pmf(xk), pk) + 
assert_allclose(dist.pmf(xk_right), 0) + assert_allclose(dist.pmf(xk_left), 0) + + # logpmf is log of the pmf; log(0) = -np.inf + with np.errstate(divide='ignore'): + assert_allclose(dist.logpmf(xk), np.log(pk)) + assert_allclose(dist.logpmf(xk_right), -np.inf) + assert_allclose(dist.logpmf(xk_left), -np.inf) + + # cdf - the cumulative sum of the pmf + assert_allclose(dist.cdf(xk), cdf2) + assert_allclose(dist.cdf(xk_right), cdf2) + assert_allclose(dist.cdf(xk_left), [0]+cdf2[:-1].tolist()) + + with np.errstate(divide='ignore'): + assert_allclose(dist.logcdf(xk), np.log(dist.cdf(xk)), + atol=1e-15) + assert_allclose(dist.logcdf(xk_right), np.log(dist.cdf(xk_right)), + atol=1e-15) + assert_allclose(dist.logcdf(xk_left), np.log(dist.cdf(xk_left)), + atol=1e-15) + + # sf is 1-cdf + assert_allclose(dist.sf(xk), 1-dist.cdf(xk)) + assert_allclose(dist.sf(xk_right), 1-dist.cdf(xk_right)) + assert_allclose(dist.sf(xk_left), 1-dist.cdf(xk_left)) + + with np.errstate(divide='ignore'): + assert_allclose(dist.logsf(xk), np.log(dist.sf(xk)), + atol=1e-15) + assert_allclose(dist.logsf(xk_right), np.log(dist.sf(xk_right)), + atol=1e-15) + assert_allclose(dist.logsf(xk_left), np.log(dist.sf(xk_left)), + atol=1e-15) + + # ppf + assert_allclose(dist.ppf(cdf2), xk) + assert_allclose(dist.ppf(cdf2_left), xk) + assert_allclose(dist.ppf(cdf2_right)[:-1], xk[1:]) + assert_allclose(dist.ppf(0), a - 1) + assert_allclose(dist.ppf(1), b) + + # isf + sf2 = dist.sf(xk) + assert_allclose(dist.isf(sf2), xk) + assert_allclose(dist.isf(1-cdf2_left), dist.ppf(cdf2_left)) + assert_allclose(dist.isf(1-cdf2_right), dist.ppf(cdf2_right)) + assert_allclose(dist.isf(0), b) + assert_allclose(dist.isf(1), a - 1) + + # interval is (ppf(alpha/2), isf(alpha/2)) + ps = np.linspace(0.01, 0.99, 10) + int2 = dist.ppf(ps/2), dist.isf(ps/2) + assert_allclose(dist.interval(1-ps), int2) + assert_allclose(dist.interval(0), dist.median()) + assert_allclose(dist.interval(1), (a-1, b)) + + # median is simply ppf(0.5) + 
med2 = dist.ppf(0.5) + assert_allclose(dist.median(), med2) + + # all four stats (mean, var, skew, and kurtosis) from the definitions + mean2 = np.sum(xk*pk) + var2 = np.sum((xk - mean2)**2 * pk) + skew2 = np.sum((xk - mean2)**3 * pk) / var2**(3/2) + kurt2 = np.sum((xk - mean2)**4 * pk) / var2**2 - 3 + assert_allclose(dist.mean(), mean2) + assert_allclose(dist.std(), np.sqrt(var2)) + assert_allclose(dist.var(), var2) + assert_allclose(dist.stats(moments='mvsk'), (mean2, var2, skew2, kurt2)) + + # noncentral moment against definition + mom3 = np.sum((xk**3) * pk) + assert_allclose(dist.moment(3), mom3) + + # expect - check against moments + assert_allclose(dist.expect(lambda x: 1), 1) + assert_allclose(dist.expect(), mean2) + assert_allclose(dist.expect(lambda x: x**3), mom3) + + # entropy is the negative of the expected value of log(p) + with np.errstate(divide='ignore'): + assert_allclose(-dist.expect(lambda x: dist.logpmf(x)), dist.entropy()) + + # RVS is just ppf of uniform random variates + rng = np.random.default_rng(98430143469) + rvs = dist.rvs(size=100, random_state=rng) + rng = np.random.default_rng(98430143469) + rvs0 = dist.ppf(rng.random(size=100)) + assert_allclose(rvs, rvs0) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py b/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py new file mode 100644 index 0000000000000000000000000000000000000000..eadec00914852d63c0e1f2c34a0d0c2512c6e4e7 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_discrete_distns.py @@ -0,0 +1,648 @@ +import pytest +import itertools + +from scipy.stats import (betabinom, betanbinom, hypergeom, nhypergeom, + bernoulli, boltzmann, skellam, zipf, zipfian, binom, + nbinom, nchypergeom_fisher, nchypergeom_wallenius, + randint) + +import numpy as np +from numpy.testing import ( + assert_almost_equal, assert_equal, assert_allclose, suppress_warnings +) +from scipy.special import binom as special_binom +from scipy.optimize import 
root_scalar +from scipy.integrate import quad + + +# The expected values were computed with Wolfram Alpha, using +# the expression CDF[HypergeometricDistribution[N, n, M], k]. +@pytest.mark.parametrize('k, M, n, N, expected, rtol', + [(3, 10, 4, 5, + 0.9761904761904762, 1e-15), + (107, 10000, 3000, 215, + 0.9999999997226765, 1e-15), + (10, 10000, 3000, 215, + 2.681682217692179e-21, 5e-11)]) +def test_hypergeom_cdf(k, M, n, N, expected, rtol): + p = hypergeom.cdf(k, M, n, N) + assert_allclose(p, expected, rtol=rtol) + + +# The expected values were computed with Wolfram Alpha, using +# the expression SurvivalFunction[HypergeometricDistribution[N, n, M], k]. +@pytest.mark.parametrize('k, M, n, N, expected, rtol', + [(25, 10000, 3000, 215, + 0.9999999999052958, 1e-15), + (125, 10000, 3000, 215, + 1.4416781705752128e-18, 5e-11)]) +def test_hypergeom_sf(k, M, n, N, expected, rtol): + p = hypergeom.sf(k, M, n, N) + assert_allclose(p, expected, rtol=rtol) + + +def test_hypergeom_logpmf(): + # symmetries test + # f(k,N,K,n) = f(n-k,N,N-K,n) = f(K-k,N,K,N-n) = f(k,N,n,K) + k = 5 + N = 50 + K = 10 + n = 5 + logpmf1 = hypergeom.logpmf(k, N, K, n) + logpmf2 = hypergeom.logpmf(n - k, N, N - K, n) + logpmf3 = hypergeom.logpmf(K - k, N, K, N - n) + logpmf4 = hypergeom.logpmf(k, N, n, K) + assert_almost_equal(logpmf1, logpmf2, decimal=12) + assert_almost_equal(logpmf1, logpmf3, decimal=12) + assert_almost_equal(logpmf1, logpmf4, decimal=12) + + # test related distribution + # Bernoulli distribution if n = 1 + k = 1 + N = 10 + K = 7 + n = 1 + hypergeom_logpmf = hypergeom.logpmf(k, N, K, n) + bernoulli_logpmf = bernoulli.logpmf(k, K/N) + assert_almost_equal(hypergeom_logpmf, bernoulli_logpmf, decimal=12) + + +def test_nhypergeom_pmf(): + # test with hypergeom + M, n, r = 45, 13, 8 + k = 6 + NHG = nhypergeom.pmf(k, M, n, r) + HG = hypergeom.pmf(k, M, n, k+r-1) * (M - n - (r-1)) / (M - (k+r-1)) + assert_allclose(HG, NHG, rtol=1e-10) + + +def test_nhypergeom_pmfcdf(): + # test pmf and 
cdf with arbitrary values. + M = 8 + n = 3 + r = 4 + support = np.arange(n+1) + pmf = nhypergeom.pmf(support, M, n, r) + cdf = nhypergeom.cdf(support, M, n, r) + assert_allclose(pmf, [1/14, 3/14, 5/14, 5/14], rtol=1e-13) + assert_allclose(cdf, [1/14, 4/14, 9/14, 1.0], rtol=1e-13) + + +def test_nhypergeom_r0(): + # test with `r = 0`. + M = 10 + n = 3 + r = 0 + pmf = nhypergeom.pmf([[0, 1, 2, 0], [1, 2, 0, 3]], M, n, r) + assert_allclose(pmf, [[1, 0, 0, 1], [0, 0, 1, 0]], rtol=1e-13) + + +def test_nhypergeom_rvs_shape(): + # Check that when given a size with more dimensions than the + # dimensions of the broadcast parameters, rvs returns an array + # with the correct shape. + x = nhypergeom.rvs(22, [7, 8, 9], [[12], [13]], size=(5, 1, 2, 3)) + assert x.shape == (5, 1, 2, 3) + + +def test_nhypergeom_accuracy(): + # Check that nhypergeom.rvs post-gh-13431 gives the same values as + # inverse transform sampling + np.random.seed(0) + x = nhypergeom.rvs(22, 7, 11, size=100) + np.random.seed(0) + p = np.random.uniform(size=100) + y = nhypergeom.ppf(p, 22, 7, 11) + assert_equal(x, y) + + +def test_boltzmann_upper_bound(): + k = np.arange(-3, 5) + + N = 1 + p = boltzmann.pmf(k, 0.123, N) + expected = k == 0 + assert_equal(p, expected) + + lam = np.log(2) + N = 3 + p = boltzmann.pmf(k, lam, N) + expected = [0, 0, 0, 4/7, 2/7, 1/7, 0, 0] + assert_allclose(p, expected, rtol=1e-13) + + c = boltzmann.cdf(k, lam, N) + expected = [0, 0, 0, 4/7, 6/7, 1, 1, 1] + assert_allclose(c, expected, rtol=1e-13) + + +def test_betabinom_a_and_b_unity(): + # test limiting case that betabinom(n, 1, 1) is a discrete uniform + # distribution from 0 to n + n = 20 + k = np.arange(n + 1) + p = betabinom(n, 1, 1).pmf(k) + expected = np.repeat(1 / (n + 1), n + 1) + assert_almost_equal(p, expected) + + +@pytest.mark.parametrize('dtypes', itertools.product(*[(int, float)]*3)) +def test_betabinom_stats_a_and_b_integers_gh18026(dtypes): + # gh-18026 reported that `betabinom` kurtosis calculation fails when 
some + # parameters are integers. Check that this is resolved. + n_type, a_type, b_type = dtypes + n, a, b = n_type(10), a_type(2), b_type(3) + assert_allclose(betabinom.stats(n, a, b, moments='k'), -0.6904761904761907) + + +def test_betabinom_bernoulli(): + # test limiting case that betabinom(1, a, b) = bernoulli(a / (a + b)) + a = 2.3 + b = 0.63 + k = np.arange(2) + p = betabinom(1, a, b).pmf(k) + expected = bernoulli(a / (a + b)).pmf(k) + assert_almost_equal(p, expected) + + +def test_issue_10317(): + alpha, n, p = 0.9, 10, 1 + assert_equal(nbinom.interval(confidence=alpha, n=n, p=p), (0, 0)) + + +def test_issue_11134(): + alpha, n, p = 0.95, 10, 0 + assert_equal(binom.interval(confidence=alpha, n=n, p=p), (0, 0)) + + +def test_issue_7406(): + np.random.seed(0) + assert_equal(binom.ppf(np.random.rand(10), 0, 0.5), 0) + + # Also check that endpoints (q=0, q=1) are correct + assert_equal(binom.ppf(0, 0, 0.5), -1) + assert_equal(binom.ppf(1, 0, 0.5), 0) + + +def test_issue_5122(): + p = 0 + n = np.random.randint(100, size=10) + + x = 0 + ppf = binom.ppf(x, n, p) + assert_equal(ppf, -1) + + x = np.linspace(0.01, 0.99, 10) + ppf = binom.ppf(x, n, p) + assert_equal(ppf, 0) + + x = 1 + ppf = binom.ppf(x, n, p) + assert_equal(ppf, n) + + +def test_issue_1603(): + assert_equal(binom(1000, np.logspace(-3, -100)).ppf(0.01), 0) + + +def test_issue_5503(): + p = 0.5 + x = np.logspace(3, 14, 12) + assert_allclose(binom.cdf(x, 2*x, p), 0.5, atol=1e-2) + + +@pytest.mark.parametrize('x, n, p, cdf_desired', [ + (300, 1000, 3/10, 0.51559351981411995636), + (3000, 10000, 3/10, 0.50493298381929698016), + (30000, 100000, 3/10, 0.50156000591726422864), + (300000, 1000000, 3/10, 0.50049331906666960038), + (3000000, 10000000, 3/10, 0.50015600124585261196), + (30000000, 100000000, 3/10, 0.50004933192735230102), + (30010000, 100000000, 3/10, 0.98545384016570790717), + (29990000, 100000000, 3/10, 0.01455017177985268670), + (29950000, 100000000, 3/10, 5.02250963487432024943e-28), +]) +def 
test_issue_5503pt2(x, n, p, cdf_desired): + assert_allclose(binom.cdf(x, n, p), cdf_desired) + + +def test_issue_5503pt3(): + # From Wolfram Alpha: CDF[BinomialDistribution[1e12, 1e-12], 2] + assert_allclose(binom.cdf(2, 10**12, 10**-12), 0.91969860292869777384) + + +def test_issue_6682(): + # Reference value from R: + # options(digits=16) + # print(pnbinom(250, 50, 32/63, lower.tail=FALSE)) + assert_allclose(nbinom.sf(250, 50, 32./63.), 1.460458510976452e-35) + + +def test_issue_19747(): + # test that negative k does not raise an error in nbinom.logcdf + result = nbinom.logcdf([5, -1, 1], 5, 0.5) + reference = [-0.47313352, -np.inf, -2.21297293] + assert_allclose(result, reference) + + +def test_boost_divide_by_zero_issue_15101(): + n = 1000 + p = 0.01 + k = 996 + assert_allclose(binom.pmf(k, n, p), 0.0) + + +def test_skellam_gh11474(): + # test issue reported in gh-11474 caused by `cdfchn` + mu = [1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000] + cdf = skellam.cdf(0, mu, mu) + # generated in R + # library(skellam) + # options(digits = 16) + # mu = c(1, 10, 100, 1000, 5000, 5050, 5100, 5250, 6000) + # pskellam(0, mu, mu, TRUE) + cdf_expected = [0.6542541612768356, 0.5448901559424127, 0.5141135799745580, + 0.5044605891382528, 0.5019947363350450, 0.5019848365953181, + 0.5019750827993392, 0.5019466621805060, 0.5018209330219539] + assert_allclose(cdf, cdf_expected) + + +class TestZipfian: + def test_zipfian_asymptotic(self): + # test limiting case that zipfian(a, n) -> zipf(a) as n-> oo + a = 6.5 + N = 10000000 + k = np.arange(1, 21) + assert_allclose(zipfian.pmf(k, a, N), zipf.pmf(k, a)) + assert_allclose(zipfian.cdf(k, a, N), zipf.cdf(k, a)) + assert_allclose(zipfian.sf(k, a, N), zipf.sf(k, a)) + assert_allclose(zipfian.stats(a, N, moments='msvk'), + zipf.stats(a, moments='msvk')) + + def test_zipfian_continuity(self): + # test that zipfian(0.999999, n) ~ zipfian(1.000001, n) + # (a = 1 switches between methods of calculating harmonic sum) + alt1, agt1 = 0.99999999, 
1.00000001 + N = 30 + k = np.arange(1, N + 1) + assert_allclose(zipfian.pmf(k, alt1, N), zipfian.pmf(k, agt1, N), + rtol=5e-7) + assert_allclose(zipfian.cdf(k, alt1, N), zipfian.cdf(k, agt1, N), + rtol=5e-7) + assert_allclose(zipfian.sf(k, alt1, N), zipfian.sf(k, agt1, N), + rtol=5e-7) + assert_allclose(zipfian.stats(alt1, N, moments='msvk'), + zipfian.stats(agt1, N, moments='msvk'), rtol=5e-7) + + def test_zipfian_R(self): + # test against R VGAM package + # library(VGAM) + # k <- c(13, 16, 1, 4, 4, 8, 10, 19, 5, 7) + # a <- c(1.56712977, 3.72656295, 5.77665117, 9.12168729, 5.79977172, + # 4.92784796, 9.36078764, 4.3739616 , 7.48171872, 4.6824154) + # n <- c(70, 80, 48, 65, 83, 89, 50, 30, 20, 20) + # pmf <- dzipf(k, N = n, shape = a) + # cdf <- pzipf(k, N = n, shape = a) + # print(pmf) + # print(cdf) + np.random.seed(0) + k = np.random.randint(1, 20, size=10) + a = np.random.rand(10)*10 + 1 + n = np.random.randint(1, 100, size=10) + pmf = [8.076972e-03, 2.950214e-05, 9.799333e-01, 3.216601e-06, + 3.158895e-04, 3.412497e-05, 4.350472e-10, 2.405773e-06, + 5.860662e-06, 1.053948e-04] + cdf = [0.8964133, 0.9998666, 0.9799333, 0.9999995, 0.9998584, + 0.9999458, 1.0000000, 0.9999920, 0.9999977, 0.9998498] + # skip the first point; zipUC is not accurate for low a, n + assert_allclose(zipfian.pmf(k, a, n)[1:], pmf[1:], rtol=1e-6) + assert_allclose(zipfian.cdf(k, a, n)[1:], cdf[1:], rtol=5e-5) + + np.random.seed(0) + naive_tests = np.vstack((np.logspace(-2, 1, 10), + np.random.randint(2, 40, 10))).T + + @pytest.mark.parametrize("a, n", naive_tests) + def test_zipfian_naive(self, a, n): + # test against bare-bones implementation + + @np.vectorize + def Hns(n, s): + """Naive implementation of harmonic sum""" + return (1/np.arange(1, n+1)**s).sum() + + @np.vectorize + def pzip(k, a, n): + """Naive implementation of zipfian pmf""" + if k < 1 or k > n: + return 0. 
+ else: + return 1 / k**a / Hns(n, a) + + k = np.arange(n+1) + pmf = pzip(k, a, n) + cdf = np.cumsum(pmf) + mean = np.average(k, weights=pmf) + var = np.average((k - mean)**2, weights=pmf) + std = var**0.5 + skew = np.average(((k-mean)/std)**3, weights=pmf) + kurtosis = np.average(((k-mean)/std)**4, weights=pmf) - 3 + assert_allclose(zipfian.pmf(k, a, n), pmf) + assert_allclose(zipfian.cdf(k, a, n), cdf) + assert_allclose(zipfian.stats(a, n, moments="mvsk"), + [mean, var, skew, kurtosis]) + + def test_pmf_integer_k(self): + k = np.arange(0, 1000) + k_int32 = k.astype(np.int32) + dist = zipfian(111, 22) + pmf = dist.pmf(k) + pmf_k_int32 = dist.pmf(k_int32) + assert_equal(pmf, pmf_k_int32) + + +class TestNCH: + np.random.seed(2) # seeds 0 and 1 had some xl = xu; randint failed + shape = (2, 4, 3) + max_m = 100 + m1 = np.random.randint(1, max_m, size=shape) # red balls + m2 = np.random.randint(1, max_m, size=shape) # white balls + N = m1 + m2 # total balls + n = randint.rvs(0, N, size=N.shape) # number of draws + xl = np.maximum(0, n-m2) # lower bound of support + xu = np.minimum(n, m1) # upper bound of support + x = randint.rvs(xl, xu, size=xl.shape) + odds = np.random.rand(*x.shape)*2 + + # test output is more readable when function names (strings) are passed + @pytest.mark.parametrize('dist_name', + ['nchypergeom_fisher', 'nchypergeom_wallenius']) + def test_nch_hypergeom(self, dist_name): + # Both noncentral hypergeometric distributions reduce to the + # hypergeometric distribution when odds = 1 + dists = {'nchypergeom_fisher': nchypergeom_fisher, + 'nchypergeom_wallenius': nchypergeom_wallenius} + dist = dists[dist_name] + x, N, m1, n = self.x, self.N, self.m1, self.n + assert_allclose(dist.pmf(x, N, m1, n, odds=1), + hypergeom.pmf(x, N, m1, n)) + + def test_nchypergeom_fisher_naive(self): + # test against a very simple implementation + x, N, m1, n, odds = self.x, self.N, self.m1, self.n, self.odds + + @np.vectorize + def pmf_mean_var(x, N, m1, n, w): + # simple 
implementation of nchypergeom_fisher pmf + m2 = N - m1 + xl = np.maximum(0, n-m2) + xu = np.minimum(n, m1) + + def f(x): + t1 = special_binom(m1, x) + t2 = special_binom(m2, n - x) + return t1 * t2 * w**x + + def P(k): + return sum(f(y)*y**k for y in range(xl, xu + 1)) + + P0 = P(0) + P1 = P(1) + P2 = P(2) + pmf = f(x) / P0 + mean = P1 / P0 + var = P2 / P0 - (P1 / P0)**2 + return pmf, mean, var + + pmf, mean, var = pmf_mean_var(x, N, m1, n, odds) + assert_allclose(nchypergeom_fisher.pmf(x, N, m1, n, odds), pmf) + assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='m'), + mean) + assert_allclose(nchypergeom_fisher.stats(N, m1, n, odds, moments='v'), + var) + + def test_nchypergeom_wallenius_naive(self): + # test against a very simple implementation + + np.random.seed(2) + shape = (2, 4, 3) + max_m = 100 + m1 = np.random.randint(1, max_m, size=shape) + m2 = np.random.randint(1, max_m, size=shape) + N = m1 + m2 + n = randint.rvs(0, N, size=N.shape) + xl = np.maximum(0, n-m2) + xu = np.minimum(n, m1) + x = randint.rvs(xl, xu, size=xl.shape) + w = np.random.rand(*x.shape)*2 + + def support(N, m1, n, w): + m2 = N - m1 + xl = np.maximum(0, n-m2) + xu = np.minimum(n, m1) + return xl, xu + + @np.vectorize + def mean(N, m1, n, w): + m2 = N - m1 + xl, xu = support(N, m1, n, w) + + def fun(u): + return u/m1 + (1 - (n-u)/m2)**w - 1 + + return root_scalar(fun, bracket=(xl, xu)).root + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + message="invalid value encountered in mean") + assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w), + mean(N, m1, n, w), rtol=2e-2) + + @np.vectorize + def variance(N, m1, n, w): + m2 = N - m1 + u = mean(N, m1, n, w) + a = u * (m1 - u) + b = (n-u)*(u + m2 - n) + return N*a*b / ((N-1) * (m1*b + m2*a)) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, + message="invalid value encountered in mean") + assert_allclose( + nchypergeom_wallenius.stats(N, m1, n, w, moments='v'), + variance(N, m1, n, w), + 
rtol=5e-2 + ) + + @np.vectorize + def pmf(x, N, m1, n, w): + m2 = N - m1 + xl, xu = support(N, m1, n, w) + + def integrand(t): + D = w*(m1 - x) + (m2 - (n-x)) + res = (1-t**(w/D))**x * (1-t**(1/D))**(n-x) + return res + + def f(x): + t1 = special_binom(m1, x) + t2 = special_binom(m2, n - x) + the_integral = quad(integrand, 0, 1, + epsrel=1e-16, epsabs=1e-16) + return t1 * t2 * the_integral[0] + + return f(x) + + pmf0 = pmf(x, N, m1, n, w) + pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w) + + atol, rtol = 1e-6, 1e-6 + i = np.abs(pmf1 - pmf0) < atol + rtol*np.abs(pmf0) + assert i.sum() > np.prod(shape) / 2 # works at least half the time + + # for those that fail, discredit the naive implementation + for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]): + # get the support + m2 = N - m1 + xl, xu = support(N, m1, n, w) + x = np.arange(xl, xu + 1) + + # calculate sum of pmf over the support + # the naive implementation is very wrong in these cases + assert pmf(x, N, m1, n, w).sum() < .5 + assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1) + + def test_wallenius_against_mpmath(self): + # precompute data with mpmath since naive implementation above + # is not reliable. See source code in gh-13330. + M = 50 + n = 30 + N = 20 + odds = 2.25 + # Expected results, computed with mpmath. 
+ sup = np.arange(21) + pmf = np.array([3.699003068656875e-20, + 5.89398584245431e-17, + 2.1594437742911123e-14, + 3.221458044649955e-12, + 2.4658279241205077e-10, + 1.0965862603981212e-08, + 3.057890479665704e-07, + 5.622818831643761e-06, + 7.056482841531681e-05, + 0.000618899425358671, + 0.003854172932571669, + 0.01720592676256026, + 0.05528844897093792, + 0.12772363313574242, + 0.21065898367825722, + 0.24465958845359234, + 0.1955114898110033, + 0.10355390084949237, + 0.03414490375225675, + 0.006231989845775931, + 0.0004715577304677075]) + mean = 14.808018384813426 + var = 2.6085975877923717 + + # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2) + # has only three digits of accuracy (~ 2.1511e-14). + assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf, + rtol=1e-13, atol=1e-13) + assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds), + mean, rtol=1e-13) + assert_allclose(nchypergeom_wallenius.var(M, n, N, odds), + var, rtol=1e-11) + + @pytest.mark.parametrize('dist_name', + ['nchypergeom_fisher', 'nchypergeom_wallenius']) + def test_rvs_shape(self, dist_name): + # Check that when given a size with more dimensions than the + # dimensions of the broadcast parameters, rvs returns an array + # with the correct shape. + dists = {'nchypergeom_fisher': nchypergeom_fisher, + 'nchypergeom_wallenius': nchypergeom_wallenius} + dist = dists[dist_name] + x = dist.rvs(50, 30, [[10], [20]], [0.5, 1.0, 2.0], size=(5, 1, 2, 3)) + assert x.shape == (5, 1, 2, 3) + + +@pytest.mark.parametrize("mu, q, expected", + [[10, 120, -1.240089881791596e-38], + [1500, 0, -86.61466680572661]]) +def test_nbinom_11465(mu, q, expected): + # test nbinom.logcdf at extreme tails + size = 20 + n, p = size, size/(size+mu) + # In R: + # options(digits=16) + # pnbinom(mu=10, size=20, q=120, log.p=TRUE) + assert_allclose(nbinom.logcdf(q, n, p), expected) + + +def test_gh_17146(): + # Check that discrete distributions return PMF of zero at non-integral x. 
+ # See gh-17146. + x = np.linspace(0, 1, 11) + p = 0.8 + pmf = bernoulli(p).pmf(x) + i = (x % 1 == 0) + assert_allclose(pmf[-1], p) + assert_allclose(pmf[0], 1-p) + assert_equal(pmf[~i], 0) + + +class TestBetaNBinom: + @pytest.mark.parametrize('x, n, a, b, ref', + [[5, 5e6, 5, 20, 1.1520944824139114e-107], + [100, 50, 5, 20, 0.002855762954310226], + [10000, 1000, 5, 20, 1.9648515726019154e-05]]) + def test_betanbinom_pmf(self, x, n, a, b, ref): + # test that PMF stays accurate in the distribution tails + # reference values computed with mpmath + # from mpmath import mp + # mp.dps = 500 + # def betanbinom_pmf(k, n, a, b): + # k = mp.mpf(k) + # a = mp.mpf(a) + # b = mp.mpf(b) + # n = mp.mpf(n) + # return float(mp.binomial(n + k - mp.one, k) + # * mp.beta(a + n, b + k) / mp.beta(a, b)) + assert_allclose(betanbinom.pmf(x, n, a, b), ref, rtol=1e-10) + + + @pytest.mark.parametrize('n, a, b, ref', + [[10000, 5000, 50, 0.12841520515722202], + [10, 9, 9, 7.9224400871459695], + [100, 1000, 10, 1.5849602176622748]]) + def test_betanbinom_kurtosis(self, n, a, b, ref): + # reference values were computed via mpmath + # from mpmath import mp + # def kurtosis_betanegbinom(n, a, b): + # n = mp.mpf(n) + # a = mp.mpf(a) + # b = mp.mpf(b) + # four = mp.mpf(4.) + # mean = n * b / (a - mp.one) + # var = (n * b * (n + a - 1.) * (a + b - 1.) + # / ((a - 2.) * (a - 1.)**2.)) + # def f(k): + # return (mp.binomial(n + k - mp.one, k) + # * mp.beta(a + n, b + k) / mp.beta(a, b) + # * (k - mean)**four) + # fourth_moment = mp.nsum(f, [0, mp.inf]) + # return float(fourth_moment/var**2 - 3.) 
+ assert_allclose(betanbinom.stats(n, a, b, moments="k"), + ref, rtol=3e-15) + + +class TestZipf: + def test_gh20692(self): + # test that int32 data for k generates same output as double + k = np.arange(0, 1000) + k_int32 = k.astype(np.int32) + dist = zipf(9) + pmf = dist.pmf(k) + pmf_k_int32 = dist.pmf(k_int32) + assert_equal(pmf, pmf_k_int32) \ No newline at end of file diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_distributions.py b/.venv/Lib/site-packages/scipy/stats/tests/test_distributions.py new file mode 100644 index 0000000000000000000000000000000000000000..eed4e6dd6f1d1ad34c3c449eb56ae5ed255934fd --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_distributions.py @@ -0,0 +1,9676 @@ +""" +Test functions for stats module +""" +import warnings +import re +import sys +import pickle +from pathlib import Path +import os +import json +import platform + +from numpy.testing import (assert_equal, assert_array_equal, + assert_almost_equal, assert_array_almost_equal, + assert_allclose, assert_, assert_warns, + assert_array_less, suppress_warnings) +import pytest +from pytest import raises as assert_raises + +import numpy +import numpy as np +from numpy import typecodes, array +from numpy.lib.recfunctions import rec_append_fields +from scipy import special +from scipy._lib._util import check_random_state +from scipy.integrate import (IntegrationWarning, quad, trapezoid, + cumulative_trapezoid) +import scipy.stats as stats +from scipy.stats._distn_infrastructure import argsreduce +import scipy.stats.distributions + +from scipy.special import xlogy, polygamma, entr +from scipy.stats._distr_params import distcont, invdistcont +from .test_discrete_basic import distdiscrete, invdistdiscrete +from scipy.stats._continuous_distns import FitDataError, _argus_phi +from scipy.optimize import root, fmin, differential_evolution +from itertools import product + +# python -OO strips docstrings +DOCSTRINGS_STRIPPED = sys.flags.optimize > 1 + +# Failing 
on macOS 11, Intel CPUs. See gh-14901 +MACOS_INTEL = (sys.platform == 'darwin') and (platform.machine() == 'x86_64') + + +# distributions to skip while testing the fix for the support method +# introduced in gh-13294. These distributions are skipped as they +# always return a non-nan support for every parametrization. +skip_test_support_gh13294_regression = ['tukeylambda', 'pearson3'] + + +def _assert_hasattr(a, b, msg=None): + if msg is None: + msg = f'{a} does not have attribute {b}' + assert_(hasattr(a, b), msg=msg) + + +def test_api_regression(): + # https://github.com/scipy/scipy/issues/3802 + _assert_hasattr(scipy.stats.distributions, 'f_gen') + + +def test_distributions_submodule(): + actual = set(scipy.stats.distributions.__all__) + continuous = [dist[0] for dist in distcont] # continuous dist names + discrete = [dist[0] for dist in distdiscrete] # discrete dist names + other = ['rv_discrete', 'rv_continuous', 'rv_histogram', + 'entropy', 'trapz'] + expected = continuous + discrete + other + + # need to remove, e.g., + # + expected = set(filter(lambda s: not str(s).startswith('<'), expected)) + + assert actual == expected + + +class TestVonMises: + @pytest.mark.parametrize('k', [0.1, 1, 101]) + @pytest.mark.parametrize('x', [0, 1, np.pi, 10, 100]) + def test_vonmises_periodic(self, k, x): + def check_vonmises_pdf_periodic(k, L, s, x): + vm = stats.vonmises(k, loc=L, scale=s) + assert_almost_equal(vm.pdf(x), vm.pdf(x % (2 * np.pi * s))) + + def check_vonmises_cdf_periodic(k, L, s, x): + vm = stats.vonmises(k, loc=L, scale=s) + assert_almost_equal(vm.cdf(x) % 1, + vm.cdf(x % (2 * np.pi * s)) % 1) + + check_vonmises_pdf_periodic(k, 0, 1, x) + check_vonmises_pdf_periodic(k, 1, 1, x) + check_vonmises_pdf_periodic(k, 0, 10, x) + + check_vonmises_cdf_periodic(k, 0, 1, x) + check_vonmises_cdf_periodic(k, 1, 1, x) + check_vonmises_cdf_periodic(k, 0, 10, x) + + def test_vonmises_line_support(self): + assert_equal(stats.vonmises_line.a, -np.pi) + 
assert_equal(stats.vonmises_line.b, np.pi) + + def test_vonmises_numerical(self): + vm = stats.vonmises(800) + assert_almost_equal(vm.cdf(0), 0.5) + + # Expected values of the vonmises PDF were computed using + # mpmath with 50 digits of precision: + # + # def vmpdf_mp(x, kappa): + # x = mpmath.mpf(x) + # kappa = mpmath.mpf(kappa) + # num = mpmath.exp(kappa*mpmath.cos(x)) + # den = 2 * mpmath.pi * mpmath.besseli(0, kappa) + # return num/den + + @pytest.mark.parametrize('x, kappa, expected_pdf', + [(0.1, 0.01, 0.16074242744907072), + (0.1, 25.0, 1.7515464099118245), + (0.1, 800, 0.2073272544458798), + (2.0, 0.01, 0.15849003875385817), + (2.0, 25.0, 8.356882934278192e-16), + (2.0, 800, 0.0)]) + def test_vonmises_pdf(self, x, kappa, expected_pdf): + pdf = stats.vonmises.pdf(x, kappa) + assert_allclose(pdf, expected_pdf, rtol=1e-15) + + # Expected values of the vonmises entropy were computed using + # mpmath with 50 digits of precision: + # + # def vonmises_entropy(kappa): + # kappa = mpmath.mpf(kappa) + # return (-kappa * mpmath.besseli(1, kappa) / + # mpmath.besseli(0, kappa) + mpmath.log(2 * mpmath.pi * + # mpmath.besseli(0, kappa))) + # >>> float(vonmises_entropy(kappa)) + + @pytest.mark.parametrize('kappa, expected_entropy', + [(1, 1.6274014590199897), + (5, 0.6756431570114528), + (100, -0.8811275441649473), + (1000, -2.03468891852547), + (2000, -2.3813876496587847)]) + def test_vonmises_entropy(self, kappa, expected_entropy): + entropy = stats.vonmises.entropy(kappa) + assert_allclose(entropy, expected_entropy, rtol=1e-13) + + def test_vonmises_rvs_gh4598(self): + # check that random variates wrap around as discussed in gh-4598 + seed = 30899520 + rng1 = np.random.default_rng(seed) + rng2 = np.random.default_rng(seed) + rng3 = np.random.default_rng(seed) + rvs1 = stats.vonmises(1, loc=0, scale=1).rvs(random_state=rng1) + rvs2 = stats.vonmises(1, loc=2*np.pi, scale=1).rvs(random_state=rng2) + rvs3 = stats.vonmises(1, loc=0, + 
scale=(2*np.pi/abs(rvs1)+1)).rvs(random_state=rng3) + assert_allclose(rvs1, rvs2, atol=1e-15) + assert_allclose(rvs1, rvs3, atol=1e-15) + + # Expected values of the vonmises LOGPDF were computed + # using wolfram alpha: + # kappa * cos(x) - log(2*pi*I0(kappa)) + @pytest.mark.parametrize('x, kappa, expected_logpdf', + [(0.1, 0.01, -1.8279520246003170), + (0.1, 25.0, 0.5604990605420549), + (0.1, 800, -1.5734567947337514), + (2.0, 0.01, -1.8420635346185686), + (2.0, 25.0, -34.7182759850871489), + (2.0, 800, -1130.4942582548682739)]) + def test_vonmises_logpdf(self, x, kappa, expected_logpdf): + logpdf = stats.vonmises.logpdf(x, kappa) + assert_allclose(logpdf, expected_logpdf, rtol=1e-15) + + def test_vonmises_expect(self): + """ + Test that the vonmises expectation values are + computed correctly. This test checks that the + numeric integration estimates the correct normalization + (1) and mean angle (loc). These expectations are + independent of the chosen 2pi interval. + """ + rng = np.random.default_rng(6762668991392531563) + + loc, kappa, lb = rng.random(3) * 10 + res = stats.vonmises(loc=loc, kappa=kappa).expect(lambda x: 1) + assert_allclose(res, 1) + assert np.issubdtype(res.dtype, np.floating) + + bounds = lb, lb + 2 * np.pi + res = stats.vonmises(loc=loc, kappa=kappa).expect(lambda x: 1, *bounds) + assert_allclose(res, 1) + assert np.issubdtype(res.dtype, np.floating) + + bounds = lb, lb + 2 * np.pi + res = stats.vonmises(loc=loc, kappa=kappa).expect(lambda x: np.exp(1j*x), + *bounds, complex_func=1) + assert_allclose(np.angle(res), loc % (2*np.pi)) + assert np.issubdtype(res.dtype, np.complexfloating) + + @pytest.mark.xslow + @pytest.mark.parametrize("rvs_loc", [0, 2]) + @pytest.mark.parametrize("rvs_shape", [1, 100, 1e8]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_shape', [True, False]) + def test_fit_MLE_comp_optimizer(self, rvs_loc, rvs_shape, + fix_loc, fix_shape): + if fix_shape and fix_loc: + 
pytest.skip("Nothing to fit.") + + rng = np.random.default_rng(6762668991392531563) + data = stats.vonmises.rvs(rvs_shape, size=1000, loc=rvs_loc, + random_state=rng) + + kwds = {'fscale': 1} + if fix_loc: + kwds['floc'] = rvs_loc + if fix_shape: + kwds['f0'] = rvs_shape + + _assert_less_or_close_loglike(stats.vonmises, data, + stats.vonmises.nnlf, **kwds) + + def test_vonmises_fit_bad_floc(self): + data = [-0.92923506, -0.32498224, 0.13054989, -0.97252014, 2.79658071, + -0.89110948, 1.22520295, 1.44398065, 2.49163859, 1.50315096, + 3.05437696, -2.73126329, -3.06272048, 1.64647173, 1.94509247, + -1.14328023, 0.8499056, 2.36714682, -1.6823179, -0.88359996] + data = np.asarray(data) + loc = -0.5 * np.pi + kappa_fit, loc_fit, scale_fit = stats.vonmises.fit(data, floc=loc) + assert kappa_fit == np.finfo(float).tiny + _assert_less_or_close_loglike(stats.vonmises, data, + stats.vonmises.nnlf, fscale=1, floc=loc) + + @pytest.mark.parametrize('sign', [-1, 1]) + def test_vonmises_fit_unwrapped_data(self, sign): + rng = np.random.default_rng(6762668991392531563) + data = stats.vonmises(loc=sign*0.5*np.pi, kappa=10).rvs(100000, + random_state=rng) + shifted_data = data + 4*np.pi + kappa_fit, loc_fit, scale_fit = stats.vonmises.fit(data) + kappa_fit_shifted, loc_fit_shifted, _ = stats.vonmises.fit(shifted_data) + assert_allclose(loc_fit, loc_fit_shifted) + assert_allclose(kappa_fit, kappa_fit_shifted) + assert scale_fit == 1 + assert -np.pi < loc_fit < np.pi + + def test_vonmises_kappa_0_gh18166(self): + # Check that kappa = 0 is supported. 
+ dist = stats.vonmises(0) + assert_allclose(dist.pdf(0), 1 / (2 * np.pi), rtol=1e-15) + assert_allclose(dist.cdf(np.pi/2), 0.75, rtol=1e-15) + assert_allclose(dist.sf(-np.pi/2), 0.75, rtol=1e-15) + assert_allclose(dist.ppf(0.9), np.pi*0.8, rtol=1e-15) + assert_allclose(dist.mean(), 0, atol=1e-15) + assert_allclose(dist.expect(), 0, atol=1e-15) + assert np.all(np.abs(dist.rvs(size=10, random_state=1234)) <= np.pi) + + def test_vonmises_fit_equal_data(self): + # When all data are equal, expect kappa = 1e16. + kappa, loc, scale = stats.vonmises.fit([0]) + assert kappa == 1e16 and loc == 0 and scale == 1 + + def test_vonmises_fit_bounds(self): + # For certain input data, the root bracket is violated numerically. + # Test that this situation is handled. The input data below are + # crafted to trigger the bound violation for the current choice of + # bounds and the specific way the bounds and the objective function + # are computed. + + # Test that no exception is raised when the lower bound is violated. + scipy.stats.vonmises.fit([0, 3.7e-08], floc=0) + + # Test that no exception is raised when the upper bound is violated. + scipy.stats.vonmises.fit([np.pi/2*(1-4.86e-9)], floc=0) + + +def _assert_less_or_close_loglike(dist, data, func=None, maybe_identical=False, + **kwds): + """ + This utility function checks that the negative log-likelihood function + (or `func`) of the result computed using dist.fit() is less than or equal + to the result computed using the generic fit method. Because of + normal numerical imprecision, the "equality" check is made using + `np.allclose` with a relative tolerance of 1e-15. + """ + if func is None: + func = dist.nnlf + + mle_analytical = dist.fit(data, **kwds) + numerical_opt = super(type(dist), dist).fit(data, **kwds) + + # Sanity check that the analytical MLE is actually executed. + # Due to floating point arithmetic, the generic MLE is unlikely + # to produce the exact same result as the analytical MLE. 
+ if not maybe_identical: + assert np.any(mle_analytical != numerical_opt) + + ll_mle_analytical = func(mle_analytical, data) + ll_numerical_opt = func(numerical_opt, data) + assert (ll_mle_analytical <= ll_numerical_opt or + np.allclose(ll_mle_analytical, ll_numerical_opt, rtol=1e-15)) + + # Ideally we'd check that shapes are correctly fixed, too, but that is + # complicated by the many ways of fixing them (e.g. f0, fix_a, fa). + if 'floc' in kwds: + assert mle_analytical[-2] == kwds['floc'] + if 'fscale' in kwds: + assert mle_analytical[-1] == kwds['fscale'] + + +def assert_fit_warnings(dist): + param = ['floc', 'fscale'] + if dist.shapes: + nshapes = len(dist.shapes.split(",")) + param += ['f0', 'f1', 'f2'][:nshapes] + all_fixed = dict(zip(param, np.arange(len(param)))) + data = [1, 2, 3] + with pytest.raises(RuntimeError, + match="All parameters fixed. There is nothing " + "to optimize."): + dist.fit(data, **all_fixed) + with pytest.raises(ValueError, + match="The data contains non-finite values"): + dist.fit([np.nan]) + with pytest.raises(ValueError, + match="The data contains non-finite values"): + dist.fit([np.inf]) + with pytest.raises(TypeError, match="Unknown keyword arguments:"): + dist.fit(data, extra_keyword=2) + with pytest.raises(TypeError, match="Too many positional arguments."): + dist.fit(data, *[1]*(len(param) - 1)) + + +@pytest.mark.parametrize('dist', + ['alpha', 'betaprime', + 'fatiguelife', 'invgamma', 'invgauss', 'invweibull', + 'johnsonsb', 'levy', 'levy_l', 'lognorm', 'gibrat', + 'powerlognorm', 'rayleigh', 'wald']) +def test_support(dist): + """gh-6235""" + dct = dict(distcont) + args = dct[dist] + + dist = getattr(stats, dist) + + assert_almost_equal(dist.pdf(dist.a, *args), 0) + assert_equal(dist.logpdf(dist.a, *args), -np.inf) + assert_almost_equal(dist.pdf(dist.b, *args), 0) + assert_equal(dist.logpdf(dist.b, *args), -np.inf) + + +class TestRandInt: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = 
stats.randint.rvs(5, 30, size=100) + assert_(numpy.all(vals < 30) & numpy.all(vals >= 5)) + assert_(len(vals) == 100) + vals = stats.randint.rvs(5, 30, size=(2, 50)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.randint.rvs(15, 46) + assert_((val >= 15) & (val < 46)) + assert_(isinstance(val, numpy.ScalarType), msg=repr(type(val))) + val = stats.randint(15, 46).rvs(3) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_pdf(self): + k = numpy.r_[0:36] + out = numpy.where((k >= 5) & (k < 30), 1.0/(30-5), 0) + vals = stats.randint.pmf(k, 5, 30) + assert_array_almost_equal(vals, out) + + def test_cdf(self): + x = np.linspace(0, 36, 100) + k = numpy.floor(x) + out = numpy.select([k >= 30, k >= 5], [1.0, (k-5.0+1)/(30-5.0)], 0) + vals = stats.randint.cdf(x, 5, 30) + assert_array_almost_equal(vals, out, decimal=12) + + +class TestBinom: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.binom.rvs(10, 0.75, size=(2, 50)) + assert_(numpy.all(vals >= 0) & numpy.all(vals <= 10)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.binom.rvs(10, 0.75) + assert_(isinstance(val, int)) + val = stats.binom(10, 0.75).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_pmf(self): + # regression test for Ticket #1842 + vals1 = stats.binom.pmf(100, 100, 1) + vals2 = stats.binom.pmf(0, 100, 0) + assert_allclose(vals1, 1.0, rtol=1e-15, atol=0) + assert_allclose(vals2, 1.0, rtol=1e-15, atol=0) + + def test_entropy(self): + # Basic entropy tests. 
+ b = stats.binom(2, 0.5) + expected_p = np.array([0.25, 0.5, 0.25]) + expected_h = -sum(xlogy(expected_p, expected_p)) + h = b.entropy() + assert_allclose(h, expected_h) + + b = stats.binom(2, 0.0) + h = b.entropy() + assert_equal(h, 0.0) + + b = stats.binom(2, 1.0) + h = b.entropy() + assert_equal(h, 0.0) + + def test_warns_p0(self): + # no spurious warnings are generated for p=0; gh-3817 + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + assert_equal(stats.binom(n=2, p=0).mean(), 0) + assert_equal(stats.binom(n=2, p=0).std(), 0) + + def test_ppf_p1(self): + # Check that gh-17388 is resolved: PPF == n when p = 1 + n = 4 + assert stats.binom.ppf(q=0.3, n=n, p=1.0) == n + + def test_pmf_poisson(self): + # Check that gh-17146 is resolved: binom -> poisson + n = 1541096362225563.0 + p = 1.0477878413173978e-18 + x = np.arange(3) + res = stats.binom.pmf(x, n=n, p=p) + ref = stats.poisson.pmf(x, n * p) + assert_allclose(res, ref, atol=1e-16) + + def test_pmf_cdf(self): + # Check that gh-17809 is resolved: binom.pmf(0) ~ binom.cdf(0) + n = 25.0 * 10 ** 21 + p = 1.0 * 10 ** -21 + r = 0 + res = stats.binom.pmf(r, n, p) + ref = stats.binom.cdf(r, n, p) + assert_allclose(res, ref, atol=1e-16) + + def test_pmf_gh15101(self): + # Check that gh-15101 is resolved (no divide warnings when p~1, n~oo) + res = stats.binom.pmf(3, 2000, 0.999) + assert_allclose(res, 0, atol=1e-16) + + +class TestArcsine: + + def test_endpoints(self): + # Regression test for gh-13697. The following calculation + # should not generate a warning. 
+ p = stats.arcsine.pdf([0, 1]) + assert_equal(p, [np.inf, np.inf]) + + +class TestBernoulli: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.bernoulli.rvs(0.75, size=(2, 50)) + assert_(numpy.all(vals >= 0) & numpy.all(vals <= 1)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.bernoulli.rvs(0.75) + assert_(isinstance(val, int)) + val = stats.bernoulli(0.75).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_entropy(self): + # Simple tests of entropy. + b = stats.bernoulli(0.25) + expected_h = -0.25*np.log(0.25) - 0.75*np.log(0.75) + h = b.entropy() + assert_allclose(h, expected_h) + + b = stats.bernoulli(0.0) + h = b.entropy() + assert_equal(h, 0.0) + + b = stats.bernoulli(1.0) + h = b.entropy() + assert_equal(h, 0.0) + + +class TestBradford: + # gh-6216 + def test_cdf_ppf(self): + c = 0.1 + x = np.logspace(-20, -4) + q = stats.bradford.cdf(x, c) + xx = stats.bradford.ppf(q, c) + assert_allclose(x, xx) + + +class TestChi: + + # "Exact" value of chi.sf(10, 4), as computed by Wolfram Alpha with + # 1 - CDF[ChiDistribution[4], 10] + CHI_SF_10_4 = 9.83662422461598e-21 + # "Exact" value of chi.mean(df=1000) as computed by Wolfram Alpha with + # Mean[ChiDistribution[1000]] + CHI_MEAN_1000 = 31.614871896980 + + def test_sf(self): + s = stats.chi.sf(10, 4) + assert_allclose(s, self.CHI_SF_10_4, rtol=1e-15) + + def test_isf(self): + x = stats.chi.isf(self.CHI_SF_10_4, 4) + assert_allclose(x, 10, rtol=1e-15) + + # reference value for 1e14 was computed via mpmath + # from mpmath import mp + # mp.dps = 500 + # df = mp.mpf(1e14) + # float(mp.rf(mp.mpf(0.5) * df, mp.mpf(0.5)) * mp.sqrt(2.)) + + @pytest.mark.parametrize('df, ref', + [(1e3, CHI_MEAN_1000), + (1e14, 9999999.999999976)] + ) + def test_mean(self, df, ref): + assert_allclose(stats.chi.mean(df), ref, rtol=1e-12) + + # Entropy references values were computed 
with the following mpmath code + # from mpmath import mp + # mp.dps = 50 + # def chi_entropy_mpmath(df): + # df = mp.mpf(df) + # half_df = 0.5 * df + # entropy = mp.log(mp.gamma(half_df)) + 0.5 * \ + # (df - mp.log(2) - (df - mp.one) * mp.digamma(half_df)) + # return float(entropy) + + @pytest.mark.parametrize('df, ref', + [(1e-4, -9989.7316027504), + (1, 0.7257913526447274), + (1e3, 1.0721981095025448), + (1e10, 1.0723649429080335), + (1e100, 1.0723649429247002)]) + def test_entropy(self, df, ref): + assert_allclose(stats.chi(df).entropy(), ref, rtol=1e-15) + + +class TestNBinom: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.nbinom.rvs(10, 0.75, size=(2, 50)) + assert_(numpy.all(vals >= 0)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.nbinom.rvs(10, 0.75) + assert_(isinstance(val, int)) + val = stats.nbinom(10, 0.75).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_pmf(self): + # regression test for ticket 1779 + assert_allclose(np.exp(stats.nbinom.logpmf(700, 721, 0.52)), + stats.nbinom.pmf(700, 721, 0.52)) + # logpmf(0,1,1) shouldn't return nan (regression test for gh-4029) + val = scipy.stats.nbinom.logpmf(0, 1, 1) + assert_equal(val, 0) + + def test_logcdf_gh16159(self): + # check that gh16159 is resolved. 
+ vals = stats.nbinom.logcdf([0, 5, 0, 5], n=4.8, p=0.45) + ref = np.log(stats.nbinom.cdf([0, 5, 0, 5], n=4.8, p=0.45)) + assert_allclose(vals, ref) + + +class TestGenInvGauss: + def setup_method(self): + np.random.seed(1234) + + @pytest.mark.slow + def test_rvs_with_mode_shift(self): + # ratio_unif w/ mode shift + gig = stats.geninvgauss(2.3, 1.5) + _, p = stats.kstest(gig.rvs(size=1500, random_state=1234), gig.cdf) + assert_equal(p > 0.05, True) + + @pytest.mark.slow + def test_rvs_without_mode_shift(self): + # ratio_unif w/o mode shift + gig = stats.geninvgauss(0.9, 0.75) + _, p = stats.kstest(gig.rvs(size=1500, random_state=1234), gig.cdf) + assert_equal(p > 0.05, True) + + @pytest.mark.slow + def test_rvs_new_method(self): + # new algorithm of Hoermann / Leydold + gig = stats.geninvgauss(0.1, 0.2) + _, p = stats.kstest(gig.rvs(size=1500, random_state=1234), gig.cdf) + assert_equal(p > 0.05, True) + + @pytest.mark.slow + def test_rvs_p_zero(self): + def my_ks_check(p, b): + gig = stats.geninvgauss(p, b) + rvs = gig.rvs(size=1500, random_state=1234) + return stats.kstest(rvs, gig.cdf)[1] > 0.05 + # boundary cases when p = 0 + assert_equal(my_ks_check(0, 0.2), True) # new algo + assert_equal(my_ks_check(0, 0.9), True) # ratio_unif w/o shift + assert_equal(my_ks_check(0, 1.5), True) # ratio_unif with shift + + def test_rvs_negative_p(self): + # if p negative, return inverse + assert_equal( + stats.geninvgauss(-1.5, 2).rvs(size=10, random_state=1234), + 1 / stats.geninvgauss(1.5, 2).rvs(size=10, random_state=1234)) + + def test_invgauss(self): + # test that invgauss is special case + ig = stats.geninvgauss.rvs(size=1500, p=-0.5, b=1, random_state=1234) + assert_equal(stats.kstest(ig, 'invgauss', args=[1])[1] > 0.15, True) + # test pdf and cdf + mu, x = 100, np.linspace(0.01, 1, 10) + pdf_ig = stats.geninvgauss.pdf(x, p=-0.5, b=1 / mu, scale=mu) + assert_allclose(pdf_ig, stats.invgauss(mu).pdf(x)) + cdf_ig = stats.geninvgauss.cdf(x, p=-0.5, b=1 / mu, scale=mu) + 
assert_allclose(cdf_ig, stats.invgauss(mu).cdf(x)) + + def test_pdf_R(self): + # test against R package GIGrvg + # x <- seq(0.01, 5, length.out = 10) + # GIGrvg::dgig(x, 0.5, 1, 1) + vals_R = np.array([2.081176820e-21, 4.488660034e-01, 3.747774338e-01, + 2.693297528e-01, 1.905637275e-01, 1.351476913e-01, + 9.636538981e-02, 6.909040154e-02, 4.978006801e-02, + 3.602084467e-02]) + x = np.linspace(0.01, 5, 10) + assert_allclose(vals_R, stats.geninvgauss.pdf(x, 0.5, 1)) + + def test_pdf_zero(self): + # pdf at 0 is 0, needs special treatment to avoid 1/x in pdf + assert_equal(stats.geninvgauss.pdf(0, 0.5, 0.5), 0) + # if x is large and p is moderate, make sure that pdf does not + # overflow because of x**(p-1); exp(-b*x) forces pdf to zero + assert_equal(stats.geninvgauss.pdf(2e6, 50, 2), 0) + + +class TestGenHyperbolic: + def setup_method(self): + np.random.seed(1234) + + def test_pdf_r(self): + # test against R package GeneralizedHyperbolic + # x <- seq(-10, 10, length.out = 10) + # GeneralizedHyperbolic::dghyp( + # x = x, lambda = 2, alpha = 2, beta = 1, delta = 1.5, mu = 0.5 + # ) + vals_R = np.array([ + 2.94895678275316e-13, 1.75746848647696e-10, 9.48149804073045e-08, + 4.17862521692026e-05, 0.0103947630463822, 0.240864958986839, + 0.162833527161649, 0.0374609592899472, 0.00634894847327781, + 0.000941920705790324 + ]) + + lmbda, alpha, beta = 2, 2, 1 + mu, delta = 0.5, 1.5 + args = (lmbda, alpha*delta, beta*delta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + x = np.linspace(-10, 10, 10) + + assert_allclose(gh.pdf(x), vals_R, atol=0, rtol=1e-13) + + def test_cdf_r(self): + # test against R package GeneralizedHyperbolic + # q <- seq(-10, 10, length.out = 10) + # GeneralizedHyperbolic::pghyp( + # q = q, lambda = 2, alpha = 2, beta = 1, delta = 1.5, mu = 0.5 + # ) + vals_R = np.array([ + 1.01881590921421e-13, 6.13697274983578e-11, 3.37504977637992e-08, + 1.55258698166181e-05, 0.00447005453832497, 0.228935323956347, + 0.755759458895243, 0.953061062884484, 
0.992598013917513, + 0.998942646586662 + ]) + + lmbda, alpha, beta = 2, 2, 1 + mu, delta = 0.5, 1.5 + args = (lmbda, alpha*delta, beta*delta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + x = np.linspace(-10, 10, 10) + + assert_allclose(gh.cdf(x), vals_R, atol=0, rtol=1e-6) + + # The reference values were computed by implementing the PDF with mpmath + # and integrating it with mp.quad. The values were computed with + # mp.dps=250, and then again with mp.dps=400 to ensure the full 64 bit + # precision was computed. + @pytest.mark.parametrize( + 'x, p, a, b, loc, scale, ref', + [(-15, 2, 3, 1.5, 0.5, 1.5, 4.770036428808252e-20), + (-15, 10, 1.5, 0.25, 1, 5, 0.03282964575089294), + (-15, 10, 1.5, 1.375, 0, 1, 3.3711159600215594e-23), + (-15, 0.125, 1.5, 1.49995, 0, 1, 4.729401428898605e-23), + (-1, 0.125, 1.5, 1.49995, 0, 1, 0.0003565725914786859), + (5, -0.125, 1.5, 1.49995, 0, 1, 0.2600651974023352), + (5, -0.125, 1000, 999, 0, 1, 5.923270556517253e-28), + (20, -0.125, 1000, 999, 0, 1, 0.23452293711665634), + (40, -0.125, 1000, 999, 0, 1, 0.9999648749561968), + (60, -0.125, 1000, 999, 0, 1, 0.9999999999975475)] + ) + def test_cdf_mpmath(self, x, p, a, b, loc, scale, ref): + cdf = stats.genhyperbolic.cdf(x, p, a, b, loc=loc, scale=scale) + assert_allclose(cdf, ref, rtol=5e-12) + + # The reference values were computed by implementing the PDF with mpmath + # and integrating it with mp.quad. The values were computed with + # mp.dps=250, and then again with mp.dps=400 to ensure the full 64 bit + # precision was computed. 
+ @pytest.mark.parametrize( + 'x, p, a, b, loc, scale, ref', + [(0, 1e-6, 12, -1, 0, 1, 0.38520358671350524), + (-1, 3, 2.5, 2.375, 1, 3, 0.9999901774267577), + (-20, 3, 2.5, 2.375, 1, 3, 1.0), + (25, 2, 3, 1.5, 0.5, 1.5, 8.593419916523976e-10), + (300, 10, 1.5, 0.25, 1, 5, 6.137415609872158e-24), + (60, -0.125, 1000, 999, 0, 1, 2.4524915075944173e-12), + (75, -0.125, 1000, 999, 0, 1, 2.9435194886214633e-18)] + ) + def test_sf_mpmath(self, x, p, a, b, loc, scale, ref): + sf = stats.genhyperbolic.sf(x, p, a, b, loc=loc, scale=scale) + assert_allclose(sf, ref, rtol=5e-12) + + def test_moments_r(self): + # test against R package GeneralizedHyperbolic + # sapply(1:4, + # function(x) GeneralizedHyperbolic::ghypMom( + # order = x, lambda = 2, alpha = 2, + # beta = 1, delta = 1.5, mu = 0.5, + # momType = 'raw') + # ) + + vals_R = [2.36848366948115, 8.4739346779246, + 37.8870502710066, 205.76608511485] + + lmbda, alpha, beta = 2, 2, 1 + mu, delta = 0.5, 1.5 + args = (lmbda, alpha*delta, beta*delta) + + vals_us = [ + stats.genhyperbolic(*args, loc=mu, scale=delta).moment(i) + for i in range(1, 5) + ] + + assert_allclose(vals_us, vals_R, atol=0, rtol=1e-13) + + def test_rvs(self): + # Kolmogorov-Smirnov test to ensure alignment + # of analytical and empirical cdfs + + lmbda, alpha, beta = 2, 2, 1 + mu, delta = 0.5, 1.5 + args = (lmbda, alpha*delta, beta*delta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + _, p = stats.kstest(gh.rvs(size=1500, random_state=1234), gh.cdf) + + assert_equal(p > 0.05, True) + + def test_pdf_t(self): + # Test Against T-Student with 1 - 30 df + df = np.linspace(1, 30, 10) + + # in principle alpha should be zero in practice for big lmbdas + # alpha cannot be too small else pdf does not integrate + alpha, beta = np.float_power(df, 2)*np.finfo(np.float32).eps, 0 + mu, delta = 0, np.sqrt(df) + args = (-df/2, alpha, beta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + x = np.linspace(gh.ppf(0.01), gh.ppf(0.99), 50)[:, 
np.newaxis] + + assert_allclose( + gh.pdf(x), stats.t.pdf(x, df), + atol=0, rtol=1e-6 + ) + + def test_pdf_cauchy(self): + # Test Against Cauchy distribution + + # in principle alpha should be zero in practice for big lmbdas + # alpha cannot be too small else pdf does not integrate + lmbda, alpha, beta = -0.5, np.finfo(np.float32).eps, 0 + mu, delta = 0, 1 + args = (lmbda, alpha, beta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + x = np.linspace(gh.ppf(0.01), gh.ppf(0.99), 50)[:, np.newaxis] + + assert_allclose( + gh.pdf(x), stats.cauchy.pdf(x), + atol=0, rtol=1e-6 + ) + + def test_pdf_laplace(self): + # Test Against Laplace with location param [-10, 10] + loc = np.linspace(-10, 10, 10) + + # in principle delta should be zero in practice for big loc delta + # cannot be too small else pdf does not integrate + delta = np.finfo(np.float32).eps + + lmbda, alpha, beta = 1, 1, 0 + args = (lmbda, alpha*delta, beta*delta) + + # ppf does not integrate for scale < 5e-4 + # therefore using simple linspace to define the support + gh = stats.genhyperbolic(*args, loc=loc, scale=delta) + x = np.linspace(-20, 20, 50)[:, np.newaxis] + + assert_allclose( + gh.pdf(x), stats.laplace.pdf(x, loc=loc, scale=1), + atol=0, rtol=1e-11 + ) + + def test_pdf_norminvgauss(self): + # Test Against NIG with varying alpha/beta/delta/mu + + alpha, beta, delta, mu = ( + np.linspace(1, 20, 10), + np.linspace(0, 19, 10)*np.float_power(-1, range(10)), + np.linspace(1, 1, 10), + np.linspace(-100, 100, 10) + ) + + lmbda = - 0.5 + args = (lmbda, alpha * delta, beta * delta) + + gh = stats.genhyperbolic(*args, loc=mu, scale=delta) + x = np.linspace(gh.ppf(0.01), gh.ppf(0.99), 50)[:, np.newaxis] + + assert_allclose( + gh.pdf(x), stats.norminvgauss.pdf( + x, a=alpha, b=beta, loc=mu, scale=delta), + atol=0, rtol=1e-13 + ) + + +class TestHypSecant: + + # Reference values were computed with the mpmath expression + # float((2/mp.pi)*mp.atan(mp.exp(-x))) + # and mp.dps = 50. 
+ @pytest.mark.parametrize('x, reference', + [(30, 5.957247804324683e-14), + (50, 1.2278802891647964e-22)]) + def test_sf(self, x, reference): + sf = stats.hypsecant.sf(x) + assert_allclose(sf, reference, rtol=5e-15) + + # Reference values were computed with the mpmath expression + # float(-mp.log(mp.tan((mp.pi/2)*p))) + # and mp.dps = 50. + @pytest.mark.parametrize('p, reference', + [(1e-6, 13.363927852673998), + (1e-12, 27.179438410639094)]) + def test_isf(self, p, reference): + x = stats.hypsecant.isf(p) + assert_allclose(x, reference, rtol=5e-15) + + +class TestNormInvGauss: + def setup_method(self): + np.random.seed(1234) + + def test_cdf_R(self): + # test pdf and cdf vals against R + # require("GeneralizedHyperbolic") + # x_test <- c(-7, -5, 0, 8, 15) + # r_cdf <- GeneralizedHyperbolic::pnig(x_test, mu = 0, a = 1, b = 0.5) + # r_pdf <- GeneralizedHyperbolic::dnig(x_test, mu = 0, a = 1, b = 0.5) + r_cdf = np.array([8.034920282e-07, 2.512671945e-05, 3.186661051e-01, + 9.988650664e-01, 9.999848769e-01]) + x_test = np.array([-7, -5, 0, 8, 15]) + vals_cdf = stats.norminvgauss.cdf(x_test, a=1, b=0.5) + assert_allclose(vals_cdf, r_cdf, atol=1e-9) + + def test_pdf_R(self): + # values from R as defined in test_cdf_R + r_pdf = np.array([1.359600783e-06, 4.413878805e-05, 4.555014266e-01, + 7.450485342e-04, 8.917889931e-06]) + x_test = np.array([-7, -5, 0, 8, 15]) + vals_pdf = stats.norminvgauss.pdf(x_test, a=1, b=0.5) + assert_allclose(vals_pdf, r_pdf, atol=1e-9) + + @pytest.mark.parametrize('x, a, b, sf, rtol', + [(-1, 1, 0, 0.8759652211005315, 1e-13), + (25, 1, 0, 1.1318690184042579e-13, 1e-4), + (1, 5, -1.5, 0.002066711134653577, 1e-12), + (10, 5, -1.5, 2.308435233930669e-29, 1e-9)]) + def test_sf_isf_mpmath(self, x, a, b, sf, rtol): + # Reference data generated with `reference_distributions.NormInvGauss`, + # e.g. 
`NormInvGauss(alpha=1, beta=0).sf(-1)` with mp.dps = 50 + s = stats.norminvgauss.sf(x, a, b) + assert_allclose(s, sf, rtol=rtol) + i = stats.norminvgauss.isf(sf, a, b) + assert_allclose(i, x, rtol=rtol) + + def test_sf_isf_mpmath_vectorized(self): + x = [-1, 25] + a = [1, 1] + b = 0 + sf = [0.8759652211005315, 1.1318690184042579e-13] # see previous test + s = stats.norminvgauss.sf(x, a, b) + assert_allclose(s, sf, rtol=1e-13, atol=1e-16) + i = stats.norminvgauss.isf(sf, a, b) + # Not perfect, but better than it was. See gh-13338. + assert_allclose(i, x, rtol=1e-6) + + def test_gh8718(self): + # Add test that gh-13338 resolved gh-8718 + dst = stats.norminvgauss(1, 0) + x = np.arange(0, 20, 2) + sf = dst.sf(x) + isf = dst.isf(sf) + assert_allclose(isf, x) + + def test_stats(self): + a, b = 1, 0.5 + gamma = np.sqrt(a**2 - b**2) + v_stats = (b / gamma, a**2 / gamma**3, 3.0 * b / (a * np.sqrt(gamma)), + 3.0 * (1 + 4 * b**2 / a**2) / gamma) + assert_equal(v_stats, stats.norminvgauss.stats(a, b, moments='mvsk')) + + def test_ppf(self): + a, b = 1, 0.5 + x_test = np.array([0.001, 0.5, 0.999]) + vals = stats.norminvgauss.ppf(x_test, a, b) + assert_allclose(x_test, stats.norminvgauss.cdf(vals, a, b)) + + +class TestGeom: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.geom.rvs(0.75, size=(2, 50)) + assert_(numpy.all(vals >= 0)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.geom.rvs(0.75) + assert_(isinstance(val, int)) + val = stats.geom(0.75).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_rvs_9313(self): + # previously, RVS were converted to `np.int32` on some platforms, + # causing overflow for moderately large integer output (gh-9313). + # Check that this is resolved to the extent possible w/ `np.int64`. 
+ rng = np.random.default_rng(649496242618848) + rvs = stats.geom.rvs(np.exp(-35), size=5, random_state=rng) + assert rvs.dtype == np.int64 + assert np.all(rvs > np.iinfo(np.int32).max) + + def test_pmf(self): + vals = stats.geom.pmf([1, 2, 3], 0.5) + assert_array_almost_equal(vals, [0.5, 0.25, 0.125]) + + def test_logpmf(self): + # regression test for ticket 1793 + vals1 = np.log(stats.geom.pmf([1, 2, 3], 0.5)) + vals2 = stats.geom.logpmf([1, 2, 3], 0.5) + assert_allclose(vals1, vals2, rtol=1e-15, atol=0) + + # regression test for gh-4028 + val = stats.geom.logpmf(1, 1) + assert_equal(val, 0.0) + + def test_cdf_sf(self): + vals = stats.geom.cdf([1, 2, 3], 0.5) + vals_sf = stats.geom.sf([1, 2, 3], 0.5) + expected = array([0.5, 0.75, 0.875]) + assert_array_almost_equal(vals, expected) + assert_array_almost_equal(vals_sf, 1-expected) + + def test_logcdf_logsf(self): + vals = stats.geom.logcdf([1, 2, 3], 0.5) + vals_sf = stats.geom.logsf([1, 2, 3], 0.5) + expected = array([0.5, 0.75, 0.875]) + assert_array_almost_equal(vals, np.log(expected)) + assert_array_almost_equal(vals_sf, np.log1p(-expected)) + + def test_ppf(self): + vals = stats.geom.ppf([0.5, 0.75, 0.875], 0.5) + expected = array([1.0, 2.0, 3.0]) + assert_array_almost_equal(vals, expected) + + def test_ppf_underflow(self): + # this should not underflow + assert_allclose(stats.geom.ppf(1e-20, 1e-20), 1.0, atol=1e-14) + + def test_entropy_gh18226(self): + # gh-18226 reported that `geom.entropy` produced a warning and + # inaccurate output for small p. Check that this is resolved. + h = stats.geom(0.0146).entropy() + assert_allclose(h, 5.219397961962308, rtol=1e-15) + + +class TestPlanck: + def setup_method(self): + np.random.seed(1234) + + def test_sf(self): + vals = stats.planck.sf([1, 2, 3], 5.) 
+ expected = array([4.5399929762484854e-05, + 3.0590232050182579e-07, + 2.0611536224385579e-09]) + assert_array_almost_equal(vals, expected) + + def test_logsf(self): + vals = stats.planck.logsf([1000., 2000., 3000.], 1000.) + expected = array([-1001000., -2001000., -3001000.]) + assert_array_almost_equal(vals, expected) + + +class TestGennorm: + def test_laplace(self): + # test against Laplace (special case for beta=1) + points = [1, 2, 3] + pdf1 = stats.gennorm.pdf(points, 1) + pdf2 = stats.laplace.pdf(points) + assert_almost_equal(pdf1, pdf2) + + def test_norm(self): + # test against normal (special case for beta=2) + points = [1, 2, 3] + pdf1 = stats.gennorm.pdf(points, 2) + pdf2 = stats.norm.pdf(points, scale=2**-.5) + assert_almost_equal(pdf1, pdf2) + + def test_rvs(self): + np.random.seed(0) + # 0 < beta < 1 + dist = stats.gennorm(0.5) + rvs = dist.rvs(size=1000) + assert stats.kstest(rvs, dist.cdf).pvalue > 0.1 + # beta = 1 + dist = stats.gennorm(1) + rvs = dist.rvs(size=1000) + rvs_laplace = stats.laplace.rvs(size=1000) + assert stats.ks_2samp(rvs, rvs_laplace).pvalue > 0.1 + # beta = 2 + dist = stats.gennorm(2) + rvs = dist.rvs(size=1000) + rvs_norm = stats.norm.rvs(scale=1/2**0.5, size=1000) + assert stats.ks_2samp(rvs, rvs_norm).pvalue > 0.1 + + def test_rvs_broadcasting(self): + np.random.seed(0) + dist = stats.gennorm([[0.5, 1.], [2., 5.]]) + rvs = dist.rvs(size=[1000, 2, 2]) + assert stats.kstest(rvs[:, 0, 0], stats.gennorm(0.5).cdf)[1] > 0.1 + assert stats.kstest(rvs[:, 0, 1], stats.gennorm(1.0).cdf)[1] > 0.1 + assert stats.kstest(rvs[:, 1, 0], stats.gennorm(2.0).cdf)[1] > 0.1 + assert stats.kstest(rvs[:, 1, 1], stats.gennorm(5.0).cdf)[1] > 0.1 + + +class TestGibrat: + + # sfx is sf(x). The values were computed with mpmath: + # + # from mpmath import mp + # mp.dps = 100 + # def gibrat_sf(x): + # return 1 - mp.ncdf(mp.log(x)) + # + # E.g. 
+ # + # >>> float(gibrat_sf(1.5)) + # 0.3425678305148459 + # + @pytest.mark.parametrize('x, sfx', [(1.5, 0.3425678305148459), + (5000, 8.173334352522493e-18)]) + def test_sf_isf(self, x, sfx): + assert_allclose(stats.gibrat.sf(x), sfx, rtol=2e-14) + assert_allclose(stats.gibrat.isf(sfx), x, rtol=2e-14) + + +class TestGompertz: + + def test_gompertz_accuracy(self): + # Regression test for gh-4031 + p = stats.gompertz.ppf(stats.gompertz.cdf(1e-100, 1), 1) + assert_allclose(p, 1e-100) + + # sfx is sf(x). The values were computed with mpmath: + # + # from mpmath import mp + # mp.dps = 100 + # def gompertz_sf(x, c): + # return mp.exp(-c*mp.expm1(x)) + # + # E.g. + # + # >>> float(gompertz_sf(1, 2.5)) + # 0.013626967146253437 + # + @pytest.mark.parametrize('x, c, sfx', [(1, 2.5, 0.013626967146253437), + (3, 2.5, 1.8973243273704087e-21), + (0.05, 5, 0.7738668242570479), + (2.25, 5, 3.707795833465481e-19)]) + def test_sf_isf(self, x, c, sfx): + assert_allclose(stats.gompertz.sf(x, c), sfx, rtol=1e-14) + assert_allclose(stats.gompertz.isf(sfx, c), x, rtol=1e-14) + + # reference values were computed with mpmath + # from mpmath import mp + # mp.dps = 100 + # def gompertz_entropy(c): + # c = mp.mpf(c) + # return float(mp.one - mp.log(c) - mp.exp(c)*mp.e1(c)) + + @pytest.mark.parametrize('c, ref', [(1e-4, 1.5762523017634573), + (1, 0.4036526376768059), + (1000, -5.908754280976161), + (1e10, -22.025850930040455)]) + def test_entropy(self, c, ref): + assert_allclose(stats.gompertz.entropy(c), ref, rtol=1e-14) + + +class TestFoldNorm: + + # reference values were computed with mpmath with 50 digits of precision + # from mpmath import mp + # mp.dps = 50 + # mp.mpf(0.5) * (mp.erf((x - c)/mp.sqrt(2)) + mp.erf((x + c)/mp.sqrt(2))) + + @pytest.mark.parametrize('x, c, ref', [(1e-4, 1e-8, 7.978845594730578e-05), + (1e-4, 1e-4, 7.97884555483635e-05)]) + def test_cdf(self, x, c, ref): + assert_allclose(stats.foldnorm.cdf(x, c), ref, rtol=1e-15) + + +class TestHalfNorm: + + # sfx is sf(x). 
The values were computed with mpmath: + # + # from mpmath import mp + # mp.dps = 100 + # def halfnorm_sf(x): + # return 2*(1 - mp.ncdf(x)) + # + # E.g. + # + # >>> float(halfnorm_sf(1)) + # 0.3173105078629141 + # + @pytest.mark.parametrize('x, sfx', [(1, 0.3173105078629141), + (10, 1.523970604832105e-23)]) + def test_sf_isf(self, x, sfx): + assert_allclose(stats.halfnorm.sf(x), sfx, rtol=1e-14) + assert_allclose(stats.halfnorm.isf(sfx), x, rtol=1e-14) + + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 100 + # def halfnorm_cdf_mpmath(x): + # x = mp.mpf(x) + # return float(mp.erf(x/mp.sqrt(2.))) + + @pytest.mark.parametrize('x, ref', [(1e-40, 7.978845608028653e-41), + (1e-18, 7.978845608028654e-19), + (8, 0.9999999999999988)]) + def test_cdf(self, x, ref): + assert_allclose(stats.halfnorm.cdf(x), ref, rtol=1e-15) + + @pytest.mark.parametrize("rvs_loc", [1e-5, 1e10]) + @pytest.mark.parametrize("rvs_scale", [1e-2, 100, 1e8]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + def test_fit_MLE_comp_optimizer(self, rvs_loc, rvs_scale, + fix_loc, fix_scale): + + rng = np.random.default_rng(6762668991392531563) + data = stats.halfnorm.rvs(loc=rvs_loc, scale=rvs_scale, size=1000, + random_state=rng) + + if fix_loc and fix_scale: + error_msg = ("All parameters fixed. There is nothing to " + "optimize.") + with pytest.raises(RuntimeError, match=error_msg): + stats.halflogistic.fit(data, floc=rvs_loc, fscale=rvs_scale) + return + + kwds = {} + if fix_loc: + kwds['floc'] = rvs_loc + if fix_scale: + kwds['fscale'] = rvs_scale + + # Numerical result may equal analytical result if the initial guess + # computed from moment condition is already optimal. 
+ _assert_less_or_close_loglike(stats.halfnorm, data, **kwds, + maybe_identical=True) + + def test_fit_error(self): + # `floc` bigger than the minimal data point + with pytest.raises(FitDataError): + stats.halfnorm.fit([1, 2, 3], floc=2) + + +class TestHalfCauchy: + + @pytest.mark.parametrize("rvs_loc", [1e-5, 1e10]) + @pytest.mark.parametrize("rvs_scale", [1e-2, 1e8]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + def test_fit_MLE_comp_optimizer(self, rvs_loc, rvs_scale, + fix_loc, fix_scale): + + rng = np.random.default_rng(6762668991392531563) + data = stats.halfnorm.rvs(loc=rvs_loc, scale=rvs_scale, size=1000, + random_state=rng) + + if fix_loc and fix_scale: + error_msg = ("All parameters fixed. There is nothing to " + "optimize.") + with pytest.raises(RuntimeError, match=error_msg): + stats.halfcauchy.fit(data, floc=rvs_loc, fscale=rvs_scale) + return + + kwds = {} + if fix_loc: + kwds['floc'] = rvs_loc + if fix_scale: + kwds['fscale'] = rvs_scale + + _assert_less_or_close_loglike(stats.halfcauchy, data, **kwds) + + def test_fit_error(self): + # `floc` bigger than the minimal data point + with pytest.raises(FitDataError): + stats.halfcauchy.fit([1, 2, 3], floc=2) + + +class TestHalfLogistic: + # survival function reference values were computed with mpmath + # from mpmath import mp + # mp.dps = 50 + # def sf_mpmath(x): + # x = mp.mpf(x) + # return float(mp.mpf(2.)/(mp.exp(x) + mp.one)) + + @pytest.mark.parametrize('x, ref', [(100, 7.440151952041672e-44), + (200, 2.767793053473475e-87)]) + def test_sf(self, x, ref): + assert_allclose(stats.halflogistic.sf(x), ref, rtol=1e-15) + + # inverse survival function reference values were computed with mpmath + # from mpmath import mp + # mp.dps = 200 + # def isf_mpmath(x): + # halfx = mp.mpf(x)/2 + # return float(-mp.log(halfx/(mp.one - halfx))) + + @pytest.mark.parametrize('q, ref', [(7.440151952041672e-44, 100), + (2.767793053473475e-87, 200), + (1-1e-9, 
1.999999943436137e-09), + (1-1e-15, 1.9984014443252818e-15)]) + def test_isf(self, q, ref): + assert_allclose(stats.halflogistic.isf(q), ref, rtol=1e-15) + + @pytest.mark.parametrize("rvs_loc", [1e-5, 1e10]) + @pytest.mark.parametrize("rvs_scale", [1e-2, 100, 1e8]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + def test_fit_MLE_comp_optimizer(self, rvs_loc, rvs_scale, + fix_loc, fix_scale): + + rng = np.random.default_rng(6762668991392531563) + data = stats.halflogistic.rvs(loc=rvs_loc, scale=rvs_scale, size=1000, + random_state=rng) + + kwds = {} + if fix_loc and fix_scale: + error_msg = ("All parameters fixed. There is nothing to " + "optimize.") + with pytest.raises(RuntimeError, match=error_msg): + stats.halflogistic.fit(data, floc=rvs_loc, fscale=rvs_scale) + return + + if fix_loc: + kwds['floc'] = rvs_loc + if fix_scale: + kwds['fscale'] = rvs_scale + + # Numerical result may equal analytical result if the initial guess + # computed from moment condition is already optimal. 
+ _assert_less_or_close_loglike(stats.halflogistic, data, **kwds, + maybe_identical=True) + + def test_fit_bad_floc(self): + msg = r" Maximum likelihood estimation with 'halflogistic' requires" + with assert_raises(FitDataError, match=msg): + stats.halflogistic.fit([0, 2, 4], floc=1) + + +class TestHalfgennorm: + def test_expon(self): + # test against exponential (special case for beta=1) + points = [1, 2, 3] + pdf1 = stats.halfgennorm.pdf(points, 1) + pdf2 = stats.expon.pdf(points) + assert_almost_equal(pdf1, pdf2) + + def test_halfnorm(self): + # test against half normal (special case for beta=2) + points = [1, 2, 3] + pdf1 = stats.halfgennorm.pdf(points, 2) + pdf2 = stats.halfnorm.pdf(points, scale=2**-.5) + assert_almost_equal(pdf1, pdf2) + + def test_gennorm(self): + # test against generalized normal + points = [1, 2, 3] + pdf1 = stats.halfgennorm.pdf(points, .497324) + pdf2 = stats.gennorm.pdf(points, .497324) + assert_almost_equal(pdf1, 2*pdf2) + + +class TestLaplaceasymmetric: + def test_laplace(self): + # test against Laplace (special case for kappa=1) + points = np.array([1, 2, 3]) + pdf1 = stats.laplace_asymmetric.pdf(points, 1) + pdf2 = stats.laplace.pdf(points) + assert_allclose(pdf1, pdf2) + + def test_asymmetric_laplace_pdf(self): + # test asymmetric Laplace + points = np.array([1, 2, 3]) + kappa = 2 + kapinv = 1/kappa + pdf1 = stats.laplace_asymmetric.pdf(points, kappa) + pdf2 = stats.laplace_asymmetric.pdf(points*(kappa**2), kapinv) + assert_allclose(pdf1, pdf2) + + def test_asymmetric_laplace_log_10_16(self): + # test asymmetric Laplace + points = np.array([-np.log(16), np.log(10)]) + kappa = 2 + pdf1 = stats.laplace_asymmetric.pdf(points, kappa) + cdf1 = stats.laplace_asymmetric.cdf(points, kappa) + sf1 = stats.laplace_asymmetric.sf(points, kappa) + pdf2 = np.array([1/10, 1/250]) + cdf2 = np.array([1/5, 1 - 1/500]) + sf2 = np.array([4/5, 1/500]) + ppf1 = stats.laplace_asymmetric.ppf(cdf2, kappa) + ppf2 = points + isf1 = 
stats.laplace_asymmetric.isf(sf2, kappa) + isf2 = points + assert_allclose(np.concatenate((pdf1, cdf1, sf1, ppf1, isf1)), + np.concatenate((pdf2, cdf2, sf2, ppf2, isf2))) + + +class TestTruncnorm: + def setup_method(self): + np.random.seed(1234) + + @pytest.mark.parametrize("a, b, ref", + [(0, 100, 0.7257913526447274), + (0.6, 0.7, -2.3027610681852573), + (1e-06, 2e-06, -13.815510557964274)]) + def test_entropy(self, a, b, ref): + # All reference values were calculated with mpmath: + # import numpy as np + # from mpmath import mp + # mp.dps = 50 + # def entropy_trun(a, b): + # a, b = mp.mpf(a), mp.mpf(b) + # Z = mp.ncdf(b) - mp.ncdf(a) + # + # def pdf(x): + # return mp.npdf(x) / Z + # + # res = -mp.quad(lambda t: pdf(t) * mp.log(pdf(t)), [a, b]) + # return np.float64(res) + assert_allclose(stats.truncnorm.entropy(a, b), ref, rtol=1e-10) + + @pytest.mark.parametrize("a, b, ref", + [(1e-11, 10000000000.0, 0.725791352640738), + (1e-100, 1e+100, 0.7257913526447274), + (-1e-100, 1e+100, 0.7257913526447274), + (-1e+100, 1e+100, 1.4189385332046727)]) + def test_extreme_entropy(self, a, b, ref): + # The reference values were calculated with mpmath + # import numpy as np + # from mpmath import mp + # mp.dps = 50 + # def trunc_norm_entropy(a, b): + # a, b = mp.mpf(a), mp.mpf(b) + # Z = mp.ncdf(b) - mp.ncdf(a) + # A = mp.log(mp.sqrt(2 * mp.pi * mp.e) * Z) + # B = (a * mp.npdf(a) - b * mp.npdf(b)) / (2 * Z) + # return np.float64(A + B) + assert_allclose(stats.truncnorm.entropy(a, b), ref, rtol=1e-14) + + def test_ppf_ticket1131(self): + vals = stats.truncnorm.ppf([-0.5, 0, 1e-4, 0.5, 1-1e-4, 1, 2], -1., 1., + loc=[3]*7, scale=2) + expected = np.array([np.nan, 1, 1.00056419, 3, 4.99943581, 5, np.nan]) + assert_array_almost_equal(vals, expected) + + def test_isf_ticket1131(self): + vals = stats.truncnorm.isf([-0.5, 0, 1e-4, 0.5, 1-1e-4, 1, 2], -1., 1., + loc=[3]*7, scale=2) + expected = np.array([np.nan, 5, 4.99943581, 3, 1.00056419, 1, np.nan]) + assert_array_almost_equal(vals, 
expected) + + def test_gh_2477_small_values(self): + # Check a case that worked in the original issue. + low, high = -11, -10 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + # Check a case that failed in the original issue. + low, high = 10, 11 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + + def test_gh_2477_large_values(self): + # Check a case that used to fail because of extreme tailness. + low, high = 100, 101 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low <= x.min() <= x.max() <= high), str([low, high, x]) + + # Check some additional extreme tails + low, high = 1000, 1001 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + + low, high = 10000, 10001 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + + low, high = -10001, -10000 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + + def test_gh_9403_nontail_values(self): + for low, high in [[3, 4], [-4, -3]]: + xvals = np.array([-np.inf, low, high, np.inf]) + xmid = (high+low)/2.0 + cdfs = stats.truncnorm.cdf(xvals, low, high) + sfs = stats.truncnorm.sf(xvals, low, high) + pdfs = stats.truncnorm.pdf(xvals, low, high) + expected_cdfs = np.array([0, 0, 1, 1]) + expected_sfs = np.array([1.0, 1.0, 0.0, 0.0]) + expected_pdfs = np.array([0, 3.3619772, 0.1015229, 0]) + if low < 0: + expected_pdfs = np.array([0, 0.1015229, 3.3619772, 0]) + assert_almost_equal(cdfs, expected_cdfs) + assert_almost_equal(sfs, expected_sfs) + assert_almost_equal(pdfs, expected_pdfs) + assert_almost_equal(np.log(expected_pdfs[1]/expected_pdfs[2]), + low + 0.5) + pvals = np.array([0, 0.5, 1.0]) + ppfs = stats.truncnorm.ppf(pvals, low, high) + expected_ppfs = np.array([low, np.sign(low)*3.1984741, high]) + assert_almost_equal(ppfs, expected_ppfs) + + if low < 0: + 
assert_almost_equal(stats.truncnorm.sf(xmid, low, high), + 0.8475544278436675) + assert_almost_equal(stats.truncnorm.cdf(xmid, low, high), + 0.1524455721563326) + else: + assert_almost_equal(stats.truncnorm.cdf(xmid, low, high), + 0.8475544278436675) + assert_almost_equal(stats.truncnorm.sf(xmid, low, high), + 0.1524455721563326) + pdf = stats.truncnorm.pdf(xmid, low, high) + assert_almost_equal(np.log(pdf/expected_pdfs[2]), (xmid+0.25)/2) + + def test_gh_9403_medium_tail_values(self): + for low, high in [[39, 40], [-40, -39]]: + xvals = np.array([-np.inf, low, high, np.inf]) + xmid = (high+low)/2.0 + cdfs = stats.truncnorm.cdf(xvals, low, high) + sfs = stats.truncnorm.sf(xvals, low, high) + pdfs = stats.truncnorm.pdf(xvals, low, high) + expected_cdfs = np.array([0, 0, 1, 1]) + expected_sfs = np.array([1.0, 1.0, 0.0, 0.0]) + expected_pdfs = np.array([0, 3.90256074e+01, 2.73349092e-16, 0]) + if low < 0: + expected_pdfs = np.array([0, 2.73349092e-16, + 3.90256074e+01, 0]) + assert_almost_equal(cdfs, expected_cdfs) + assert_almost_equal(sfs, expected_sfs) + assert_almost_equal(pdfs, expected_pdfs) + assert_almost_equal(np.log(expected_pdfs[1]/expected_pdfs[2]), + low + 0.5) + pvals = np.array([0, 0.5, 1.0]) + ppfs = stats.truncnorm.ppf(pvals, low, high) + expected_ppfs = np.array([low, np.sign(low)*39.01775731, high]) + assert_almost_equal(ppfs, expected_ppfs) + cdfs = stats.truncnorm.cdf(ppfs, low, high) + assert_almost_equal(cdfs, pvals) + + if low < 0: + assert_almost_equal(stats.truncnorm.sf(xmid, low, high), + 0.9999999970389126) + assert_almost_equal(stats.truncnorm.cdf(xmid, low, high), + 2.961048103554866e-09) + else: + assert_almost_equal(stats.truncnorm.cdf(xmid, low, high), + 0.9999999970389126) + assert_almost_equal(stats.truncnorm.sf(xmid, low, high), + 2.961048103554866e-09) + pdf = stats.truncnorm.pdf(xmid, low, high) + assert_almost_equal(np.log(pdf/expected_pdfs[2]), (xmid+0.25)/2) + + xvals = np.linspace(low, high, 11) + xvals2 = -xvals[::-1] + 
assert_almost_equal(stats.truncnorm.cdf(xvals, low, high), + stats.truncnorm.sf(xvals2, -high, -low)[::-1]) + assert_almost_equal(stats.truncnorm.sf(xvals, low, high), + stats.truncnorm.cdf(xvals2, -high, -low)[::-1]) + assert_almost_equal(stats.truncnorm.pdf(xvals, low, high), + stats.truncnorm.pdf(xvals2, -high, -low)[::-1]) + + def test_cdf_tail_15110_14753(self): + # Check accuracy issues reported in gh-14753 and gh-155110 + # Ground truth values calculated using Wolfram Alpha, e.g. + # (CDF[NormalDistribution[0,1],83/10]-CDF[NormalDistribution[0,1],8])/ + # (1 - CDF[NormalDistribution[0,1],8]) + assert_allclose(stats.truncnorm(13., 15.).cdf(14.), + 0.9999987259565643) + assert_allclose(stats.truncnorm(8, np.inf).cdf(8.3), + 0.9163220907327540) + + # Test data for the truncnorm stats() method. + # The data in each row is: + # a, b, mean, variance, skewness, excess kurtosis. Generated using + # https://gist.github.com/WarrenWeckesser/636b537ee889679227d53543d333a720 + _truncnorm_stats_data = [ + [-30, 30, + 0.0, 1.0, 0.0, 0.0], + [-10, 10, + 0.0, 1.0, 0.0, -1.4927521335810455e-19], + [-3, 3, + 0.0, 0.9733369246625415, 0.0, -0.17111443639774404], + [-2, 2, + 0.0, 0.7737413035499232, 0.0, -0.6344632828703505], + [0, np.inf, + 0.7978845608028654, + 0.3633802276324187, + 0.995271746431156, + 0.8691773036059741], + [-np.inf, 0, + -0.7978845608028654, + 0.3633802276324187, + -0.995271746431156, + 0.8691773036059741], + [-1, 3, + 0.282786110727154, + 0.6161417353578293, + 0.5393018494027877, + -0.20582065135274694], + [-3, 1, + -0.282786110727154, + 0.6161417353578293, + -0.5393018494027877, + -0.20582065135274694], + [-10, -9, + -9.108456288012409, + 0.011448805821636248, + -1.8985607290949496, + 5.0733461105025075], + ] + _truncnorm_stats_data = np.array(_truncnorm_stats_data) + + @pytest.mark.parametrize("case", _truncnorm_stats_data) + def test_moments(self, case): + a, b, m0, v0, s0, k0 = case + m, v, s, k = stats.truncnorm.stats(a, b, moments='mvsk') + 
assert_allclose([m, v, s, k], [m0, v0, s0, k0], atol=1e-17) + + def test_9902_moments(self): + m, v = stats.truncnorm.stats(0, np.inf, moments='mv') + assert_almost_equal(m, 0.79788456) + assert_almost_equal(v, 0.36338023) + + def test_gh_1489_trac_962_rvs(self): + # Check the original example. + low, high = 10, 15 + x = stats.truncnorm.rvs(low, high, 0, 1, size=10) + assert_(low < x.min() < x.max() < high) + + def test_gh_11299_rvs(self): + # Arose from investigating gh-11299 + # Test multiple shape parameters simultaneously. + low = [-10, 10, -np.inf, -5, -np.inf, -np.inf, -45, -45, 40, -10, 40] + high = [-5, 11, 5, np.inf, 40, -40, 40, -40, 45, np.inf, np.inf] + x = stats.truncnorm.rvs(low, high, size=(5, len(low))) + assert np.shape(x) == (5, len(low)) + assert_(np.all(low <= x.min(axis=0))) + assert_(np.all(x.max(axis=0) <= high)) + + def test_rvs_Generator(self): + # check that rvs can use a Generator + if hasattr(np.random, "default_rng"): + stats.truncnorm.rvs(-10, -5, size=5, + random_state=np.random.default_rng()) + + def test_logcdf_gh17064(self): + # regression test for gh-17064 - avoid roundoff error for logcdfs ~0 + a = np.array([-np.inf, -np.inf, -8, -np.inf, 10]) + b = np.array([np.inf, np.inf, 8, 10, np.inf]) + x = np.array([10, 7.5, 7.5, 9, 20]) + expected = [-7.619853024160525e-24, -3.190891672910947e-14, + -3.128682067168231e-14, -1.1285122074235991e-19, + -3.61374964828753e-66] + assert_allclose(stats.truncnorm(a, b).logcdf(x), expected) + assert_allclose(stats.truncnorm(-b, -a).logsf(-x), expected) + + def test_moments_gh18634(self): + # gh-18634 reported that moments 5 and higher didn't work; check that + # this is resolved + res = stats.truncnorm(-2, 3).moment(5) + # From Mathematica: + # Moment[TruncatedDistribution[{-2, 3}, NormalDistribution[]], 5] + ref = 1.645309620208361 + assert_allclose(res, ref) + + +class TestGenLogistic: + + # Expected values computed with mpmath with 50 digits of precision. 
+ @pytest.mark.parametrize('x, expected', [(-1000, -1499.5945348918917), + (-125, -187.09453489189184), + (0, -1.3274028432916989), + (100, -99.59453489189184), + (1000, -999.5945348918918)]) + def test_logpdf(self, x, expected): + c = 1.5 + logp = stats.genlogistic.logpdf(x, c) + assert_allclose(logp, expected, rtol=1e-13) + + # Expected values computed with mpmath with 50 digits of precision + # from mpmath import mp + # mp.dps = 50 + # def entropy_mp(c): + # c = mp.mpf(c) + # return float(-mp.log(c)+mp.one+mp.digamma(c + mp.one) + mp.euler) + + @pytest.mark.parametrize('c, ref', [(1e-100, 231.25850929940458), + (1e-4, 10.21050485336338), + (1e8, 1.577215669901533), + (1e100, 1.5772156649015328)]) + def test_entropy(self, c, ref): + assert_allclose(stats.genlogistic.entropy(c), ref, rtol=5e-15) + + # Expected values computed with mpmath with 50 digits of precision + # from mpmath import mp + # mp.dps = 1000 + # + # def genlogistic_cdf_mp(x, c): + # x = mp.mpf(x) + # c = mp.mpf(c) + # return (mp.one + mp.exp(-x)) ** (-c) + # + # def genlogistic_sf_mp(x, c): + # return mp.one - genlogistic_cdf_mp(x, c) + # + # x, c, ref = 100, 0.02, -7.440151952041672e-466 + # print(float(mp.log(genlogistic_cdf_mp(x, c)))) + # ppf/isf reference values generated by passing in `ref` (`q` is produced) + + @pytest.mark.parametrize('x, c, ref', [(200, 10, 1.3838965267367375e-86), + (500, 20, 1.424915281348257e-216)]) + def test_sf(self, x, c, ref): + assert_allclose(stats.genlogistic.sf(x, c), ref, rtol=1e-14) + + @pytest.mark.parametrize('q, c, ref', [(0.01, 200, 9.898441467379765), + (0.001, 2, 7.600152115573173)]) + def test_isf(self, q, c, ref): + assert_allclose(stats.genlogistic.isf(q, c), ref, rtol=5e-16) + + @pytest.mark.parametrize('q, c, ref', [(0.5, 200, 5.6630969187064615), + (0.99, 20, 7.595630231412436)]) + def test_ppf(self, q, c, ref): + assert_allclose(stats.genlogistic.ppf(q, c), ref, rtol=5e-16) + + @pytest.mark.parametrize('x, c, ref', [(100, 0.02, 
-7.440151952041672e-46), + (50, 20, -3.857499695927835e-21)]) + def test_logcdf(self, x, c, ref): + assert_allclose(stats.genlogistic.logcdf(x, c), ref, rtol=1e-15) + + +class TestHypergeom: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.hypergeom.rvs(20, 10, 3, size=(2, 50)) + assert_(numpy.all(vals >= 0) & + numpy.all(vals <= 3)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.hypergeom.rvs(20, 3, 10) + assert_(isinstance(val, int)) + val = stats.hypergeom(20, 3, 10).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_precision(self): + # comparison number from mpmath + M = 2500 + n = 50 + N = 500 + tot = M + good = n + hgpmf = stats.hypergeom.pmf(2, tot, good, N) + assert_almost_equal(hgpmf, 0.0010114963068932233, 11) + + def test_args(self): + # test correct output for corner cases of arguments + # see gh-2325 + assert_almost_equal(stats.hypergeom.pmf(0, 2, 1, 0), 1.0, 11) + assert_almost_equal(stats.hypergeom.pmf(1, 2, 1, 0), 0.0, 11) + + assert_almost_equal(stats.hypergeom.pmf(0, 2, 0, 2), 1.0, 11) + assert_almost_equal(stats.hypergeom.pmf(1, 2, 1, 0), 0.0, 11) + + def test_cdf_above_one(self): + # for some values of parameters, hypergeom cdf was >1, see gh-2238 + assert_(0 <= stats.hypergeom.cdf(30, 13397950, 4363, 12390) <= 1.0) + + def test_precision2(self): + # Test hypergeom precision for large numbers. See #1218. + # Results compared with those from R. 
+ oranges = 9.9e4 + pears = 1.1e5 + fruits_eaten = np.array([3, 3.8, 3.9, 4, 4.1, 4.2, 5]) * 1e4 + quantile = 2e4 + res = [stats.hypergeom.sf(quantile, oranges + pears, oranges, eaten) + for eaten in fruits_eaten] + expected = np.array([0, 1.904153e-114, 2.752693e-66, 4.931217e-32, + 8.265601e-11, 0.1237904, 1]) + assert_allclose(res, expected, atol=0, rtol=5e-7) + + # Test with array_like first argument + quantiles = [1.9e4, 2e4, 2.1e4, 2.15e4] + res2 = stats.hypergeom.sf(quantiles, oranges + pears, oranges, 4.2e4) + expected2 = [1, 0.1237904, 6.511452e-34, 3.277667e-69] + assert_allclose(res2, expected2, atol=0, rtol=5e-7) + + def test_entropy(self): + # Simple tests of entropy. + hg = stats.hypergeom(4, 1, 1) + h = hg.entropy() + expected_p = np.array([0.75, 0.25]) + expected_h = -np.sum(xlogy(expected_p, expected_p)) + assert_allclose(h, expected_h) + + hg = stats.hypergeom(1, 1, 1) + h = hg.entropy() + assert_equal(h, 0.0) + + def test_logsf(self): + # Test logsf for very large numbers. See issue #4982 + # Results compare with those from R (v3.2.0): + # phyper(k, n, M-n, N, lower.tail=FALSE, log.p=TRUE) + # -2239.771 + + k = 1e4 + M = 1e7 + n = 1e6 + N = 5e4 + + result = stats.hypergeom.logsf(k, M, n, N) + expected = -2239.771 # From R + assert_almost_equal(result, expected, decimal=3) + + k = 1 + M = 1600 + n = 600 + N = 300 + + result = stats.hypergeom.logsf(k, M, n, N) + expected = -2.566567e-68 # From R + assert_almost_equal(result, expected, decimal=15) + + def test_logcdf(self): + # Test logcdf for very large numbers. 
See issue #8692 + # Results compare with those from R (v3.3.2): + # phyper(k, n, M-n, N, lower.tail=TRUE, log.p=TRUE) + # -5273.335 + + k = 1 + M = 1e7 + n = 1e6 + N = 5e4 + + result = stats.hypergeom.logcdf(k, M, n, N) + expected = -5273.335 # From R + assert_almost_equal(result, expected, decimal=3) + + # Same example as in issue #8692 + k = 40 + M = 1600 + n = 50 + N = 300 + + result = stats.hypergeom.logcdf(k, M, n, N) + expected = -7.565148879229e-23 # From R + assert_almost_equal(result, expected, decimal=15) + + k = 125 + M = 1600 + n = 250 + N = 500 + + result = stats.hypergeom.logcdf(k, M, n, N) + expected = -4.242688e-12 # From R + assert_almost_equal(result, expected, decimal=15) + + # test broadcasting robustness based on reviewer + # concerns in PR 9603; using an array version of + # the example from issue #8692 + k = np.array([40, 40, 40]) + M = 1600 + n = 50 + N = 300 + + result = stats.hypergeom.logcdf(k, M, n, N) + expected = np.full(3, -7.565148879229e-23) # filled from R result + assert_almost_equal(result, expected, decimal=15) + + def test_mean_gh18511(self): + # gh-18511 reported that the `mean` was incorrect for large arguments; + # check that this is resolved + M = 390_000 + n = 370_000 + N = 12_000 + + hm = stats.hypergeom.mean(M, n, N) + rm = n / M * N + assert_allclose(hm, rm) + + def test_sf_gh18506(self): + # gh-18506 reported that `sf` was incorrect for large population; + # check that this is resolved + n = 10 + N = 10**5 + i = np.arange(5, 15) + population_size = 10.**i + p = stats.hypergeom.sf(n - 1, population_size, N, n) + assert np.all(p > 0) + assert np.all(np.diff(p) < 0) + + +class TestLoggamma: + + # Expected cdf values were computed with mpmath. 
For given x and c, + # x = mpmath.mpf(x) + # c = mpmath.mpf(c) + # cdf = mpmath.gammainc(c, 0, mpmath.exp(x), + # regularized=True) + @pytest.mark.parametrize('x, c, cdf', + [(1, 2, 0.7546378854206702), + (-1, 14, 6.768116452566383e-18), + (-745.1, 0.001, 0.4749605142005238), + (-800, 0.001, 0.44958802911019136), + (-725, 0.1, 3.4301205868273265e-32), + (-740, 0.75, 1.0074360436599631e-241)]) + def test_cdf_ppf(self, x, c, cdf): + p = stats.loggamma.cdf(x, c) + assert_allclose(p, cdf, rtol=1e-13) + y = stats.loggamma.ppf(cdf, c) + assert_allclose(y, x, rtol=1e-13) + + # Expected sf values were computed with mpmath. For given x and c, + # x = mpmath.mpf(x) + # c = mpmath.mpf(c) + # sf = mpmath.gammainc(c, mpmath.exp(x), mpmath.inf, + # regularized=True) + @pytest.mark.parametrize('x, c, sf', + [(4, 1.5, 1.6341528919488565e-23), + (6, 100, 8.23836829202024e-74), + (-800, 0.001, 0.5504119708898086), + (-743, 0.0025, 0.8437131370024089)]) + def test_sf_isf(self, x, c, sf): + s = stats.loggamma.sf(x, c) + assert_allclose(s, sf, rtol=1e-13) + y = stats.loggamma.isf(sf, c) + assert_allclose(y, x, rtol=1e-13) + + def test_logpdf(self): + # Test logpdf with x=-500, c=2. ln(gamma(2)) = 0, and + # exp(-500) ~= 7e-218, which is far smaller than the ULP + # of c*x=-1000, so logpdf(-500, 2) = c*x - exp(x) - ln(gamma(2)) + # should give -1000.0. + lp = stats.loggamma.logpdf(-500, 2) + assert_allclose(lp, -1000.0, rtol=1e-14) + + def test_stats(self): + # The following precomputed values are from the table in section 2.2 + # of "A Statistical Study of Log-Gamma Distribution", by Ping Shing + # Chan (thesis, McMaster University, 1993). + table = np.array([ + # c, mean, var, skew, exc. kurt. 
+ 0.5, -1.9635, 4.9348, -1.5351, 4.0000, + 1.0, -0.5772, 1.6449, -1.1395, 2.4000, + 12.0, 2.4427, 0.0869, -0.2946, 0.1735, + ]).reshape(-1, 5) + for c, mean, var, skew, kurt in table: + computed = stats.loggamma.stats(c, moments='msvk') + assert_array_almost_equal(computed, [mean, var, skew, kurt], + decimal=4) + + @pytest.mark.parametrize('c', [0.1, 0.001]) + def test_rvs(self, c): + # Regression test for gh-11094. + x = stats.loggamma.rvs(c, size=100000) + # Before gh-11094 was fixed, the case with c=0.001 would + # generate many -inf values. + assert np.isfinite(x).all() + # Crude statistical test. About half the values should be + # less than the median and half greater than the median. + med = stats.loggamma.median(c) + btest = stats.binomtest(np.count_nonzero(x < med), len(x)) + ci = btest.proportion_ci(confidence_level=0.999) + assert ci.low < 0.5 < ci.high + + @pytest.mark.parametrize("c, ref", + [(1e-8, 19.420680753952364), + (1, 1.5772156649015328), + (1e4, -3.186214986116763), + (1e10, -10.093986931748889), + (1e100, -113.71031611649761)]) + def test_entropy(self, c, ref): + + # Reference values were calculated with mpmath + # from mpmath import mp + # mp.dps = 500 + # def loggamma_entropy_mpmath(c): + # c = mp.mpf(c) + # return float(mp.log(mp.gamma(c)) + c * (mp.one - mp.digamma(c))) + + assert_allclose(stats.loggamma.entropy(c), ref, rtol=1e-14) + + +class TestJohnsonsu: + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 50 + # def johnsonsu_sf(x, a, b): + # x = mp.mpf(x) + # a = mp.mpf(a) + # b = mp.mpf(b) + # return float(mp.ncdf(-(a + b * mp.log(x + mp.sqrt(x*x + 1))))) + # Order is x, a, b, sf, isf tol + # (Can't expect full precision when the ISF input is very nearly 1) + cases = [(-500, 1, 1, 0.9999999982660072, 1e-8), + (2000, 1, 1, 7.426351000595343e-21, 5e-14), + (100000, 1, 1, 4.046923979269977e-40, 5e-14)] + + @pytest.mark.parametrize("case", cases) + def test_sf_isf(self, case): + x, a, b, sf, tol = case + 
assert_allclose(stats.johnsonsu.sf(x, a, b), sf, rtol=5e-14) + assert_allclose(stats.johnsonsu.isf(sf, a, b), x, rtol=tol) + + +class TestJohnsonb: + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 50 + # def johnsonb_sf(x, a, b): + # x = mp.mpf(x) + # a = mp.mpf(a) + # b = mp.mpf(b) + # return float(mp.ncdf(-(a + b * mp.log(x/(mp.one - x))))) + # Order is x, a, b, sf, isf atol + # (Can't expect full precision when the ISF input is very nearly 1) + cases = [(1e-4, 1, 1, 0.9999999999999999, 1e-7), + (0.9999, 1, 1, 8.921114313932308e-25, 5e-14), + (0.999999, 1, 1, 5.815197487181902e-50, 5e-14)] + + @pytest.mark.parametrize("case", cases) + def test_sf_isf(self, case): + x, a, b, sf, tol = case + assert_allclose(stats.johnsonsb.sf(x, a, b), sf, rtol=5e-14) + assert_allclose(stats.johnsonsb.isf(sf, a, b), x, atol=tol) + + +class TestLogistic: + # gh-6226 + def test_cdf_ppf(self): + x = np.linspace(-20, 20) + y = stats.logistic.cdf(x) + xx = stats.logistic.ppf(y) + assert_allclose(x, xx) + + def test_sf_isf(self): + x = np.linspace(-20, 20) + y = stats.logistic.sf(x) + xx = stats.logistic.isf(y) + assert_allclose(x, xx) + + def test_extreme_values(self): + # p is chosen so that 1 - (1 - p) == p in double precision + p = 9.992007221626409e-16 + desired = 34.53957599234088 + assert_allclose(stats.logistic.ppf(1 - p), desired) + assert_allclose(stats.logistic.isf(p), desired) + + def test_logpdf_basic(self): + logp = stats.logistic.logpdf([-15, 0, 10]) + # Expected values computed with mpmath with 50 digits of precision. + expected = [-15.000000611804547, + -1.3862943611198906, + -10.000090797798434] + assert_allclose(logp, expected, rtol=1e-13) + + def test_logpdf_extreme_values(self): + logp = stats.logistic.logpdf([800, -800]) + # For such large arguments, logpdf(x) = -abs(x) when computed + # with 64 bit floating point. 
+ assert_equal(logp, [-800, -800]) + + @pytest.mark.parametrize("loc_rvs,scale_rvs", [(0.4484955, 0.10216821), + (0.62918191, 0.74367064)]) + def test_fit(self, loc_rvs, scale_rvs): + data = stats.logistic.rvs(size=100, loc=loc_rvs, scale=scale_rvs) + + # test that result of fit method is the same as optimization + def func(input, data): + a, b = input + n = len(data) + x1 = np.sum(np.exp((data - a) / b) / + (1 + np.exp((data - a) / b))) - n / 2 + x2 = np.sum(((data - a) / b) * + ((np.exp((data - a) / b) - 1) / + (np.exp((data - a) / b) + 1))) - n + return x1, x2 + + expected_solution = root(func, stats.logistic._fitstart(data), args=( + data,)).x + fit_method = stats.logistic.fit(data) + + # other than computational variances, the fit method and the solution + # to this system of equations are equal + assert_allclose(fit_method, expected_solution, atol=1e-30) + + def test_fit_comp_optimizer(self): + data = stats.logistic.rvs(size=100, loc=0.5, scale=2) + _assert_less_or_close_loglike(stats.logistic, data) + _assert_less_or_close_loglike(stats.logistic, data, floc=1) + _assert_less_or_close_loglike(stats.logistic, data, fscale=1) + + @pytest.mark.parametrize('testlogcdf', [True, False]) + def test_logcdfsf_tails(self, testlogcdf): + # Test either logcdf or logsf. By symmetry, we can use the same + # expected values for both by switching the sign of x for logsf. + x = np.array([-10000, -800, 17, 50, 500]) + if testlogcdf: + y = stats.logistic.logcdf(x) + else: + y = stats.logistic.logsf(-x) + # The expected values were computed with mpmath. + expected = [-10000.0, -800.0, -4.139937633089748e-08, + -1.9287498479639178e-22, -7.124576406741286e-218] + assert_allclose(y, expected, rtol=2e-15) + + def test_fit_gh_18176(self): + # logistic.fit returned `scale < 0` for this data. Check that this has + # been fixed. 
+ data = np.array([-459, 37, 43, 45, 45, 48, 54, 55, 58] + + [59] * 3 + [61] * 9) + # If scale were negative, NLLF would be infinite, so this would fail + _assert_less_or_close_loglike(stats.logistic, data) + + +class TestLogser: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.logser.rvs(0.75, size=(2, 50)) + assert_(numpy.all(vals >= 1)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.logser.rvs(0.75) + assert_(isinstance(val, int)) + val = stats.logser(0.75).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_pmf_small_p(self): + m = stats.logser.pmf(4, 1e-20) + # The expected value was computed using mpmath: + # >>> import mpmath + # >>> mpmath.mp.dps = 64 + # >>> k = 4 + # >>> p = mpmath.mpf('1e-20') + # >>> float(-(p**k)/k/mpmath.log(1-p)) + # 2.5e-61 + # It is also clear from noticing that for very small p, + # log(1-p) is approximately -p, and the formula becomes + # p**(k-1) / k + assert_allclose(m, 2.5e-61) + + def test_mean_small_p(self): + m = stats.logser.mean(1e-8) + # The expected mean was computed using mpmath: + # >>> import mpmath + # >>> mpmath.mp.dps = 60 + # >>> p = mpmath.mpf('1e-8') + # >>> float(-p / ((1 - p)*mpmath.log(1 - p))) + # 1.000000005 + assert_allclose(m, 1.000000005) + + +class TestGumbel_r_l: + @pytest.fixture(scope='function') + def rng(self): + return np.random.default_rng(1234) + + @pytest.mark.parametrize("dist", [stats.gumbel_r, stats.gumbel_l]) + @pytest.mark.parametrize("loc_rvs", [-1, 0, 1]) + @pytest.mark.parametrize("scale_rvs", [.1, 1, 5]) + @pytest.mark.parametrize('fix_loc, fix_scale', + ([True, False], [False, True])) + def test_fit_comp_optimizer(self, dist, loc_rvs, scale_rvs, + fix_loc, fix_scale, rng): + data = dist.rvs(size=100, loc=loc_rvs, scale=scale_rvs, + random_state=rng) + + kwds = dict() + # the fixed location and scales are arbitrarily modified
to not be + # close to the true value. + if fix_loc: + kwds['floc'] = loc_rvs * 2 + if fix_scale: + kwds['fscale'] = scale_rvs * 2 + + # test that the gumbel_* fit method is better than super method + _assert_less_or_close_loglike(dist, data, **kwds) + + @pytest.mark.parametrize("dist, sgn", [(stats.gumbel_r, 1), + (stats.gumbel_l, -1)]) + def test_fit(self, dist, sgn): + z = sgn*np.array([3, 3, 3, 3, 3, 3, 3, 3.00000001]) + loc, scale = dist.fit(z) + # The expected values were computed with mpmath with 60 digits + # of precision. + assert_allclose(loc, sgn*3.0000000001667906) + assert_allclose(scale, 1.2495222465145514e-09, rtol=1e-6) + + +class TestPareto: + def test_stats(self): + # Check the stats() method with some simple values. Also check + # that the calculations do not trigger RuntimeWarnings. + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + + m, v, s, k = stats.pareto.stats(0.5, moments='mvsk') + assert_equal(m, np.inf) + assert_equal(v, np.inf) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(1.0, moments='mvsk') + assert_equal(m, np.inf) + assert_equal(v, np.inf) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(1.5, moments='mvsk') + assert_equal(m, 3.0) + assert_equal(v, np.inf) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(2.0, moments='mvsk') + assert_equal(m, 2.0) + assert_equal(v, np.inf) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(2.5, moments='mvsk') + assert_allclose(m, 2.5 / 1.5) + assert_allclose(v, 2.5 / (1.5*1.5*0.5)) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(3.0, moments='mvsk') + assert_allclose(m, 1.5) + assert_allclose(v, 0.75) + assert_equal(s, np.nan) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(3.5, moments='mvsk') + assert_allclose(m, 3.5 / 2.5) + assert_allclose(v, 3.5 / 
(2.5*2.5*1.5)) + assert_allclose(s, (2*4.5/0.5)*np.sqrt(1.5/3.5)) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(4.0, moments='mvsk') + assert_allclose(m, 4.0 / 3.0) + assert_allclose(v, 4.0 / 18.0) + assert_allclose(s, 2*(1+4.0)/(4.0-3) * np.sqrt((4.0-2)/4.0)) + assert_equal(k, np.nan) + + m, v, s, k = stats.pareto.stats(4.5, moments='mvsk') + assert_allclose(m, 4.5 / 3.5) + assert_allclose(v, 4.5 / (3.5*3.5*2.5)) + assert_allclose(s, (2*5.5/1.5) * np.sqrt(2.5/4.5)) + assert_allclose(k, 6*(4.5**3 + 4.5**2 - 6*4.5 - 2)/(4.5*1.5*0.5)) + + def test_sf(self): + x = 1e9 + b = 2 + scale = 1.5 + p = stats.pareto.sf(x, b, loc=0, scale=scale) + expected = (scale/x)**b # 2.25e-18 + assert_allclose(p, expected) + + @pytest.fixture(scope='function') + def rng(self): + return np.random.default_rng(1234) + + @pytest.mark.filterwarnings("ignore:invalid value encountered in " + "double_scalars") + @pytest.mark.parametrize("rvs_shape", [1, 2]) + @pytest.mark.parametrize("rvs_loc", [0, 2]) + @pytest.mark.parametrize("rvs_scale", [1, 5]) + def test_fit(self, rvs_shape, rvs_loc, rvs_scale, rng): + data = stats.pareto.rvs(size=100, b=rvs_shape, scale=rvs_scale, + loc=rvs_loc, random_state=rng) + + # shape can still be fixed with multiple names + shape_mle_analytical1 = stats.pareto.fit(data, floc=0, f0=1.04)[0] + shape_mle_analytical2 = stats.pareto.fit(data, floc=0, fix_b=1.04)[0] + shape_mle_analytical3 = stats.pareto.fit(data, floc=0, fb=1.04)[0] + assert (shape_mle_analytical1 == shape_mle_analytical2 == + shape_mle_analytical3 == 1.04) + + # data can be shifted with changes to `loc` + data = stats.pareto.rvs(size=100, b=rvs_shape, scale=rvs_scale, + loc=(rvs_loc + 2), random_state=rng) + shape_mle_a, loc_mle_a, scale_mle_a = stats.pareto.fit(data, floc=2) + assert_equal(scale_mle_a + 2, data.min()) + + data_shift = data - 2 + ndata = data_shift.shape[0] + assert_equal(shape_mle_a, + ndata / np.sum(np.log(data_shift/data_shift.min()))) + assert_equal(loc_mle_a, 2) + 
+ @pytest.mark.parametrize("rvs_shape", [.1, 2]) + @pytest.mark.parametrize("rvs_loc", [0, 2]) + @pytest.mark.parametrize("rvs_scale", [1, 5]) + @pytest.mark.parametrize('fix_shape, fix_loc, fix_scale', + [p for p in product([True, False], repeat=3) + if False in p]) + @np.errstate(invalid="ignore") + def test_fit_MLE_comp_optimizer(self, rvs_shape, rvs_loc, rvs_scale, + fix_shape, fix_loc, fix_scale, rng): + data = stats.pareto.rvs(size=100, b=rvs_shape, scale=rvs_scale, + loc=rvs_loc, random_state=rng) + + kwds = {} + if fix_shape: + kwds['f0'] = rvs_shape + if fix_loc: + kwds['floc'] = rvs_loc + if fix_scale: + kwds['fscale'] = rvs_scale + + _assert_less_or_close_loglike(stats.pareto, data, **kwds) + + @np.errstate(invalid="ignore") + def test_fit_known_bad_seed(self): + # Tests a known seed and set of parameters that would produce a result + # that would violate the support of Pareto if the fit method did not check + # the constraint `fscale + floc < min(data)`. + shape, location, scale = 1, 0, 1 + data = stats.pareto.rvs(shape, location, scale, size=100, + random_state=np.random.default_rng(2535619)) + _assert_less_or_close_loglike(stats.pareto, data) + + def test_fit_warnings(self): + assert_fit_warnings(stats.pareto) + # `floc` that causes invalid negative data + assert_raises(FitDataError, stats.pareto.fit, [1, 2, 3], floc=2) + # `floc` and `fscale` combination causes invalid data + assert_raises(FitDataError, stats.pareto.fit, [5, 2, 3], floc=1, + fscale=3) + + def test_negative_data(self, rng): + data = stats.pareto.rvs(loc=-130, b=1, size=100, random_state=rng) + assert_array_less(data, 0) + # The purpose of this test is to make sure that no runtime warnings are + # raised for all negative data, not the output of the fit method. Other + # methods test the output but have to silence warnings from the super + # method.
+ _ = stats.pareto.fit(data) + + +class TestGenpareto: + def test_ab(self): + # c >= 0: a, b = [0, inf] + for c in [1., 0.]: + c = np.asarray(c) + a, b = stats.genpareto._get_support(c) + assert_equal(a, 0.) + assert_(np.isposinf(b)) + + # c < 0: a=0, b=1/|c| + c = np.asarray(-2.) + a, b = stats.genpareto._get_support(c) + assert_allclose([a, b], [0., 0.5]) + + def test_c0(self): + # with c=0, genpareto reduces to the exponential distribution + # rv = stats.genpareto(c=0.) + rv = stats.genpareto(c=0.) + x = np.linspace(0, 10., 30) + assert_allclose(rv.pdf(x), stats.expon.pdf(x)) + assert_allclose(rv.cdf(x), stats.expon.cdf(x)) + assert_allclose(rv.sf(x), stats.expon.sf(x)) + + q = np.linspace(0., 1., 10) + assert_allclose(rv.ppf(q), stats.expon.ppf(q)) + + def test_cm1(self): + # with c=-1, genpareto reduces to the uniform distr on [0, 1] + rv = stats.genpareto(c=-1.) + x = np.linspace(0, 10., 30) + assert_allclose(rv.pdf(x), stats.uniform.pdf(x)) + assert_allclose(rv.cdf(x), stats.uniform.cdf(x)) + assert_allclose(rv.sf(x), stats.uniform.sf(x)) + + q = np.linspace(0., 1., 10) + assert_allclose(rv.ppf(q), stats.uniform.ppf(q)) + + # logpdf(1., c=-1) should be zero + assert_allclose(rv.logpdf(1), 0) + + def test_x_inf(self): + # make sure x=inf is handled gracefully + rv = stats.genpareto(c=0.1) + assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.]) + assert_(np.isneginf(rv.logpdf(np.inf))) + + rv = stats.genpareto(c=0.) + assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.]) + assert_(np.isneginf(rv.logpdf(np.inf))) + + rv = stats.genpareto(c=-1.) 
+ assert_allclose([rv.pdf(np.inf), rv.cdf(np.inf)], [0., 1.]) + assert_(np.isneginf(rv.logpdf(np.inf))) + + def test_c_continuity(self): + # pdf is continuous at c=0, -1 + x = np.linspace(0, 10, 30) + for c in [0, -1]: + pdf0 = stats.genpareto.pdf(x, c) + for dc in [1e-14, -1e-14]: + pdfc = stats.genpareto.pdf(x, c + dc) + assert_allclose(pdf0, pdfc, atol=1e-12) + + cdf0 = stats.genpareto.cdf(x, c) + for dc in [1e-14, 1e-14]: + cdfc = stats.genpareto.cdf(x, c + dc) + assert_allclose(cdf0, cdfc, atol=1e-12) + + def test_c_continuity_ppf(self): + q = np.r_[np.logspace(1e-12, 0.01, base=0.1), + np.linspace(0.01, 1, 30, endpoint=False), + 1. - np.logspace(1e-12, 0.01, base=0.1)] + for c in [0., -1.]: + ppf0 = stats.genpareto.ppf(q, c) + for dc in [1e-14, -1e-14]: + ppfc = stats.genpareto.ppf(q, c + dc) + assert_allclose(ppf0, ppfc, atol=1e-12) + + def test_c_continuity_isf(self): + q = np.r_[np.logspace(1e-12, 0.01, base=0.1), + np.linspace(0.01, 1, 30, endpoint=False), + 1. - np.logspace(1e-12, 0.01, base=0.1)] + for c in [0., -1.]: + isf0 = stats.genpareto.isf(q, c) + for dc in [1e-14, -1e-14]: + isfc = stats.genpareto.isf(q, c + dc) + assert_allclose(isf0, isfc, atol=1e-12) + + def test_cdf_ppf_roundtrip(self): + # this should pass with machine precision. hat tip @pbrod + q = np.r_[np.logspace(1e-12, 0.01, base=0.1), + np.linspace(0.01, 1, 30, endpoint=False), + 1. - np.logspace(1e-12, 0.01, base=0.1)] + for c in [1e-8, -1e-18, 1e-15, -1e-15]: + assert_allclose(stats.genpareto.cdf(stats.genpareto.ppf(q, c), c), + q, atol=1e-15) + + def test_logsf(self): + logp = stats.genpareto.logsf(1e10, .01, 0, 1) + assert_allclose(logp, -1842.0680753952365) + + # Values in 'expected_stats' are + # [mean, variance, skewness, excess kurtosis]. 
+ @pytest.mark.parametrize( + 'c, expected_stats', + [(0, [1, 1, 2, 6]), + (1/4, [4/3, 32/9, 10/np.sqrt(2), np.nan]), + (1/9, [9/8, (81/64)*(9/7), (10/9)*np.sqrt(7), 754/45]), + (-1, [1/2, 1/12, 0, -6/5])]) + def test_stats(self, c, expected_stats): + result = stats.genpareto.stats(c, moments='mvsk') + assert_allclose(result, expected_stats, rtol=1e-13, atol=1e-15) + + def test_var(self): + # Regression test for gh-11168. + v = stats.genpareto.var(1e-8) + assert_allclose(v, 1.000000040000001, rtol=1e-13) + + +class TestPearson3: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.pearson3.rvs(0.1, size=(2, 50)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllFloat']) + val = stats.pearson3.rvs(0.5) + assert_(isinstance(val, float)) + val = stats.pearson3(0.5).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllFloat']) + assert_(len(val) == 3) + + def test_pdf(self): + vals = stats.pearson3.pdf(2, [0.0, 0.1, 0.2]) + assert_allclose(vals, np.array([0.05399097, 0.05555481, 0.05670246]), + atol=1e-6) + vals = stats.pearson3.pdf(-3, 0.1) + assert_allclose(vals, np.array([0.00313791]), atol=1e-6) + vals = stats.pearson3.pdf([-3, -2, -1, 0, 1], 0.1) + assert_allclose(vals, np.array([0.00313791, 0.05192304, 0.25028092, + 0.39885918, 0.23413173]), atol=1e-6) + + def test_cdf(self): + vals = stats.pearson3.cdf(2, [0.0, 0.1, 0.2]) + assert_allclose(vals, np.array([0.97724987, 0.97462004, 0.97213626]), + atol=1e-6) + vals = stats.pearson3.cdf(-3, 0.1) + assert_allclose(vals, [0.00082256], atol=1e-6) + vals = stats.pearson3.cdf([-3, -2, -1, 0, 1], 0.1) + assert_allclose(vals, [8.22563821e-04, 1.99860448e-02, 1.58550710e-01, + 5.06649130e-01, 8.41442111e-01], atol=1e-6) + + def test_negative_cdf_bug_11186(self): + # incorrect CDFs for negative skews in gh-11186; fixed in gh-12640 + # Also check vectorization w/ negative, zero, and positive skews + skews = [-3, -1, 0, 
0.5] + x_eval = 0.5 + neg_inf = -30 # avoid RuntimeWarning caused by np.log(0) + cdfs = stats.pearson3.cdf(x_eval, skews) + int_pdfs = [quad(stats.pearson3(skew).pdf, neg_inf, x_eval)[0] + for skew in skews] + assert_allclose(cdfs, int_pdfs) + + def test_return_array_bug_11746(self): + # pearson3.moment was returning size 0 or 1 array instead of float + # The first moment is equal to the loc, which defaults to zero + moment = stats.pearson3.moment(1, 2) + assert_equal(moment, 0) + assert isinstance(moment, np.number) + + moment = stats.pearson3.moment(1, 0.000001) + assert_equal(moment, 0) + assert isinstance(moment, np.number) + + def test_ppf_bug_17050(self): + # incorrect PPF for negative skews were reported in gh-17050 + # Check that this is fixed (even in the array case) + skews = [-3, -1, 0, 0.5] + x_eval = 0.5 + res = stats.pearson3.ppf(stats.pearson3.cdf(x_eval, skews), skews) + assert_allclose(res, x_eval) + + # Negation of the skew flips the distribution about the origin, so + # the following should hold + skew = np.array([[-0.5], [1.5]]) + x = np.linspace(-2, 2) + assert_allclose(stats.pearson3.pdf(x, skew), + stats.pearson3.pdf(-x, -skew)) + assert_allclose(stats.pearson3.cdf(x, skew), + stats.pearson3.sf(-x, -skew)) + assert_allclose(stats.pearson3.ppf(x, skew), + -stats.pearson3.isf(x, -skew)) + + def test_sf(self): + # reference values were computed via the reference distribution, e.g. + # mp.dps = 50; Pearson3(skew=skew).sf(x). Check positive, negative, + # and zero skew due to branching. 
+ skew = [0.1, 0.5, 1.0, -0.1] + x = [5.0, 10.0, 50.0, 8.0] + ref = [1.64721926440872e-06, 8.271911573556123e-11, + 1.3149506021756343e-40, 2.763057937820296e-21] + assert_allclose(stats.pearson3.sf(x, skew), ref, rtol=2e-14) + assert_allclose(stats.pearson3.sf(x, 0), stats.norm.sf(x), rtol=2e-14) + + +class TestKappa4: + def test_cdf_genpareto(self): + # h = 1 and k != 0 is generalized Pareto + x = [0.0, 0.1, 0.2, 0.5] + h = 1.0 + for k in [-1.9, -1.0, -0.5, -0.2, -0.1, 0.1, 0.2, 0.5, 1.0, + 1.9]: + vals = stats.kappa4.cdf(x, h, k) + # shape parameter is opposite what is expected + vals_comp = stats.genpareto.cdf(x, -k) + assert_allclose(vals, vals_comp) + + def test_cdf_genextreme(self): + # h = 0 and k != 0 is generalized extreme value + x = np.linspace(-5, 5, 10) + h = 0.0 + k = np.linspace(-3, 3, 10) + vals = stats.kappa4.cdf(x, h, k) + vals_comp = stats.genextreme.cdf(x, k) + assert_allclose(vals, vals_comp) + + def test_cdf_expon(self): + # h = 1 and k = 0 is exponential + x = np.linspace(0, 10, 10) + h = 1.0 + k = 0.0 + vals = stats.kappa4.cdf(x, h, k) + vals_comp = stats.expon.cdf(x) + assert_allclose(vals, vals_comp) + + def test_cdf_gumbel_r(self): + # h = 0 and k = 0 is gumbel_r + x = np.linspace(-5, 5, 10) + h = 0.0 + k = 0.0 + vals = stats.kappa4.cdf(x, h, k) + vals_comp = stats.gumbel_r.cdf(x) + assert_allclose(vals, vals_comp) + + def test_cdf_logistic(self): + # h = -1 and k = 0 is logistic + x = np.linspace(-5, 5, 10) + h = -1.0 + k = 0.0 + vals = stats.kappa4.cdf(x, h, k) + vals_comp = stats.logistic.cdf(x) + assert_allclose(vals, vals_comp) + + def test_cdf_uniform(self): + # h = 1 and k = 1 is uniform + x = np.linspace(-5, 5, 10) + h = 1.0 + k = 1.0 + vals = stats.kappa4.cdf(x, h, k) + vals_comp = stats.uniform.cdf(x) + assert_allclose(vals, vals_comp) + + def test_integers_ctor(self): + # regression test for gh-7416: _argcheck fails for integer h and k + # in numpy 1.12 + stats.kappa4(1, 2) + + +class TestPoisson: + def setup_method(self): + 
np.random.seed(1234) + + def test_pmf_basic(self): + # Basic case + ln2 = np.log(2) + vals = stats.poisson.pmf([0, 1, 2], ln2) + expected = [0.5, ln2/2, ln2**2/4] + assert_allclose(vals, expected) + + def test_mu0(self): + # Edge case: mu=0 + vals = stats.poisson.pmf([0, 1, 2], 0) + expected = [1, 0, 0] + assert_array_equal(vals, expected) + + interval = stats.poisson.interval(0.95, 0) + assert_equal(interval, (0, 0)) + + def test_rvs(self): + vals = stats.poisson.rvs(0.5, size=(2, 50)) + assert_(numpy.all(vals >= 0)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.poisson.rvs(0.5) + assert_(isinstance(val, int)) + val = stats.poisson(0.5).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_stats(self): + mu = 16.0 + result = stats.poisson.stats(mu, moments='mvsk') + assert_allclose(result, [mu, mu, np.sqrt(1.0/mu), 1.0/mu]) + + mu = np.array([0.0, 1.0, 2.0]) + result = stats.poisson.stats(mu, moments='mvsk') + expected = (mu, mu, [np.inf, 1, 1/np.sqrt(2)], [np.inf, 1, 0.5]) + assert_allclose(result, expected) + + +class TestKSTwo: + def setup_method(self): + np.random.seed(1234) + + def test_cdf(self): + for n in [1, 2, 3, 10, 100, 1000]: + # Test x-values: + # 0, 1/2n, where the cdf should be 0 + # 1/n, where the cdf should be n!/n^n + # 0.5, where the cdf should match ksone.cdf + # 1-1/n, where cdf = 1-2/n^n + # 1, where cdf == 1 + # (E.g. 
Exact values given by Eqn 1 in Simard / L'Ecuyer) + x = np.array([0, 0.5/n, 1/n, 0.5, 1-1.0/n, 1]) + v1 = (1.0/n)**n + lg = scipy.special.gammaln(n+1) + elg = (np.exp(lg) if v1 != 0 else 0) + expected = np.array([0, 0, v1 * elg, + 1 - 2*stats.ksone.sf(0.5, n), + max(1 - 2*v1, 0.0), + 1.0]) + vals_cdf = stats.kstwo.cdf(x, n) + assert_allclose(vals_cdf, expected) + + def test_sf(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + # Same x values as in test_cdf, and use sf = 1 - cdf + x = np.array([0, 0.5/n, 1/n, 0.5, 1-1.0/n, 1]) + v1 = (1.0/n)**n + lg = scipy.special.gammaln(n+1) + elg = (np.exp(lg) if v1 != 0 else 0) + expected = np.array([1.0, 1.0, + 1 - v1 * elg, + 2*stats.ksone.sf(0.5, n), + min(2*v1, 1.0), 0]) + vals_sf = stats.kstwo.sf(x, n) + assert_allclose(vals_sf, expected) + + def test_cdf_sqrtn(self): + # For fixed a, cdf(a/sqrt(n), n) -> kstwobign(a) as n->infinity + # cdf(a/sqrt(n), n) is an increasing function of n (and a) + # Check that the function is indeed increasing (allowing for some + # small floating point and algorithm differences.) 
+ x = np.linspace(0, 2, 11)[1:] + ns = [50, 100, 200, 400, 1000, 2000] + for _x in x: + xn = _x / np.sqrt(ns) + probs = stats.kstwo.cdf(xn, ns) + diffs = np.diff(probs) + assert_array_less(diffs, 1e-8) + + def test_cdf_sf(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + vals_cdf = stats.kstwo.cdf(x, n) + vals_sf = stats.kstwo.sf(x, n) + assert_array_almost_equal(vals_cdf, 1 - vals_sf) + + def test_cdf_sf_sqrtn(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + xn = x / np.sqrt(n) + vals_cdf = stats.kstwo.cdf(xn, n) + vals_sf = stats.kstwo.sf(xn, n) + assert_array_almost_equal(vals_cdf, 1 - vals_sf) + + def test_ppf_of_cdf(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + xn = x[x > 0.5/n] + vals_cdf = stats.kstwo.cdf(xn, n) + # CDFs close to 1 are better dealt with using the SF + cond = (0 < vals_cdf) & (vals_cdf < 0.99) + vals = stats.kstwo.ppf(vals_cdf, n) + assert_allclose(vals[cond], xn[cond], rtol=1e-4) + + def test_isf_of_sf(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + xn = x[x > 0.5/n] + vals_isf = stats.kstwo.isf(xn, n) + cond = (0 < vals_isf) & (vals_isf < 1.0) + vals = stats.kstwo.sf(vals_isf, n) + assert_allclose(vals[cond], xn[cond], rtol=1e-4) + + def test_ppf_of_cdf_sqrtn(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + xn = (x / np.sqrt(n))[x > 0.5/n] + vals_cdf = stats.kstwo.cdf(xn, n) + cond = (0 < vals_cdf) & (vals_cdf < 1.0) + vals = stats.kstwo.ppf(vals_cdf, n) + assert_allclose(vals[cond], xn[cond]) + + def test_isf_of_sf_sqrtn(self): + x = np.linspace(0, 1, 11) + for n in [1, 2, 3, 10, 100, 1000]: + xn = (x / np.sqrt(n))[x > 0.5/n] + vals_sf = stats.kstwo.sf(xn, n) + # SFs close to 1 are better dealt with using the CDF + cond = (0 < vals_sf) & (vals_sf < 0.95) + vals = stats.kstwo.isf(vals_sf, n) + assert_allclose(vals[cond], xn[cond]) + + def test_ppf(self): + probs = np.linspace(0, 1, 11)[1:] + for n in [1, 2, 3, 10, 
100, 1000]: + xn = stats.kstwo.ppf(probs, n) + vals_cdf = stats.kstwo.cdf(xn, n) + assert_allclose(vals_cdf, probs) + + def test_simard_lecuyer_table1(self): + # Compute the cdf for values near the mean of the distribution. + # The mean u ~ log(2)*sqrt(pi/(2n)) + # Compute for x in [u/4, u/3, u/2, u, 2u, 3u] + # This is the computation of Table 1 of Simard, R., L'Ecuyer, P. (2011) + # "Computing the Two-Sided Kolmogorov-Smirnov Distribution". + # Except that the values below are not from the published table, but + # were generated using an independent SageMath implementation of + # Durbin's algorithm (with the exponentiation and scaling of + # Marsaglia/Tsang/Wang's version) using 500 bit arithmetic. + # Some of the values in the published table have relative + # errors greater than 1e-4. + ns = [10, 50, 100, 200, 500, 1000] + ratios = np.array([1.0/4, 1.0/3, 1.0/2, 1, 2, 3]) + expected = np.array([ + [1.92155292e-08, 5.72933228e-05, 2.15233226e-02, 6.31566589e-01, + 9.97685592e-01, 9.99999942e-01], + [2.28096224e-09, 1.99142563e-05, 1.42617934e-02, 5.95345542e-01, + 9.96177701e-01, 9.99998662e-01], + [1.00201886e-09, 1.32673079e-05, 1.24608594e-02, 5.86163220e-01, + 9.95866877e-01, 9.99998240e-01], + [4.93313022e-10, 9.52658029e-06, 1.12123138e-02, 5.79486872e-01, + 9.95661824e-01, 9.99997964e-01], + [2.37049293e-10, 6.85002458e-06, 1.01309221e-02, 5.73427224e-01, + 9.95491207e-01, 9.99997750e-01], + [1.56990874e-10, 5.71738276e-06, 9.59725430e-03, 5.70322692e-01, + 9.95409545e-01, 9.99997657e-01] + ]) + for idx, n in enumerate(ns): + x = ratios * np.log(2) * np.sqrt(np.pi/2/n) + vals_cdf = stats.kstwo.cdf(x, n) + assert_allclose(vals_cdf, expected[idx], rtol=1e-5) + + +class TestZipf: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.zipf.rvs(1.5, size=(2, 50)) + assert_(numpy.all(vals >= 1)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.zipf.rvs(1.5) + 
assert_(isinstance(val, int)) + val = stats.zipf(1.5).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + + def test_moments(self): + # n-th moment is finite iff a > n + 1 + m, v = stats.zipf.stats(a=2.8) + assert_(np.isfinite(m)) + assert_equal(v, np.inf) + + s, k = stats.zipf.stats(a=4.8, moments='sk') + assert_(not np.isfinite([s, k]).all()) + + +class TestDLaplace: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + vals = stats.dlaplace.rvs(1.5, size=(2, 50)) + assert_(numpy.shape(vals) == (2, 50)) + assert_(vals.dtype.char in typecodes['AllInteger']) + val = stats.dlaplace.rvs(1.5) + assert_(isinstance(val, int)) + val = stats.dlaplace(1.5).rvs(3) + assert_(isinstance(val, numpy.ndarray)) + assert_(val.dtype.char in typecodes['AllInteger']) + assert_(stats.dlaplace.rvs(0.8) is not None) + + def test_stats(self): + # compare the explicit formulas w/ direct summation using pmf + a = 1. + dl = stats.dlaplace(a) + m, v, s, k = dl.stats('mvsk') + + N = 37 + xx = np.arange(-N, N+1) + pp = dl.pmf(xx) + m2, m4 = np.sum(pp*xx**2), np.sum(pp*xx**4) + assert_equal((m, s), (0, 0)) + assert_allclose((v, k), (m2, m4/m2**2 - 3.), atol=1e-14, rtol=1e-8) + + def test_stats2(self): + a = np.log(2.) 
+ dl = stats.dlaplace(a) + m, v, s, k = dl.stats('mvsk') + assert_equal((m, s), (0., 0.)) + assert_allclose((v, k), (4., 3.25)) + + +class TestInvgauss: + def setup_method(self): + np.random.seed(1234) + + @pytest.mark.parametrize("rvs_mu,rvs_loc,rvs_scale", + [(2, 0, 1), (4.635, 4.362, 6.303)]) + def test_fit(self, rvs_mu, rvs_loc, rvs_scale): + data = stats.invgauss.rvs(size=100, mu=rvs_mu, + loc=rvs_loc, scale=rvs_scale) + # Analytical MLEs are calculated with formula when `floc` is fixed + mu, loc, scale = stats.invgauss.fit(data, floc=rvs_loc) + + data = data - rvs_loc + mu_temp = np.mean(data) + scale_mle = len(data) / (np.sum(data**(-1) - mu_temp**(-1))) + mu_mle = mu_temp/scale_mle + + # `mu` and `scale` match analytical formula + assert_allclose(mu_mle, mu, atol=1e-15, rtol=1e-15) + assert_allclose(scale_mle, scale, atol=1e-15, rtol=1e-15) + assert_equal(loc, rvs_loc) + data = stats.invgauss.rvs(size=100, mu=rvs_mu, + loc=rvs_loc, scale=rvs_scale) + # fixed parameters are returned + mu, loc, scale = stats.invgauss.fit(data, floc=rvs_loc - 1, + fscale=rvs_scale + 1) + assert_equal(rvs_scale + 1, scale) + assert_equal(rvs_loc - 1, loc) + + # shape can still be fixed with multiple names + shape_mle1 = stats.invgauss.fit(data, fmu=1.04)[0] + shape_mle2 = stats.invgauss.fit(data, fix_mu=1.04)[0] + shape_mle3 = stats.invgauss.fit(data, f0=1.04)[0] + assert shape_mle1 == shape_mle2 == shape_mle3 == 1.04 + + @pytest.mark.parametrize("rvs_mu,rvs_loc,rvs_scale", + [(2, 0, 1), (6.311, 3.225, 4.520)]) + def test_fit_MLE_comp_optimizer(self, rvs_mu, rvs_loc, rvs_scale): + rng = np.random.RandomState(1234) + data = stats.invgauss.rvs(size=100, mu=rvs_mu, + loc=rvs_loc, scale=rvs_scale, random_state=rng) + + super_fit = super(type(stats.invgauss), stats.invgauss).fit + # fitting without `floc` uses superclass fit method + super_fitted = super_fit(data) + invgauss_fit = stats.invgauss.fit(data) + assert_equal(super_fitted, invgauss_fit) + + # fitting with `fmu` uses
superclass fit method + super_fitted = super_fit(data, floc=0, fmu=2) + invgauss_fit = stats.invgauss.fit(data, floc=0, fmu=2) + assert_equal(super_fitted, invgauss_fit) + + # fixed `floc` uses analytical formula and provides better fit than + # super method + _assert_less_or_close_loglike(stats.invgauss, data, floc=rvs_loc) + + # fixed `floc` not resulting in invalid data < 0 uses analytical + # formulas and provides a better fit than the super method + assert np.all((data - (rvs_loc - 1)) > 0) + _assert_less_or_close_loglike(stats.invgauss, data, floc=rvs_loc - 1) + + # fixed `floc` to an arbitrary number, 0, still provides a better fit + # than the super method + _assert_less_or_close_loglike(stats.invgauss, data, floc=0) + + # fixed `fscale` to an arbitrary number still provides a better fit + # than the super method + _assert_less_or_close_loglike(stats.invgauss, data, floc=rvs_loc, + fscale=np.random.rand(1)[0]) + + def test_fit_raise_errors(self): + assert_fit_warnings(stats.invgauss) + # FitDataError is raised when negative invalid data + with pytest.raises(FitDataError): + stats.invgauss.fit([1, 2, 3], floc=2) + + def test_cdf_sf(self): + # Regression tests for gh-13614. + # Ground truth from R's statmod library (pinvgauss), e.g. + # library(statmod) + # options(digits=15) + # mu = c(4.17022005e-04, 7.20324493e-03, 1.14374817e-06, + # 3.02332573e-03, 1.46755891e-03) + # print(pinvgauss(5, mu, 1)) + + # make sure a finite value is returned when mu is very small. see + # GH-13614 + mu = [4.17022005e-04, 7.20324493e-03, 1.14374817e-06, + 3.02332573e-03, 1.46755891e-03] + expected = [1, 1, 1, 1, 1] + actual = stats.invgauss.cdf(0.4, mu=mu) + assert_equal(expected, actual) + + # test if the function can distinguish small left/right tail + # probabilities from zero. 
+ cdf_actual = stats.invgauss.cdf(0.001, mu=1.05) + assert_allclose(cdf_actual, 4.65246506892667e-219) + sf_actual = stats.invgauss.sf(110, mu=1.05) + assert_allclose(sf_actual, 4.12851625944048e-25) + + # test if x does not cause numerical issues when mu is very small + # and x is close to mu in value. + + # slightly smaller than mu + actual = stats.invgauss.cdf(0.00009, 0.0001) + assert_allclose(actual, 2.9458022894924e-26) + + # slightly bigger than mu + actual = stats.invgauss.cdf(0.000102, 0.0001) + assert_allclose(actual, 0.976445540507925) + + def test_logcdf_logsf(self): + # Regression tests for improvements made in gh-13616. + # Ground truth from R's statmod library (pinvgauss), e.g. + # library(statmod) + # options(digits=15) + # print(pinvgauss(0.001, 1.05, 1, log.p=TRUE, lower.tail=FALSE)) + + # test if logcdf and logsf can compute values too small to + # be represented on the unlogged scale. See: gh-13616 + logcdf = stats.invgauss.logcdf(0.0001, mu=1.05) + assert_allclose(logcdf, -5003.87872590367) + logcdf = stats.invgauss.logcdf(110, 1.05) + assert_allclose(logcdf, -4.12851625944087e-25) + logsf = stats.invgauss.logsf(0.001, mu=1.05) + assert_allclose(logsf, -4.65246506892676e-219) + logsf = stats.invgauss.logsf(110, 1.05) + assert_allclose(logsf, -56.1467092416426) + + # from mpmath import mp + # mp.dps = 100 + # mu = mp.mpf(1e-2) + # ref = (1/2 * mp.log(2 * mp.pi * mp.e * mu**3) + # - 3/2* mp.exp(2/mu) * mp.e1(2/mu)) + @pytest.mark.parametrize("mu, ref", [(2e-8, -25.172361826883957), + (1e-3, -8.943444010642972), + (1e-2, -5.4962796152622335), + (1e8, 3.3244822568873476), + (1e100, 3.32448280139689)]) + def test_entropy(self, mu, ref): + assert_allclose(stats.invgauss.entropy(mu), ref, rtol=5e-14) + + +class TestLaplace: + @pytest.mark.parametrize("rvs_loc", [-5, 0, 1, 2]) + @pytest.mark.parametrize("rvs_scale", [1, 2, 3, 10]) + def test_fit(self, rvs_loc, rvs_scale): + # tests that various inputs follow expected behavior + # for a variety of `loc` 
and `scale`. + rng = np.random.RandomState(1234) + data = stats.laplace.rvs(size=100, loc=rvs_loc, scale=rvs_scale, + random_state=rng) + + # MLE estimates are given by + loc_mle = np.median(data) + scale_mle = np.sum(np.abs(data - loc_mle)) / len(data) + + # standard outputs should match analytical MLE formulas + loc, scale = stats.laplace.fit(data) + assert_allclose(loc, loc_mle, atol=1e-15, rtol=1e-15) + assert_allclose(scale, scale_mle, atol=1e-15, rtol=1e-15) + + # fixed parameter should use analytical formula for other + loc, scale = stats.laplace.fit(data, floc=loc_mle) + assert_allclose(scale, scale_mle, atol=1e-15, rtol=1e-15) + loc, scale = stats.laplace.fit(data, fscale=scale_mle) + assert_allclose(loc, loc_mle) + + # test with non-mle fixed parameter + # create scale with non-median loc + loc = rvs_loc * 2 + scale_mle = np.sum(np.abs(data - loc)) / len(data) + + # fixed loc to non median, scale should match + # scale calculation with modified loc + loc, scale = stats.laplace.fit(data, floc=loc) + assert_equal(scale_mle, scale) + + # fixed scale created with non median loc, + # loc output should still be the data median. 
+ loc, scale = stats.laplace.fit(data, fscale=scale_mle) + assert_equal(loc_mle, loc) + + # error raised when both `floc` and `fscale` are fixed + assert_raises(RuntimeError, stats.laplace.fit, data, floc=loc_mle, + fscale=scale_mle) + + # error is raised with non-finite values + assert_raises(ValueError, stats.laplace.fit, [np.nan]) + assert_raises(ValueError, stats.laplace.fit, [np.inf]) + + @pytest.mark.parametrize("rvs_loc,rvs_scale", [(-5, 10), + (10, 5), + (0.5, 0.2)]) + def test_fit_MLE_comp_optimizer(self, rvs_loc, rvs_scale): + rng = np.random.RandomState(1234) + data = stats.laplace.rvs(size=1000, loc=rvs_loc, scale=rvs_scale, + random_state=rng) + + # the log-likelihood function for laplace is given by + def ll(loc, scale, data): + return -1 * (- (len(data)) * np.log(2*scale) - + (1/scale)*np.sum(np.abs(data - loc))) + + # test that the objective function result of the analytical MLEs is + # less than or equal to that of the numerically optimized estimate + loc, scale = stats.laplace.fit(data) + loc_opt, scale_opt = super(type(stats.laplace), + stats.laplace).fit(data) + ll_mle = ll(loc, scale, data) + ll_opt = ll(loc_opt, scale_opt, data) + assert ll_mle < ll_opt or np.allclose(ll_mle, ll_opt, + atol=1e-15, rtol=1e-15) + + def test_fit_simple_non_random_data(self): + data = np.array([1.0, 1.0, 3.0, 5.0, 8.0, 14.0]) + # with `floc` fixed to 6, scale should be 4. + loc, scale = stats.laplace.fit(data, floc=6) + assert_allclose(scale, 4, atol=1e-15, rtol=1e-15) + # with `fscale` fixed to 6, loc should be 4. + loc, scale = stats.laplace.fit(data, fscale=6) + assert_allclose(loc, 4, atol=1e-15, rtol=1e-15) + + def test_sf_cdf_extremes(self): + # These calculations should not generate warnings. + x = 1000 + p0 = stats.laplace.cdf(-x) + # The exact value is smaller than can be represented with + # 64 bit floating point, so the expected result is 0. + assert p0 == 0.0 + # The closest 64 bit floating point representation of the + # exact value is 1.0. 
+ p1 = stats.laplace.cdf(x) + assert p1 == 1.0 + + p0 = stats.laplace.sf(x) + # The exact value is smaller than can be represented with + # 64 bit floating point, so the expected result is 0. + assert p0 == 0.0 + # The closest 64 bit floating point representation of the + # exact value is 1.0. + p1 = stats.laplace.sf(-x) + assert p1 == 1.0 + + def test_sf(self): + x = 200 + p = stats.laplace.sf(x) + assert_allclose(p, np.exp(-x)/2, rtol=1e-13) + + def test_isf(self): + p = 1e-25 + x = stats.laplace.isf(p) + assert_allclose(x, -np.log(2*p), rtol=1e-13) + + +class TestLogLaplace: + + def test_sf(self): + # reference values were computed via the reference distribution, e.g. + # mp.dps = 100; LogLaplace(c=c).sf(x). + c = np.array([2.0, 3.0, 5.0]) + x = np.array([1e-5, 1e10, 1e15]) + ref = [0.99999999995, 5e-31, 5e-76] + assert_allclose(stats.loglaplace.sf(x, c), ref, rtol=1e-15) + + def test_isf(self): + # reference values were computed via the reference distribution, e.g. + # mp.dps = 100; LogLaplace(c=c).isf(q). + c = 3.25 + q = [0.8, 0.1, 1e-10, 1e-20, 1e-40] + ref = [0.7543222539245642, 1.6408455124660906, 964.4916294395846, + 1151387.578354072, 1640845512466.0906] + assert_allclose(stats.loglaplace.isf(q, c), ref, rtol=1e-14) + + @pytest.mark.parametrize('r', [1, 2, 3, 4]) + def test_moments_stats(self, r): + mom = 'mvsk'[r - 1] + c = np.arange(0.5, r + 0.5, 0.5) + + # r-th non-central moment is infinite if |r| >= c. + assert_allclose(stats.loglaplace.moment(r, c), np.inf) + + # r-th non-central moment is non-finite (inf or nan) if r >= c. 
+ assert not np.any(np.isfinite(stats.loglaplace.stats(c, moments=mom))) + + @pytest.mark.parametrize("c", [0.5, 1.0, 2.0]) + @pytest.mark.parametrize("loc, scale", [(-1.2, 3.45)]) + @pytest.mark.parametrize("fix_c", [True, False]) + @pytest.mark.parametrize("fix_scale", [True, False]) + def test_fit_analytic_mle(self, c, loc, scale, fix_c, fix_scale): + # Test that the analytical MLE produces no worse result than the + # generic (numerical) MLE. + + rng = np.random.default_rng(6762668991392531563) + data = stats.loglaplace.rvs(c, loc=loc, scale=scale, size=100, + random_state=rng) + + kwds = {'floc': loc} + if fix_c: + kwds['fc'] = c + if fix_scale: + kwds['fscale'] = scale + nfree = 3 - len(kwds) + + if nfree == 0: + error_msg = "All parameters fixed. There is nothing to optimize." + with pytest.raises((RuntimeError, ValueError), match=error_msg): + stats.loglaplace.fit(data, **kwds) + return + + _assert_less_or_close_loglike(stats.loglaplace, data, **kwds) + +class TestPowerlaw: + + # In the following data, `sf` was computed with mpmath. 
+ @pytest.mark.parametrize('x, a, sf', + [(0.25, 2.0, 0.9375), + (0.99609375, 1/256, 1.528855235208108e-05)]) + def test_sf(self, x, a, sf): + assert_allclose(stats.powerlaw.sf(x, a), sf, rtol=1e-15) + + @pytest.fixture(scope='function') + def rng(self): + return np.random.default_rng(1234) + + @pytest.mark.parametrize("rvs_shape", [.1, .5, .75, 1, 2]) + @pytest.mark.parametrize("rvs_loc", [-1, 0, 1]) + @pytest.mark.parametrize("rvs_scale", [.1, 1, 5]) + @pytest.mark.parametrize('fix_shape, fix_loc, fix_scale', + [p for p in product([True, False], repeat=3) + if False in p]) + def test_fit_MLE_comp_optimizer(self, rvs_shape, rvs_loc, rvs_scale, + fix_shape, fix_loc, fix_scale, rng): + data = stats.powerlaw.rvs(size=250, a=rvs_shape, loc=rvs_loc, + scale=rvs_scale, random_state=rng) + + kwds = dict() + if fix_shape: + kwds['f0'] = rvs_shape + if fix_loc: + kwds['floc'] = np.nextafter(data.min(), -np.inf) + if fix_scale: + kwds['fscale'] = rvs_scale + + # Numerical result may equal analytical result if some code path + # of the analytical routine makes use of numerical optimization. + _assert_less_or_close_loglike(stats.powerlaw, data, **kwds, + maybe_identical=True) + + def test_problem_case(self): + # An observed problem with the test method indicated that some fixed + # scale values could cause bad results, this is now corrected. 
+ a = 2.50002862645130604506 + location = 0.0 + scale = 35.249023299873095 + + data = stats.powerlaw.rvs(a=a, loc=location, scale=scale, size=100, + random_state=np.random.default_rng(5)) + + kwds = {'fscale': np.ptp(data) * 2} + + _assert_less_or_close_loglike(stats.powerlaw, data, **kwds) + + def test_fit_warnings(self): + assert_fit_warnings(stats.powerlaw) + # test for error when `np.max(data) <= floc + fscale` is not satisfied + msg = r" Maximum likelihood estimation with 'powerlaw' requires" + with assert_raises(FitDataError, match=msg): + stats.powerlaw.fit([1, 2, 4], floc=0, fscale=3) + + # test for error when `data - floc >= 0` is not satisfied + msg = r" Maximum likelihood estimation with 'powerlaw' requires" + with assert_raises(FitDataError, match=msg): + stats.powerlaw.fit([1, 2, 4], floc=2) + + # test for fixed location not less than `min(data)`. + msg = r" Maximum likelihood estimation with 'powerlaw' requires" + with assert_raises(FitDataError, match=msg): + stats.powerlaw.fit([1, 2, 4], floc=1) + + # test for when fixed scale is negative or zero + msg = r"Negative or zero `fscale` is outside" + with assert_raises(ValueError, match=msg): + stats.powerlaw.fit([1, 2, 4], fscale=-3) + + # test for when fixed scale is less than or equal to range of data + msg = r"`fscale` must be greater than the range of data." + with assert_raises(ValueError, match=msg): + stats.powerlaw.fit([1, 2, 4], fscale=3) + + def test_minimum_data_zero_gh17801(self): + # gh-17801 reported an overflow error when the minimum value of the + # data is zero. Check that this problem is resolved.
+ data = [0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 5, 6] + dist = stats.powerlaw + with np.errstate(over='ignore'): + _assert_less_or_close_loglike(dist, data) + + +class TestPowerLogNorm: + + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 80 + # def powerlognorm_sf_mp(x, c, s): + # x = mp.mpf(x) + # c = mp.mpf(c) + # s = mp.mpf(s) + # return mp.ncdf(-mp.log(x) / s)**c + # + # def powerlognormal_cdf_mp(x, c, s): + # return mp.one - powerlognorm_sf_mp(x, c, s) + # + # x, c, s = 100, 20, 1 + # print(float(powerlognorm_sf_mp(x, c, s))) + + @pytest.mark.parametrize("x, c, s, ref", + [(100, 20, 1, 1.9057100820561928e-114), + (1e-3, 20, 1, 0.9999999999507617), + (1e-3, 0.02, 1, 0.9999999999999508), + (1e22, 0.02, 1, 6.50744044621611e-12)]) + def test_sf(self, x, c, s, ref): + assert_allclose(stats.powerlognorm.sf(x, c, s), ref, rtol=1e-13) + + # reference values were computed via mpmath using the survival + # function above (passing in `ref` and getting `q`). + @pytest.mark.parametrize("q, c, s, ref", + [(0.9999999587870905, 0.02, 1, 0.01), + (6.690376686108851e-233, 20, 1, 1000)]) + def test_isf(self, q, c, s, ref): + assert_allclose(stats.powerlognorm.isf(q, c, s), ref, rtol=5e-11) + + @pytest.mark.parametrize("x, c, s, ref", + [(1e25, 0.02, 1, 0.9999999999999963), + (1e-6, 0.02, 1, 2.054921078040843e-45), + (1e-6, 200, 1, 2.0549210780408428e-41), + (0.3, 200, 1, 0.9999999999713368)]) + def test_cdf(self, x, c, s, ref): + assert_allclose(stats.powerlognorm.cdf(x, c, s), ref, rtol=3e-14) + + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 50 + # def powerlognorm_pdf_mpmath(x, c, s): + # x = mp.mpf(x) + # c = mp.mpf(c) + # s = mp.mpf(s) + # res = (c/(x * s) * mp.npdf(mp.log(x)/s) * + # mp.ncdf(-mp.log(x)/s)**(c - mp.one)) + # return float(res) + + @pytest.mark.parametrize("x, c, s, ref", + [(1e22, 0.02, 1, 6.5954987852335016e-34), + (1e20, 1e-3, 1, 1.588073750563988e-22), + (1e40, 1e-3, 1, 
1.3179391812506349e-43)]) + def test_pdf(self, x, c, s, ref): + assert_allclose(stats.powerlognorm.pdf(x, c, s), ref, rtol=3e-12) + + +class TestPowerNorm: + + # survival function references were computed with mpmath via + # from mpmath import mp + # x = mp.mpf(x) + # c = mp.mpf(c) + # float(mp.ncdf(-x)**c) + + @pytest.mark.parametrize("x, c, ref", + [(9, 1, 1.1285884059538405e-19), + (20, 2, 7.582445786569958e-178), + (100, 0.02, 3.330957891903866e-44), + (200, 0.01, 1.3004759092324774e-87)]) + def test_sf(self, x, c, ref): + assert_allclose(stats.powernorm.sf(x, c), ref, rtol=1e-13) + + # inverse survival function references were computed with mpmath via + # from mpmath import mp + # def isf_mp(q, c): + # q = mp.mpf(q) + # c = mp.mpf(c) + # arg = q**(mp.one / c) + # return float(-mp.sqrt(2) * mp.erfinv(mp.mpf(2.) * arg - mp.one)) + + @pytest.mark.parametrize("q, c, ref", + [(1e-5, 20, -0.15690800666514138), + (0.99999, 100, -5.19933666203545), + (0.9999, 0.02, -2.576676052143387), + (5e-2, 0.02, 17.089518110222244), + (1e-18, 2, 5.9978070150076865), + (1e-50, 5, 6.361340902404057)]) + def test_isf(self, q, c, ref): + assert_allclose(stats.powernorm.isf(q, c), ref, rtol=5e-12) + + # CDF reference values were computed with mpmath via + # from mpmath import mp + # def cdf_mp(x, c): + # x = mp.mpf(x) + # c = mp.mpf(c) + # return float(mp.one - mp.ncdf(-x)**c) + + @pytest.mark.parametrize("x, c, ref", + [(-12, 9, 1.598833900869911e-32), + (2, 9, 0.9999999999999983), + (-20, 9, 2.4782617067456103e-88), + (-5, 0.02, 5.733032242841443e-09), + (-20, 0.02, 5.507248237212467e-91)]) + def test_cdf(self, x, c, ref): + assert_allclose(stats.powernorm.cdf(x, c), ref, rtol=5e-14) + + +class TestInvGamma: + def test_invgamma_inf_gh_1866(self): + # invgamma's moments are only finite for a>n + # specific numbers checked w/ boost 1.54 + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + mvsk = stats.invgamma.stats(a=19.31, moments='mvsk') + expected =
[0.05461496450, 0.0001723162534, 1.020362676, + 2.055616582] + assert_allclose(mvsk, expected) + + a = [1.1, 3.1, 5.6] + mvsk = stats.invgamma.stats(a=a, moments='mvsk') + expected = ([10., 0.476190476, 0.2173913043], # mmm + [np.inf, 0.2061430632, 0.01312749422], # vvv + [np.nan, 41.95235392, 2.919025532], # sss + [np.nan, np.nan, 24.51923076]) # kkk + for x, y in zip(mvsk, expected): + assert_almost_equal(x, y) + + def test_cdf_ppf(self): + # gh-6245 + x = np.logspace(-2.6, 0) + y = stats.invgamma.cdf(x, 1) + xx = stats.invgamma.ppf(y, 1) + assert_allclose(x, xx) + + def test_sf_isf(self): + # gh-6245 + if sys.maxsize > 2**32: + x = np.logspace(2, 100) + else: + # Invgamma roundtrip on 32-bit systems has relative accuracy + # ~1e-15 until x=1e+15, and becomes inf above x=1e+18 + x = np.logspace(2, 18) + + y = stats.invgamma.sf(x, 1) + xx = stats.invgamma.isf(y, 1) + assert_allclose(x, xx, rtol=1.0) + + @pytest.mark.parametrize("a, ref", + [(100000000.0, -26.21208257605721), + (1e+100, -343.9688254159022)]) + def test_large_entropy(self, a, ref): + # The reference values were calculated with mpmath: + # from mpmath import mp + # mp.dps = 500 + + # def invgamma_entropy(a): + # a = mp.mpf(a) + # h = a + mp.loggamma(a) - (mp.one + a) * mp.digamma(a) + # return float(h) + assert_allclose(stats.invgamma.entropy(a), ref, rtol=1e-15) + + +class TestF: + def test_endpoints(self): + # Compute the pdf at the left endpoint dst.a.
+ data = [[stats.f, (2, 1), 1.0]] + for _f, _args, _correct in data: + ans = _f.pdf(_f.a, *_args) + + ans = [_f.pdf(_f.a, *_args) for _f, _args, _ in data] + correct = [_correct_ for _f, _args, _correct_ in data] + assert_array_almost_equal(ans, correct) + + def test_f_moments(self): + # n-th moment of F distributions is only finite for n < dfd / 2 + m, v, s, k = stats.f.stats(11, 6.5, moments='mvsk') + assert_(np.isfinite(m)) + assert_(np.isfinite(v)) + assert_(np.isfinite(s)) + assert_(not np.isfinite(k)) + + def test_moments_warnings(self): + # no warnings should be generated for dfd = 2, 4, 6, 8 (div by zero) + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + stats.f.stats(dfn=[11]*4, dfd=[2, 4, 6, 8], moments='mvsk') + + def test_stats_broadcast(self): + dfn = np.array([[3], [11]]) + dfd = np.array([11, 12]) + m, v, s, k = stats.f.stats(dfn=dfn, dfd=dfd, moments='mvsk') + m2 = [dfd / (dfd - 2)]*2 + assert_allclose(m, m2) + v2 = 2 * dfd**2 * (dfn + dfd - 2) / dfn / (dfd - 2)**2 / (dfd - 4) + assert_allclose(v, v2) + s2 = ((2*dfn + dfd - 2) * np.sqrt(8*(dfd - 4)) / + ((dfd - 6) * np.sqrt(dfn*(dfn + dfd - 2)))) + assert_allclose(s, s2) + k2num = 12 * (dfn * (5*dfd - 22) * (dfn + dfd - 2) + + (dfd - 4) * (dfd - 2)**2) + k2den = dfn * (dfd - 6) * (dfd - 8) * (dfn + dfd - 2) + k2 = k2num / k2den + assert_allclose(k, k2) + + +class TestStudentT: + def test_rvgeneric_std(self): + # Regression test for #1191 + assert_array_almost_equal(stats.t.std([5, 6]), [1.29099445, 1.22474487]) + + def test_moments_t(self): + # regression test for #8786 + assert_equal(stats.t.stats(df=1, moments='mvsk'), + (np.inf, np.nan, np.nan, np.nan)) + assert_equal(stats.t.stats(df=1.01, moments='mvsk'), + (0.0, np.inf, np.nan, np.nan)) + assert_equal(stats.t.stats(df=2, moments='mvsk'), + (0.0, np.inf, np.nan, np.nan)) + assert_equal(stats.t.stats(df=2.01, moments='mvsk'), + (0.0, 2.01/(2.01-2.0), np.nan, np.inf)) + assert_equal(stats.t.stats(df=3, 
moments='sk'), (np.nan, np.inf)) + assert_equal(stats.t.stats(df=3.01, moments='sk'), (0.0, np.inf)) + assert_equal(stats.t.stats(df=4, moments='sk'), (0.0, np.inf)) + assert_equal(stats.t.stats(df=4.01, moments='sk'), (0.0, 6.0/(4.01 - 4.0))) + + def test_t_entropy(self): + df = [1, 2, 25, 100] + # Expected values were computed with mpmath. + expected = [2.5310242469692907, 1.9602792291600821, + 1.459327578078393, 1.4289633653182439] + assert_allclose(stats.t.entropy(df), expected, rtol=1e-13) + + @pytest.mark.parametrize("v, ref", + [(100, 1.4289633653182439), + (1e+100, 1.4189385332046727)]) + def test_t_extreme_entropy(self, v, ref): + # Reference values were calculated with mpmath: + # from mpmath import mp + # mp.dps = 500 + # + # def t_entropy(v): + # v = mp.mpf(v) + # C = (v + mp.one) / 2 + # A = C * (mp.digamma(C) - mp.digamma(v / 2)) + # B = 0.5 * mp.log(v) + mp.log(mp.beta(v / 2, mp.one / 2)) + # h = A + B + # return float(h) + assert_allclose(stats.t.entropy(v), ref, rtol=1e-14) + + @pytest.mark.parametrize("methname", ["pdf", "logpdf", "cdf", + "ppf", "sf", "isf"]) + @pytest.mark.parametrize("df_infmask", [[0, 0], [1, 1], [0, 1], + [[0, 1, 0], [1, 1, 1]], + [[1, 0], [0, 1]], + [[0], [1]]]) + def test_t_inf_df(self, methname, df_infmask): + np.random.seed(0) + df_infmask = np.asarray(df_infmask, dtype=bool) + df = np.random.uniform(0, 10, size=df_infmask.shape) + x = np.random.randn(*df_infmask.shape) + df[df_infmask] = np.inf + t_dist = stats.t(df=df, loc=3, scale=1) + t_dist_ref = stats.t(df=df[~df_infmask], loc=3, scale=1) + norm_dist = stats.norm(loc=3, scale=1) + t_meth = getattr(t_dist, methname) + t_meth_ref = getattr(t_dist_ref, methname) + norm_meth = getattr(norm_dist, methname) + res = t_meth(x) + assert_equal(res[df_infmask], norm_meth(x[df_infmask])) + assert_equal(res[~df_infmask], t_meth_ref(x[~df_infmask])) + + @pytest.mark.parametrize("df_infmask", [[0, 0], [1, 1], [0, 1], + [[0, 1, 0], [1, 1, 1]], + [[1, 0], [0, 1]], + [[0], [1]]]) + 
def test_t_inf_df_stats_entropy(self, df_infmask): + np.random.seed(0) + df_infmask = np.asarray(df_infmask, dtype=bool) + df = np.random.uniform(0, 10, size=df_infmask.shape) + df[df_infmask] = np.inf + res = stats.t.stats(df=df, loc=3, scale=1, moments='mvsk') + res_ex_inf = stats.norm.stats(loc=3, scale=1, moments='mvsk') + res_ex_noinf = stats.t.stats(df=df[~df_infmask], loc=3, scale=1, + moments='mvsk') + for i in range(4): + assert_equal(res[i][df_infmask], res_ex_inf[i]) + assert_equal(res[i][~df_infmask], res_ex_noinf[i]) + + res = stats.t.entropy(df=df, loc=3, scale=1) + res_ex_inf = stats.norm.entropy(loc=3, scale=1) + res_ex_noinf = stats.t.entropy(df=df[~df_infmask], loc=3, scale=1) + assert_equal(res[df_infmask], res_ex_inf) + assert_equal(res[~df_infmask], res_ex_noinf) + + def test_logpdf_pdf(self): + # reference values were computed via the reference distribution, e.g. + # mp.dps = 500; StudentT(df=df).logpdf(x), StudentT(df=df).pdf(x) + x = [1, 1e3, 10, 1] + df = [1e100, 1e50, 1e20, 1] + logpdf_ref = [-1.4189385332046727, -500000.9189385332, + -50.918938533204674, -1.8378770664093456] + pdf_ref = [0.24197072451914334, 0, + 7.69459862670642e-23, 0.15915494309189535] + assert_allclose(stats.t.logpdf(x, df), logpdf_ref, rtol=1e-14) + assert_allclose(stats.t.pdf(x, df), pdf_ref, rtol=1e-14) + + +class TestRvDiscrete: + def setup_method(self): + np.random.seed(1234) + + def test_rvs(self): + states = [-1, 0, 1, 2, 3, 4] + probability = [0.0, 0.3, 0.4, 0.0, 0.3, 0.0] + samples = 1000 + r = stats.rv_discrete(name='sample', values=(states, probability)) + x = r.rvs(size=samples) + assert_(isinstance(x, numpy.ndarray)) + + for s, p in zip(states, probability): + assert_(abs(sum(x == s)/float(samples) - p) < 0.05) + + x = r.rvs() + assert np.issubdtype(type(x), np.integer) + + def test_entropy(self): + # Basic tests of entropy. 
+ pvals = np.array([0.25, 0.45, 0.3]) + p = stats.rv_discrete(values=([0, 1, 2], pvals)) + expected_h = -sum(xlogy(pvals, pvals)) + h = p.entropy() + assert_allclose(h, expected_h) + + p = stats.rv_discrete(values=([0, 1, 2], [1.0, 0, 0])) + h = p.entropy() + assert_equal(h, 0.0) + + def test_pmf(self): + xk = [1, 2, 4] + pk = [0.5, 0.3, 0.2] + rv = stats.rv_discrete(values=(xk, pk)) + + x = [[1., 4.], + [3., 2]] + assert_allclose(rv.pmf(x), + [[0.5, 0.2], + [0., 0.3]], atol=1e-14) + + def test_cdf(self): + xk = [1, 2, 4] + pk = [0.5, 0.3, 0.2] + rv = stats.rv_discrete(values=(xk, pk)) + + x_values = [-2, 1., 1.1, 1.5, 2.0, 3.0, 4, 5] + expected = [0, 0.5, 0.5, 0.5, 0.8, 0.8, 1, 1] + assert_allclose(rv.cdf(x_values), expected, atol=1e-14) + + # also check scalar arguments + assert_allclose([rv.cdf(xx) for xx in x_values], + expected, atol=1e-14) + + def test_ppf(self): + xk = [1, 2, 4] + pk = [0.5, 0.3, 0.2] + rv = stats.rv_discrete(values=(xk, pk)) + + q_values = [0.1, 0.5, 0.6, 0.8, 0.9, 1.] 
+ expected = [1, 1, 2, 2, 4, 4] + assert_allclose(rv.ppf(q_values), expected, atol=1e-14) + + # also check scalar arguments + assert_allclose([rv.ppf(q) for q in q_values], + expected, atol=1e-14) + + def test_cdf_ppf_next(self): + # copied and special cased from test_discrete_basic + vals = ([1, 2, 4, 7, 8], [0.1, 0.2, 0.3, 0.3, 0.1]) + rv = stats.rv_discrete(values=vals) + + assert_array_equal(rv.ppf(rv.cdf(rv.xk[:-1]) + 1e-8), + rv.xk[1:]) + + def test_multidimension(self): + xk = np.arange(12).reshape((3, 4)) + pk = np.array([[0.1, 0.1, 0.15, 0.05], + [0.1, 0.1, 0.05, 0.05], + [0.1, 0.1, 0.05, 0.05]]) + rv = stats.rv_discrete(values=(xk, pk)) + + assert_allclose(rv.expect(), np.sum(rv.xk * rv.pk), atol=1e-14) + + def test_bad_input(self): + xk = [1, 2, 3] + pk = [0.5, 0.5] + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + pk = [1, 2, 3] + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + xk = [1, 2, 3] + pk = [0.5, 1.2, -0.7] + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + xk = [1, 2, 3, 4, 5] + pk = [0.3, 0.3, 0.3, 0.3, -0.2] + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + xk = [1, 1] + pk = [0.5, 0.5] + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + def test_shape_rv_sample(self): + # tests added for gh-9565 + + # mismatch of 2d inputs + xk, pk = np.arange(4).reshape((2, 2)), np.full((2, 3), 1/6) + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + # same number of elements, but shapes not compatible + xk, pk = np.arange(6).reshape((3, 2)), np.full((2, 3), 1/6) + assert_raises(ValueError, stats.rv_discrete, **dict(values=(xk, pk))) + + # same shapes => no error + xk, pk = np.arange(6).reshape((3, 2)), np.full((3, 2), 1/6) + assert_equal(stats.rv_discrete(values=(xk, pk)).pmf(0), 1/6) + + def test_expect1(self): + xk = [1, 2, 4, 6, 7, 11] + pk = [0.1, 0.2, 0.2, 0.2, 0.2, 0.1] + rv = 
stats.rv_discrete(values=(xk, pk)) + + assert_allclose(rv.expect(), np.sum(rv.xk * rv.pk), atol=1e-14) + + def test_expect2(self): + # rv_sample should override _expect. Bug report from + # https://stackoverflow.com/questions/63199792 + y = [200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0, 900.0, 1000.0, + 1100.0, 1200.0, 1300.0, 1400.0, 1500.0, 1600.0, 1700.0, 1800.0, + 1900.0, 2000.0, 2100.0, 2200.0, 2300.0, 2400.0, 2500.0, 2600.0, + 2700.0, 2800.0, 2900.0, 3000.0, 3100.0, 3200.0, 3300.0, 3400.0, + 3500.0, 3600.0, 3700.0, 3800.0, 3900.0, 4000.0, 4100.0, 4200.0, + 4300.0, 4400.0, 4500.0, 4600.0, 4700.0, 4800.0] + + py = [0.0004, 0.0, 0.0033, 0.006500000000000001, 0.0, 0.0, + 0.004399999999999999, 0.6862, 0.0, 0.0, 0.0, + 0.00019999999999997797, 0.0006000000000000449, + 0.024499999999999966, 0.006400000000000072, + 0.0043999999999999595, 0.019499999999999962, + 0.03770000000000007, 0.01759999999999995, 0.015199999999999991, + 0.018100000000000005, 0.04500000000000004, 0.0025999999999999357, + 0.0, 0.0041000000000001036, 0.005999999999999894, + 0.0042000000000000925, 0.0050000000000000044, + 0.0041999999999999815, 0.0004999999999999449, + 0.009199999999999986, 0.008200000000000096, + 0.0, 0.0, 0.0046999999999999265, 0.0019000000000000128, + 0.0006000000000000449, 0.02510000000000001, 0.0, + 0.007199999999999984, 0.0, 0.012699999999999934, 0.0, 0.0, + 0.008199999999999985, 0.005600000000000049, 0.0] + + rv = stats.rv_discrete(values=(y, py)) + + # check the mean + assert_allclose(rv.expect(), rv.mean(), atol=1e-14) + assert_allclose(rv.expect(), + sum(v * w for v, w in zip(y, py)), atol=1e-14) + + # also check the second moment + assert_allclose(rv.expect(lambda x: x**2), + sum(v**2 * w for v, w in zip(y, py)), atol=1e-14) + + +class TestSkewCauchy: + def test_cauchy(self): + x = np.linspace(-5, 5, 100) + assert_array_almost_equal(stats.skewcauchy.pdf(x, a=0), + stats.cauchy.pdf(x)) + assert_array_almost_equal(stats.skewcauchy.cdf(x, a=0), + stats.cauchy.cdf(x)) + 
assert_array_almost_equal(stats.skewcauchy.ppf(x, a=0), + stats.cauchy.ppf(x)) + + def test_skewcauchy_R(self): + # options(digits=16) + # library(sgt) + # # lmbda, x contain the values generated for a, x below + # lmbda <- c(0.0976270078546495, 0.430378732744839, 0.2055267521432877, + # 0.0897663659937937, -0.15269040132219, 0.2917882261333122, + # -0.12482557747462, 0.7835460015641595, 0.9273255210020589, + # -0.2331169623484446) + # x <- c(2.917250380826646, 0.2889491975290444, 0.6804456109393229, + # 4.25596638292661, -4.289639418021131, -4.1287070029845925, + # -4.797816025596743, 3.32619845547938, 2.7815675094985046, + # 3.700121482468191) + # pdf = dsgt(x, mu=0, lambda=lambda, sigma=1, q=1/2, mean.cent=FALSE, + # var.adj = sqrt(2)) + # cdf = psgt(x, mu=0, lambda=lambda, sigma=1, q=1/2, mean.cent=FALSE, + # var.adj = sqrt(2)) + # qsgt(cdf, mu=0, lambda=lambda, sigma=1, q=1/2, mean.cent=FALSE, + # var.adj = sqrt(2)) + + np.random.seed(0) + a = np.random.rand(10) * 2 - 1 + x = np.random.rand(10) * 10 - 5 + pdf = [0.039473975217333909, 0.305829714049903223, 0.24140158118994162, + 0.019585772402693054, 0.021436553695989482, 0.00909817103867518, + 0.01658423410016873, 0.071083288030394126, 0.103250045941454524, + 0.013110230778426242] + cdf = [0.87426677718213752, 0.37556468910780882, 0.59442096496538066, + 0.91304659850890202, 0.09631964100300605, 0.03829624330921733, + 0.08245240578402535, 0.72057062945510386, 0.62826415852515449, + 0.95011308463898292] + assert_allclose(stats.skewcauchy.pdf(x, a), pdf) + assert_allclose(stats.skewcauchy.cdf(x, a), cdf) + assert_allclose(stats.skewcauchy.ppf(cdf, a), x) + + +class TestJFSkewT: + def test_compare_t(self): + # Verify that jf_skew_t with a=b recovers the t distribution with 2a + # degrees of freedom + a = b = 5 + df = a * 2 + x = [-1.0, 0.0, 1.0, 2.0] + q = [0.0, 0.1, 0.25, 0.75, 0.90, 1.0] + + jf = stats.jf_skew_t(a, b) + t = stats.t(df) + + assert_allclose(jf.pdf(x), t.pdf(x)) + assert_allclose(jf.cdf(x), 
t.cdf(x)) + assert_allclose(jf.ppf(q), t.ppf(q)) + assert_allclose(jf.stats('mvsk'), t.stats('mvsk')) + + @pytest.fixture + def gamlss_pdf_data(self): + """Sample data points computed using the `ST5` distribution from the + GAMLSS package in R. The pdf has been calculated for (a,b)=(2,3), + (a,b)=(8,4), and (a,b)=(12,13) for x in `np.linspace(-10, 10, 41)`. + + N.B. the `ST5` distribution in R uses an alternative parameterization + in terms of nu and tau, where: + - nu = (a - b) / (a * b * (a + b)) ** 0.5 + - tau = 2 / (a + b) + """ + data = np.load( + Path(__file__).parent / "data/jf_skew_t_gamlss_pdf_data.npy" + ) + return np.rec.fromarrays(data, names="x,pdf,a,b") + + @pytest.mark.parametrize("a,b", [(2, 3), (8, 4), (12, 13)]) + def test_compare_with_gamlss_r(self, gamlss_pdf_data, a, b): + """Compare the pdf with a table of reference values. The table of + reference values was produced using R, where the Jones and Faddy skew + t distribution is available in the GAMLSS package as `ST5`. + """ + data = gamlss_pdf_data[ + (gamlss_pdf_data["a"] == a) & (gamlss_pdf_data["b"] == b) + ] + x, pdf = data["x"], data["pdf"] + assert_allclose(pdf, stats.jf_skew_t(a, b).pdf(x), rtol=1e-12) + +# Test data for TestSkewNorm.test_noncentral_moments() +# The expected noncentral moments were computed by Wolfram Alpha. +# In Wolfram Alpha, enter +# SkewNormalDistribution[0, 1, a] moment +# with `a` replaced by the desired shape parameter. In the results, there +# should be a table of the first four moments. Click on "More" to get more +# moments. The expected moments start with the first moment (order = 1). 
+_skewnorm_noncentral_moments = [ + (2, [2*np.sqrt(2/(5*np.pi)), + 1, + 22/5*np.sqrt(2/(5*np.pi)), + 3, + 446/25*np.sqrt(2/(5*np.pi)), + 15, + 2682/25*np.sqrt(2/(5*np.pi)), + 105, + 107322/125*np.sqrt(2/(5*np.pi))]), + (0.1, [np.sqrt(2/(101*np.pi)), + 1, + 302/101*np.sqrt(2/(101*np.pi)), + 3, + (152008*np.sqrt(2/(101*np.pi)))/10201, + 15, + (107116848*np.sqrt(2/(101*np.pi)))/1030301, + 105, + (97050413184*np.sqrt(2/(101*np.pi)))/104060401]), + (-3, [-3/np.sqrt(5*np.pi), + 1, + -63/(10*np.sqrt(5*np.pi)), + 3, + -2529/(100*np.sqrt(5*np.pi)), + 15, + -30357/(200*np.sqrt(5*np.pi)), + 105, + -2428623/(2000*np.sqrt(5*np.pi)), + 945, + -242862867/(20000*np.sqrt(5*np.pi)), + 10395, + -29143550277/(200000*np.sqrt(5*np.pi)), + 135135]), +] + + +class TestSkewNorm: + def setup_method(self): + self.rng = check_random_state(1234) + + def test_normal(self): + # When the skewness is 0 the distribution is normal + x = np.linspace(-5, 5, 100) + assert_array_almost_equal(stats.skewnorm.pdf(x, a=0), + stats.norm.pdf(x)) + + def test_rvs(self): + shape = (3, 4, 5) + x = stats.skewnorm.rvs(a=0.75, size=shape, random_state=self.rng) + assert_equal(shape, x.shape) + + x = stats.skewnorm.rvs(a=-3, size=shape, random_state=self.rng) + assert_equal(shape, x.shape) + + def test_moments(self): + X = stats.skewnorm.rvs(a=4, size=int(1e6), loc=5, scale=2, + random_state=self.rng) + expected = [np.mean(X), np.var(X), stats.skew(X), stats.kurtosis(X)] + computed = stats.skewnorm.stats(a=4, loc=5, scale=2, moments='mvsk') + assert_array_almost_equal(computed, expected, decimal=2) + + X = stats.skewnorm.rvs(a=-4, size=int(1e6), loc=5, scale=2, + random_state=self.rng) + expected = [np.mean(X), np.var(X), stats.skew(X), stats.kurtosis(X)] + computed = stats.skewnorm.stats(a=-4, loc=5, scale=2, moments='mvsk') + assert_array_almost_equal(computed, expected, decimal=2) + + def test_pdf_large_x(self): + # Triples are [x, a, logpdf(x, a)]. 
These values were computed + # using Log[PDF[SkewNormalDistribution[0, 1, a], x]] in Wolfram Alpha. + logpdfvals = [ + [40, -1, -1604.834233366398515598970], + [40, -1/2, -1004.142946723741991369168], + [40, 0, -800.9189385332046727417803], + [40, 1/2, -800.2257913526447274323631], + [-40, -1/2, -800.2257913526447274323631], + [-2, 1e7, -2.000000000000199559727173e14], + [2, -1e7, -2.000000000000199559727173e14], + ] + for x, a, logpdfval in logpdfvals: + logp = stats.skewnorm.logpdf(x, a) + assert_allclose(logp, logpdfval, rtol=1e-8) + + def test_cdf_large_x(self): + # Regression test for gh-7746. + # The x values are large enough that the closest 64 bit floating + # point representation of the exact CDF is 1.0. + p = stats.skewnorm.cdf([10, 20, 30], -1) + assert_allclose(p, np.ones(3), rtol=1e-14) + p = stats.skewnorm.cdf(25, 2.5) + assert_allclose(p, 1.0, rtol=1e-14) + + def test_cdf_sf_small_values(self): + # Triples are [x, a, cdf(x, a)]. These values were computed + # using CDF[SkewNormalDistribution[0, 1, a], x] in Wolfram Alpha. + cdfvals = [ + [-8, 1, 3.870035046664392611e-31], + [-4, 2, 8.1298399188811398e-21], + [-2, 5, 1.55326826787106273e-26], + [-9, -1, 2.257176811907681295e-19], + [-10, -4, 1.523970604832105213e-23], + ] + for x, a, cdfval in cdfvals: + p = stats.skewnorm.cdf(x, a) + assert_allclose(p, cdfval, rtol=1e-8) + # For the skew normal distribution, sf(-x, -a) = cdf(x, a). 
+ p = stats.skewnorm.sf(-x, -a) + assert_allclose(p, cdfval, rtol=1e-8) + + @pytest.mark.parametrize('a, moments', _skewnorm_noncentral_moments) + def test_noncentral_moments(self, a, moments): + for order, expected in enumerate(moments, start=1): + mom = stats.skewnorm.moment(order, a) + assert_allclose(mom, expected, rtol=1e-14) + + def test_fit(self): + rng = np.random.default_rng(4609813989115202851) + + a, loc, scale = -2, 3.5, 0.5 # arbitrary, valid parameters + dist = stats.skewnorm(a, loc, scale) + rvs = dist.rvs(size=100, random_state=rng) + + # test that MLE still honors guesses and fixed parameters + a2, loc2, scale2 = stats.skewnorm.fit(rvs, -1.5, floc=3) + a3, loc3, scale3 = stats.skewnorm.fit(rvs, -1.6, floc=3) + assert loc2 == loc3 == 3 # fixed parameter is respected + assert a2 != a3 # different guess -> (slightly) different outcome + # quality of fit is tested elsewhere + + # test that MoM honors fixed parameters, accepts (but ignores) guesses + a4, loc4, scale4 = stats.skewnorm.fit(rvs, 3, fscale=3, method='mm') + assert scale4 == 3 + # because scale was fixed, only the mean and skewness will be matched + dist4 = stats.skewnorm(a4, loc4, scale4) + res = dist4.stats(moments='ms') + ref = np.mean(rvs), stats.skew(rvs) + assert_allclose(res, ref) + + # Test behavior when skew of data is beyond maximum of skewnorm + rvs2 = stats.pareto.rvs(1, size=100, random_state=rng) + + # MLE still works + res = stats.skewnorm.fit(rvs2) + assert np.all(np.isfinite(res)) + + # MoM fits variance and skewness + a5, loc5, scale5 = stats.skewnorm.fit(rvs2, method='mm') + assert np.isinf(a5) + # distribution infrastruction doesn't allow infinite shape parameters + # into _stats; it just bypasses it and produces NaNs. Calculate + # moments manually. 
+ m, v = np.mean(rvs2), np.var(rvs2) + assert_allclose(m, loc5 + scale5 * np.sqrt(2/np.pi)) + assert_allclose(v, scale5**2 * (1 - 2 / np.pi)) + + # test that MLE and MoM behave as expected under sign changes + a6p, loc6p, scale6p = stats.skewnorm.fit(rvs, method='mle') + a6m, loc6m, scale6m = stats.skewnorm.fit(-rvs, method='mle') + assert_allclose([a6m, loc6m, scale6m], [-a6p, -loc6p, scale6p]) + a7p, loc7p, scale7p = stats.skewnorm.fit(rvs, method='mm') + a7m, loc7m, scale7m = stats.skewnorm.fit(-rvs, method='mm') + assert_allclose([a7m, loc7m, scale7m], [-a7p, -loc7p, scale7p]) + + def test_fit_gh19332(self): + # When the skewness of the data was high, `skewnorm.fit` fell back on + # generic `fit` behavior with a bad guess of the skewness parameter. + # Test that this is improved; `skewnorm.fit` is now better at finding + # the global optimum when the sample is highly skewed. See gh-19332. + x = np.array([-5, -1, 1 / 100_000] + 12 * [1] + [5]) + + params = stats.skewnorm.fit(x) + res = stats.skewnorm.nnlf(params, x) + + # Compare overridden fit against generic fit. + # res should be about 32.01, and generic fit is worse at 32.64. + # In case the generic fit improves, remove this assertion (see gh-19333). 
+ params_super = stats.skewnorm.fit(x, superfit=True) + ref = stats.skewnorm.nnlf(params_super, x) + assert res < ref - 0.5 + + # Compare overridden fit against stats.fit + rng = np.random.default_rng(9842356982345693637) + bounds = {'a': (-5, 5), 'loc': (-10, 10), 'scale': (1e-16, 10)} + def optimizer(fun, bounds): + return differential_evolution(fun, bounds, seed=rng) + + fit_result = stats.fit(stats.skewnorm, x, bounds, optimizer=optimizer) + np.testing.assert_allclose(params, fit_result.params, rtol=1e-4) + + +class TestExpon: + def test_zero(self): + assert_equal(stats.expon.pdf(0), 1) + + def test_tail(self): # Regression test for ticket 807 + assert_equal(stats.expon.cdf(1e-18), 1e-18) + assert_equal(stats.expon.isf(stats.expon.sf(40)), 40) + + def test_nan_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.nan]) + assert_raises(ValueError, stats.expon.fit, x) + + def test_inf_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.inf]) + assert_raises(ValueError, stats.expon.fit, x) + + +class TestNorm: + def test_nan_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.nan]) + assert_raises(ValueError, stats.norm.fit, x) + + def test_inf_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.inf]) + assert_raises(ValueError, stats.norm.fit, x) + + def test_bad_keyword_arg(self): + x = [1, 2, 3] + assert_raises(TypeError, stats.norm.fit, x, plate="shrimp") + + @pytest.mark.parametrize('loc', [0, 1]) + def test_delta_cdf(self, loc): + # The expected value is computed with mpmath: + # >>> import mpmath + # >>> mpmath.mp.dps = 60 + # >>> float(mpmath.ncdf(12) - mpmath.ncdf(11)) + # 1.910641809677555e-28 + expected = 1.910641809677555e-28 + delta = stats.norm._delta_cdf(11+loc, 12+loc, loc=loc) + assert_allclose(delta, expected, rtol=1e-13) + delta = 
stats.norm._delta_cdf(-(12+loc), -(11+loc), loc=-loc) + assert_allclose(delta, expected, rtol=1e-13) + + +class TestUniform: + """gh-10300""" + def test_nan_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.nan]) + assert_raises(ValueError, stats.uniform.fit, x) + + def test_inf_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.inf]) + assert_raises(ValueError, stats.uniform.fit, x) + + +class TestExponNorm: + def test_moments(self): + # Some moment test cases based on non-loc/scaled formula + def get_moms(lam, sig, mu): + # See wikipedia for these formulae + # where it is listed as an exponentially modified gaussian + opK2 = 1.0 + 1 / (lam*sig)**2 + exp_skew = 2 / (lam * sig)**3 * opK2**(-1.5) + exp_kurt = 6.0 * (1 + (lam * sig)**2)**(-2) + return [mu + 1/lam, sig*sig + 1.0/(lam*lam), exp_skew, exp_kurt] + + mu, sig, lam = 0, 1, 1 + K = 1.0 / (lam * sig) + sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk') + assert_almost_equal(sts, get_moms(lam, sig, mu)) + mu, sig, lam = -3, 2, 0.1 + K = 1.0 / (lam * sig) + sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk') + assert_almost_equal(sts, get_moms(lam, sig, mu)) + mu, sig, lam = 0, 3, 1 + K = 1.0 / (lam * sig) + sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk') + assert_almost_equal(sts, get_moms(lam, sig, mu)) + mu, sig, lam = -5, 11, 3.5 + K = 1.0 / (lam * sig) + sts = stats.exponnorm.stats(K, loc=mu, scale=sig, moments='mvsk') + assert_almost_equal(sts, get_moms(lam, sig, mu)) + + def test_nan_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.nan]) + assert_raises(ValueError, stats.exponnorm.fit, x, floc=0, fscale=1) + + def test_inf_raises_error(self): + # see gh-issue 10300 + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.inf]) + assert_raises(ValueError, stats.exponnorm.fit, x, floc=0, fscale=1) + + def 
test_extremes_x(self): + # Test for extreme values against overflows + assert_almost_equal(stats.exponnorm.pdf(-900, 1), 0.0) + assert_almost_equal(stats.exponnorm.pdf(+900, 1), 0.0) + assert_almost_equal(stats.exponnorm.pdf(-900, 0.01), 0.0) + assert_almost_equal(stats.exponnorm.pdf(+900, 0.01), 0.0) + + # Expected values for the PDF were computed with mpmath, with + # the following function, and with mpmath.mp.dps = 50. + # + # def exponnorm_stdpdf(x, K): + # x = mpmath.mpf(x) + # K = mpmath.mpf(K) + # t1 = mpmath.exp(1/(2*K**2) - x/K) + # erfcarg = -(x - 1/K)/mpmath.sqrt(2) + # t2 = mpmath.erfc(erfcarg) + # return t1 * t2 / (2*K) + # + @pytest.mark.parametrize('x, K, expected', + [(20, 0.01, 6.90010764753618e-88), + (1, 0.01, 0.24438994313247364), + (-1, 0.01, 0.23955149623472075), + (-20, 0.01, 4.6004708690125477e-88), + (10, 1, 7.48518298877006e-05), + (10, 10000, 9.990005048283775e-05)]) + def test_std_pdf(self, x, K, expected): + assert_allclose(stats.exponnorm.pdf(x, K), expected, rtol=5e-12) + + # Expected values for the CDF were computed with mpmath using + # the following function and with mpmath.mp.dps = 60: + # + # def mp_exponnorm_cdf(x, K, loc=0, scale=1): + # x = mpmath.mpf(x) + # K = mpmath.mpf(K) + # loc = mpmath.mpf(loc) + # scale = mpmath.mpf(scale) + # z = (x - loc)/scale + # return (mpmath.ncdf(z) + # - mpmath.exp((1/(2*K) - z)/K)*mpmath.ncdf(z - 1/K)) + # + @pytest.mark.parametrize('x, K, scale, expected', + [[0, 0.01, 1, 0.4960109760186432], + [-5, 0.005, 1, 2.7939945412195734e-07], + [-1e4, 0.01, 100, 0.0], + [-1e4, 0.01, 1000, 6.920401854427357e-24], + [5, 0.001, 1, 0.9999997118542392]]) + def test_cdf_small_K(self, x, K, scale, expected): + p = stats.exponnorm.cdf(x, K, scale=scale) + if expected == 0.0: + assert p == 0.0 + else: + assert_allclose(p, expected, rtol=1e-13) + + # Expected values for the SF were computed with mpmath using + # the following function and with mpmath.mp.dps = 60: + # + # def mp_exponnorm_sf(x, K, loc=0, 
scale=1): + # x = mpmath.mpf(x) + # K = mpmath.mpf(K) + # loc = mpmath.mpf(loc) + # scale = mpmath.mpf(scale) + # z = (x - loc)/scale + # return (mpmath.ncdf(-z) + # + mpmath.exp((1/(2*K) - z)/K)*mpmath.ncdf(z - 1/K)) + # + @pytest.mark.parametrize('x, K, scale, expected', + [[10, 0.01, 1, 8.474702916146657e-24], + [2, 0.005, 1, 0.02302280664231312], + [5, 0.005, 0.5, 8.024820681931086e-24], + [10, 0.005, 0.5, 3.0603340062892486e-89], + [20, 0.005, 0.5, 0.0], + [-3, 0.001, 1, 0.9986545205566117]]) + def test_sf_small_K(self, x, K, scale, expected): + p = stats.exponnorm.sf(x, K, scale=scale) + if expected == 0.0: + assert p == 0.0 + else: + assert_allclose(p, expected, rtol=5e-13) + + +class TestGenExpon: + def test_pdf_unity_area(self): + from scipy.integrate import simpson + # PDF should integrate to one + p = stats.genexpon.pdf(numpy.arange(0, 10, 0.01), 0.5, 0.5, 2.0) + assert_almost_equal(simpson(p, dx=0.01), 1, 1) + + def test_cdf_bounds(self): + # CDF should always be positive + cdf = stats.genexpon.cdf(numpy.arange(0, 10, 0.01), 0.5, 0.5, 2.0) + assert_(numpy.all((0 <= cdf) & (cdf <= 1))) + + # The values of p in the following data were computed with mpmath. + # E.g. 
the script + # from mpmath import mp + # mp.dps = 80 + # x = mp.mpf('15.0') + # a = mp.mpf('1.0') + # b = mp.mpf('2.0') + # c = mp.mpf('1.5') + # print(float(mp.exp((-a-b)*x + (b/c)*-mp.expm1(-c*x)))) + # prints + # 1.0859444834514553e-19 + @pytest.mark.parametrize('x, p, a, b, c', + [(15, 1.0859444834514553e-19, 1, 2, 1.5), + (0.25, 0.7609068232534623, 0.5, 2, 3), + (0.25, 0.09026661397565876, 9.5, 2, 0.5), + (0.01, 0.9753038265071597, 2.5, 0.25, 0.5), + (3.25, 0.0001962824553094492, 2.5, 0.25, 0.5), + (0.125, 0.9508674287164001, 0.25, 5, 0.5)]) + def test_sf_isf(self, x, p, a, b, c): + sf = stats.genexpon.sf(x, a, b, c) + assert_allclose(sf, p, rtol=2e-14) + isf = stats.genexpon.isf(p, a, b, c) + assert_allclose(isf, x, rtol=2e-14) + + # The values of p in the following data were computed with mpmath. + @pytest.mark.parametrize('x, p, a, b, c', + [(0.25, 0.2390931767465377, 0.5, 2, 3), + (0.25, 0.9097333860243412, 9.5, 2, 0.5), + (0.01, 0.0246961734928403, 2.5, 0.25, 0.5), + (3.25, 0.9998037175446906, 2.5, 0.25, 0.5), + (0.125, 0.04913257128359998, 0.25, 5, 0.5)]) + def test_cdf_ppf(self, x, p, a, b, c): + cdf = stats.genexpon.cdf(x, a, b, c) + assert_allclose(cdf, p, rtol=2e-14) + ppf = stats.genexpon.ppf(p, a, b, c) + assert_allclose(ppf, x, rtol=2e-14) + + +class TestTruncexpon: + + def test_sf_isf(self): + # reference values were computed via the reference distribution, e.g. 
+ # mp.dps = 50; TruncExpon(b=b).sf(x) + b = [20, 100] + x = [19.999999, 99.999999] + ref = [2.0611546593828472e-15, 3.7200778266671455e-50] + assert_allclose(stats.truncexpon.sf(x, b), ref, rtol=1.5e-10) + assert_allclose(stats.truncexpon.isf(ref, b), x, rtol=1e-12) + + +class TestExponpow: + def test_tail(self): + assert_almost_equal(stats.exponpow.cdf(1e-10, 2.), 1e-20) + assert_almost_equal(stats.exponpow.isf(stats.exponpow.sf(5, .8), .8), + 5) + + +class TestSkellam: + def test_pmf(self): + # comparison to R + k = numpy.arange(-10, 15) + mu1, mu2 = 10, 5 + skpmfR = numpy.array( + [4.2254582961926893e-005, 1.1404838449648488e-004, + 2.8979625801752660e-004, 6.9177078182101231e-004, + 1.5480716105844708e-003, 3.2412274963433889e-003, + 6.3373707175123292e-003, 1.1552351566696643e-002, + 1.9606152375042644e-002, 3.0947164083410337e-002, + 4.5401737566767360e-002, 6.1894328166820688e-002, + 7.8424609500170578e-002, 9.2418812533573133e-002, + 1.0139793148019728e-001, 1.0371927988298846e-001, + 9.9076583077406091e-002, 8.8546660073089561e-002, + 7.4187842052486810e-002, 5.8392772862200251e-002, + 4.3268692953013159e-002, 3.0248159818374226e-002, + 1.9991434305603021e-002, 1.2516877303301180e-002, + 7.4389876226229707e-003]) + + assert_almost_equal(stats.skellam.pmf(k, mu1, mu2), skpmfR, decimal=15) + + def test_cdf(self): + # comparison to R, only 5 decimals + k = numpy.arange(-10, 15) + mu1, mu2 = 10, 5 + skcdfR = numpy.array( + [6.4061475386192104e-005, 1.7810985988267694e-004, + 4.6790611790020336e-004, 1.1596768997212152e-003, + 2.7077485103056847e-003, 5.9489760066490718e-003, + 1.2286346724161398e-002, 2.3838698290858034e-002, + 4.3444850665900668e-002, 7.4392014749310995e-002, + 1.1979375231607835e-001, 1.8168808048289900e-001, + 2.6011268998306952e-001, 3.5253150251664261e-001, + 4.5392943399683988e-001, 5.5764871387982828e-001, + 6.5672529695723436e-001, 7.4527195703032389e-001, + 8.1945979908281064e-001, 8.7785257194501087e-001, + 9.2112126489802404e-001, 
9.5136942471639818e-001, + 9.7136085902200120e-001, 9.8387773632530240e-001, + 9.9131672394792536e-001]) + + assert_almost_equal(stats.skellam.cdf(k, mu1, mu2), skcdfR, decimal=5) + + def test_extreme_mu2(self): + # check that crash reported by gh-17916 large mu2 is resolved + x, mu1, mu2 = 0, 1, 4820232647677555.0 + assert_allclose(stats.skellam.pmf(x, mu1, mu2), 0, atol=1e-16) + assert_allclose(stats.skellam.cdf(x, mu1, mu2), 1, atol=1e-16) + + +class TestLognorm: + def test_pdf(self): + # Regression test for Ticket #1471: avoid nan with 0/0 situation + # Also make sure there are no warnings at x=0, cf gh-5202 + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + pdf = stats.lognorm.pdf([0, 0.5, 1], 1) + assert_array_almost_equal(pdf, [0.0, 0.62749608, 0.39894228]) + + def test_logcdf(self): + # Regression test for gh-5940: sf et al would underflow too early + x2, mu, sigma = 201.68, 195, 0.149 + assert_allclose(stats.lognorm.sf(x2-mu, s=sigma), + stats.norm.sf(np.log(x2-mu)/sigma)) + assert_allclose(stats.lognorm.logsf(x2-mu, s=sigma), + stats.norm.logsf(np.log(x2-mu)/sigma)) + + @pytest.fixture(scope='function') + def rng(self): + return np.random.default_rng(1234) + + @pytest.mark.parametrize("rvs_shape", [.1, 2]) + @pytest.mark.parametrize("rvs_loc", [-2, 0, 2]) + @pytest.mark.parametrize("rvs_scale", [.2, 1, 5]) + @pytest.mark.parametrize('fix_shape, fix_loc, fix_scale', + [e for e in product((False, True), repeat=3) + if False in e]) + @np.errstate(invalid="ignore") + def test_fit_MLE_comp_optimizer(self, rvs_shape, rvs_loc, rvs_scale, + fix_shape, fix_loc, fix_scale, rng): + data = stats.lognorm.rvs(size=100, s=rvs_shape, scale=rvs_scale, + loc=rvs_loc, random_state=rng) + + kwds = {} + if fix_shape: + kwds['f0'] = rvs_shape + if fix_loc: + kwds['floc'] = rvs_loc + if fix_scale: + kwds['fscale'] = rvs_scale + + # Numerical result may equal analytical result if some code path + # of the analytical routine makes use of 
numerical optimization. + _assert_less_or_close_loglike(stats.lognorm, data, **kwds, + maybe_identical=True) + + def test_isf(self): + # reference values were computed via the reference distribution, e.g. + # mp.dps = 100; + # LogNormal(s=s).isf(q=0.1, guess=0) + # LogNormal(s=s).isf(q=2e-10, guess=100) + s = 0.954 + q = [0.1, 2e-10, 5e-20, 6e-40] + ref = [3.3960065375794937, 390.07632793595974, 5830.5020828128445, + 287872.84087457904] + assert_allclose(stats.lognorm.isf(q, s), ref, rtol=1e-14) + + +class TestBeta: + def test_logpdf(self): + # Regression test for Ticket #1326: avoid nan with 0*log(0) situation + logpdf = stats.beta.logpdf(0, 1, 0.5) + assert_almost_equal(logpdf, -0.69314718056) + logpdf = stats.beta.logpdf(0, 0.5, 1) + assert_almost_equal(logpdf, np.inf) + + def test_logpdf_ticket_1866(self): + alpha, beta = 267, 1472 + x = np.array([0.2, 0.5, 0.6]) + b = stats.beta(alpha, beta) + assert_allclose(b.logpdf(x).sum(), -1201.699061824062) + assert_allclose(b.pdf(x), np.exp(b.logpdf(x))) + + def test_fit_bad_keyword_args(self): + x = [0.1, 0.5, 0.6] + assert_raises(TypeError, stats.beta.fit, x, floc=0, fscale=1, + plate="shrimp") + + def test_fit_duplicated_fixed_parameter(self): + # At most one of 'f0', 'fa' or 'fix_a' can be given to the fit method. + # More than one raises a ValueError. + x = [0.1, 0.5, 0.6] + assert_raises(ValueError, stats.beta.fit, x, fa=0.5, fix_a=0.5) + + @pytest.mark.skipif(MACOS_INTEL, reason="Overflow, see gh-14901") + def test_issue_12635(self): + # Confirm that Boost's beta distribution resolves gh-12635. + # Check against R: + # options(digits=16) + # p = 0.9999999999997369 + # a = 75.0 + # b = 66334470.0 + # print(qbeta(p, a, b)) + p, a, b = 0.9999999999997369, 75.0, 66334470.0 + assert_allclose(stats.beta.ppf(p, a, b), 2.343620802982393e-06) + + @pytest.mark.skipif(MACOS_INTEL, reason="Overflow, see gh-14901") + def test_issue_12794(self): + # Confirm that Boost's beta distribution resolves gh-12794. 
+ # Check against R. + # options(digits=16) + # p = 1e-11 + # count_list = c(10,100,1000) + # print(qbeta(1-p, count_list + 1, 100000 - count_list)) + inv_R = np.array([0.0004944464889611935, + 0.0018360586912635726, + 0.0122663919942518351]) + count_list = np.array([10, 100, 1000]) + p = 1e-11 + inv = stats.beta.isf(p, count_list + 1, 100000 - count_list) + assert_allclose(inv, inv_R) + res = stats.beta.sf(inv, count_list + 1, 100000 - count_list) + assert_allclose(res, p) + + @pytest.mark.skipif(MACOS_INTEL, reason="Overflow, see gh-14901") + def test_issue_12796(self): + # Confirm that Boost's beta distribution succeeds in the case + # of gh-12796 + alpha_2 = 5e-6 + count_ = np.arange(1, 20) + nobs = 100000 + q, a, b = 1 - alpha_2, count_ + 1, nobs - count_ + inv = stats.beta.ppf(q, a, b) + res = stats.beta.cdf(inv, a, b) + assert_allclose(res, 1 - alpha_2) + + def test_endpoints(self): + # Confirm that boost's beta distribution returns inf at x=1 + # when b<1 + a, b = 1, 0.5 + assert_equal(stats.beta.pdf(1, a, b), np.inf) + + # Confirm that boost's beta distribution returns inf at x=0 + # when a<1 + a, b = 0.2, 3 + assert_equal(stats.beta.pdf(0, a, b), np.inf) + + # Confirm that boost's beta distribution returns 5 at x=0 + # when a=1, b=5 + a, b = 1, 5 + assert_equal(stats.beta.pdf(0, a, b), 5) + assert_equal(stats.beta.pdf(1e-310, a, b), 5) + + # Confirm that boost's beta distribution returns 5 at x=1 + # when a=5, b=1 + a, b = 5, 1 + assert_equal(stats.beta.pdf(1, a, b), 5) + assert_equal(stats.beta.pdf(1-1e-310, a, b), 5) + + @pytest.mark.xfail(reason="Does not warn on special codepath") + def test_boost_eval_issue_14606(self): + q, a, b = 0.995, 1.0e11, 1.0e13 + with pytest.warns(RuntimeWarning): + stats.beta.ppf(q, a, b) + + @pytest.mark.parametrize('method', [stats.beta.ppf, stats.beta.isf]) + @pytest.mark.parametrize('a, b', [(1e-310, 12.5), (12.5, 1e-310)]) + def test_beta_ppf_with_subnormal_a_b(self, method, a, b): + # Regression test for gh-17444: 
beta.ppf(p, a, b) and beta.isf(p, a, b) + # would result in a segmentation fault if either a or b was subnormal. + p = 0.9 + # Depending on the version of Boost that we have vendored and + # our setting of the Boost double promotion policy, the call + # `stats.beta.ppf(p, a, b)` might raise an OverflowError or + # return a value. We'll accept either behavior (and not care about + # the value), because our goal here is to verify that the call does + # not trigger a segmentation fault. + try: + method(p, a, b) + except OverflowError: + # The OverflowError exception occurs with Boost 1.80 or earlier + # when Boost's double promotion policy is false; see + # https://github.com/boostorg/math/issues/882 + # and + # https://github.com/boostorg/math/pull/883 + # Once we have vendored the fixed version of Boost, we can drop + # this try-except wrapper and just call the function. + pass + + # entropy accuracy was confirmed using the following mpmath function + # from mpmath import mp + # mp.dps = 50 + # def beta_entropy_mpmath(a, b): + # a = mp.mpf(a) + # b = mp.mpf(b) + # entropy = mp.log(mp.beta(a, b)) - (a - 1) * mp.digamma(a) -\ + # (b - 1) * mp.digamma(b) + (a + b -2) * mp.digamma(a + b) + # return float(entropy) + + @pytest.mark.parametrize('a, b, ref', + [(0.5, 0.5, -0.24156447527049044), + (0.001, 1, -992.0922447210179), + (1, 10000, -8.210440371976183), + (100000, 100000, -5.377247470132859)]) + def test_entropy(self, a, b, ref): + assert_allclose(stats.beta(a, b).entropy(), ref) + + @pytest.mark.parametrize( + "a, b, ref, tol", + [ + (1, 10, -1.4025850929940458, 1e-14), + (10, 20, -1.0567887388936708, 1e-13), + (4e6, 4e6+20, -7.221686009678741, 1e-9), + (5e6, 5e6+10, -7.333257022834638, 1e-8), + (1e10, 1e10+20, -11.133707703130474, 1e-11), + (1e50, 1e50+20, -57.185409562486385, 1e-15), + (2, 1e10, -21.448635265288925, 1e-11), + (2, 1e20, -44.47448619497938, 1e-14), + (2, 1e50, -113.55203898480075, 1e-14), + (5, 1e10, -20.87226777401971, 1e-10), + (5, 1e20, 
-43.89811870326017, 1e-14), + (5, 1e50, -112.97567149308153, 1e-14), + (10, 1e10, -20.489796752909477, 1e-9), + (10, 1e20, -43.51564768139993, 1e-14), + (10, 1e50, -112.59320047122131, 1e-14), + (1e20, 2, -44.47448619497938, 1e-14), + (1e20, 5, -43.89811870326017, 1e-14), + (1e50, 10, -112.59320047122131, 1e-14), + ] + ) + def test_extreme_entropy(self, a, b, ref, tol): + # Reference values were calculated with mpmath: + # from mpmath import mp + # mp.dps = 500 + # + # def beta_entropy_mpmath(a, b): + # a = mp.mpf(a) + # b = mp.mpf(b) + # entropy = ( + # mp.log(mp.beta(a, b)) - (a - 1) * mp.digamma(a) + # - (b - 1) * mp.digamma(b) + (a + b - 2) * mp.digamma(a + b) + # ) + # return float(entropy) + assert_allclose(stats.beta(a, b).entropy(), ref, rtol=tol) + + +class TestBetaPrime: + # the test values are used in test_cdf_gh_17631 / test_ppf_gh_17631 + # They are computed with mpmath. Example: + # from mpmath import mp + # mp.dps = 50 + # a, b = mp.mpf(0.05), mp.mpf(0.1) + # x = mp.mpf(1e22) + # float(mp.betainc(a, b, 0.0, x/(1+x), regularized=True)) + # note: we use the values computed by the cdf to test whether + # ppf(cdf(x)) == x (up to a small tolerance) + # since the ppf can be very sensitive to small variations of the input, + # it can be required to generate the test case for the ppf separately, + # see self.test_ppf + cdf_vals = [ + (1e22, 100.0, 0.05, 0.8973027435427167), + (1e10, 100.0, 0.05, 0.5911548582766262), + (1e8, 0.05, 0.1, 0.9467768090820048), + (1e8, 100.0, 0.05, 0.4852944858726726), + (1e-10, 0.05, 0.1, 0.21238845427095), + (1e-10, 1.5, 1.5, 1.697652726007973e-15), + (1e-10, 0.05, 100.0, 0.40884514172337383), + (1e-22, 0.05, 0.1, 0.053349567649287326), + (1e-22, 1.5, 1.5, 1.6976527263135503e-33), + (1e-22, 0.05, 100.0, 0.10269725645728331), + (1e-100, 0.05, 0.1, 6.7163126421919795e-06), + (1e-100, 1.5, 1.5, 1.6976527263135503e-150), + (1e-100, 0.05, 100.0, 1.2928818587561651e-05), + ] + + def test_logpdf(self): + alpha, beta = 267, 1472 + x = 
np.array([0.2, 0.5, 0.6]) + b = stats.betaprime(alpha, beta) + assert_(np.isfinite(b.logpdf(x)).all()) + assert_allclose(b.pdf(x), np.exp(b.logpdf(x))) + + def test_cdf(self): + # regression test for gh-4030: Implementation of + # scipy.stats.betaprime.cdf() + x = stats.betaprime.cdf(0, 0.2, 0.3) + assert_equal(x, 0.0) + + alpha, beta = 267, 1472 + x = np.array([0.2, 0.5, 0.6]) + cdfs = stats.betaprime.cdf(x, alpha, beta) + assert_(np.isfinite(cdfs).all()) + + # check the new cdf implementation vs generic one: + gen_cdf = stats.rv_continuous._cdf_single + cdfs_g = [gen_cdf(stats.betaprime, val, alpha, beta) for val in x] + assert_allclose(cdfs, cdfs_g, atol=0, rtol=2e-12) + + # The expected values for test_ppf() were computed with mpmath, e.g. + # + # from mpmath import mp + # mp.dps = 125 + # p = 0.01 + # a, b = 1.25, 2.5 + # x = mp.findroot(lambda t: mp.betainc(a, b, x1=0, x2=t/(1+t), + # regularized=True) - p, + # x0=(0.01, 0.011), method='secant') + # print(float(x)) + # + # prints + # + # 0.01080162700956614 + # + @pytest.mark.parametrize( + 'p, a, b, expected', + [(0.010, 1.25, 2.5, 0.01080162700956614), + (1e-12, 1.25, 2.5, 1.0610141996279122e-10), + (1e-18, 1.25, 2.5, 1.6815941817974941e-15), + (1e-17, 0.25, 7.0, 1.0179194531881782e-69), + (0.375, 0.25, 7.0, 0.002036820346115211), + (0.9978811466052919, 0.05, 0.1, 1.0000000000001218e22),] + ) + def test_ppf(self, p, a, b, expected): + x = stats.betaprime.ppf(p, a, b) + assert_allclose(x, expected, rtol=1e-14) + + @pytest.mark.parametrize('x, a, b, p', cdf_vals) + def test_ppf_gh_17631(self, x, a, b, p): + assert_allclose(stats.betaprime.ppf(p, a, b), x, rtol=1e-14) + + @pytest.mark.parametrize( + 'x, a, b, expected', + cdf_vals + [ + (1e10, 1.5, 1.5, 0.9999999999999983), + (1e10, 0.05, 0.1, 0.9664184367890859), + (1e22, 0.05, 0.1, 0.9978811466052919), + ]) + def test_cdf_gh_17631(self, x, a, b, expected): + assert_allclose(stats.betaprime.cdf(x, a, b), expected, rtol=1e-14) + + @pytest.mark.parametrize( + 
'x, a, b, expected', + [(1e50, 0.05, 0.1, 0.9999966641709545), + (1e50, 100.0, 0.05, 0.995925162631006)]) + def test_cdf_extreme_tails(self, x, a, b, expected): + # for even more extreme values, we only get a few correct digits + # results are still < 1 + y = stats.betaprime.cdf(x, a, b) + assert y < 1.0 + assert_allclose(y, expected, rtol=2e-5) + + def test_sf(self): + # reference values were computed via the reference distribution, + # e.g. + # mp.dps = 50 + # a, b = 5, 3 + # x = 1e10 + # BetaPrime(a=a, b=b).sf(x); returns 3.4999999979e-29 + a = [5, 4, 2, 0.05, 0.05, 0.05, 0.05, 100.0, 100.0, 0.05, 0.05, + 0.05, 1.5, 1.5] + b = [3, 2, 1, 0.1, 0.1, 0.1, 0.1, 0.05, 0.05, 100.0, 100.0, + 100.0, 1.5, 1.5] + x = [1e10, 1e20, 1e30, 1e22, 1e-10, 1e-22, 1e-100, 1e22, 1e10, + 1e-10, 1e-22, 1e-100, 1e10, 1e-10] + ref = [3.4999999979e-29, 9.999999999994357e-40, 1.9999999999999998e-30, + 0.0021188533947081017, 0.78761154572905, 0.9466504323507127, + 0.9999932836873578, 0.10269725645728331, 0.40884514172337383, + 0.5911548582766262, 0.8973027435427167, 0.9999870711814124, + 1.6976527260079727e-15, 0.9999999999999983] + sf_values = stats.betaprime.sf(x, a, b) + assert_allclose(sf_values, ref, rtol=1e-12) + + def test_fit_stats_gh18274(self): + # gh-18274 reported spurious warning emitted when fitting `betaprime` + # to data. Some of these were emitted by stats, too. Check that the + # warnings are no longer emitted. + stats.betaprime.fit([0.1, 0.25, 0.3, 1.2, 1.6], floc=0, fscale=1) + stats.betaprime(a=1, b=1).stats('mvsk') + + def test_moment_gh18634(self): + # Testing for gh-18634 revealed that `betaprime` raised a + # NotImplementedError for higher moments. Check that this is + # resolved. Parameters are arbitrary but lie on either side of the + # moment order (5) to test both branches of `_lazywhere`. Reference + # values produced with Mathematica, e.g. 
+ # `Moment[BetaPrimeDistribution[2,7],5]` + ref = [np.inf, 0.867096912929055] + res = stats.betaprime(2, [4.2, 7.1]).moment(5) + assert_allclose(res, ref) + + +class TestGamma: + def test_pdf(self): + # a few test cases to compare with R + pdf = stats.gamma.pdf(90, 394, scale=1./5) + assert_almost_equal(pdf, 0.002312341) + + pdf = stats.gamma.pdf(3, 10, scale=1./5) + assert_almost_equal(pdf, 0.1620358) + + def test_logpdf(self): + # Regression test for Ticket #1326: cornercase avoid nan with 0*log(0) + # situation + logpdf = stats.gamma.logpdf(0, 1) + assert_almost_equal(logpdf, 0) + + def test_fit_bad_keyword_args(self): + x = [0.1, 0.5, 0.6] + assert_raises(TypeError, stats.gamma.fit, x, floc=0, plate="shrimp") + + def test_isf(self): + # Test cases for when the probability is very small. See gh-13664. + # The expected values can be checked with mpmath. With mpmath, + # the survival function sf(x, k) can be computed as + # + # mpmath.gammainc(k, x, mpmath.inf, regularized=True) + # + # Here we have: + # + # >>> mpmath.mp.dps = 60 + # >>> float(mpmath.gammainc(1, 39.14394658089878, mpmath.inf, + # ... 
regularized=True)) + # 9.99999999999999e-18 + # >>> float(mpmath.gammainc(100, 330.6557590436547, mpmath.inf, + # regularized=True)) + # 1.000000000000028e-50 + # + assert np.isclose(stats.gamma.isf(1e-17, 1), + 39.14394658089878, atol=1e-14) + assert np.isclose(stats.gamma.isf(1e-50, 100), + 330.6557590436547, atol=1e-13) + + @pytest.mark.parametrize('scale', [1.0, 5.0]) + def test_delta_cdf(self, scale): + # Expected value computed with mpmath: + # + # >>> import mpmath + # >>> mpmath.mp.dps = 150 + # >>> cdf1 = mpmath.gammainc(3, 0, 245, regularized=True) + # >>> cdf2 = mpmath.gammainc(3, 0, 250, regularized=True) + # >>> float(cdf2 - cdf1) + # 1.1902609356171962e-102 + # + delta = stats.gamma._delta_cdf(scale*245, scale*250, 3, scale=scale) + assert_allclose(delta, 1.1902609356171962e-102, rtol=1e-13) + + @pytest.mark.parametrize('a, ref, rtol', + [(1e-4, -9990.366610819761, 1e-15), + (2, 1.5772156649015328, 1e-15), + (100, 3.7181819485047463, 1e-13), + (1e4, 6.024075385026086, 1e-15), + (1e18, 22.142204370151084, 1e-15), + (1e100, 116.54819318290696, 1e-15)]) + def test_entropy(self, a, ref, rtol): + # expected value computed with mpmath: + # from mpmath import mp + # mp.dps = 500 + # def gamma_entropy_reference(x): + # x = mp.mpf(x) + # return float(mp.digamma(x) * (mp.one - x) + x + mp.loggamma(x)) + + assert_allclose(stats.gamma.entropy(a), ref, rtol=rtol) + + @pytest.mark.parametrize("a", [1e-2, 1, 1e2]) + @pytest.mark.parametrize("loc", [1e-2, 0, 1e2]) + @pytest.mark.parametrize('scale', [1e-2, 1, 1e2]) + @pytest.mark.parametrize('fix_a', [True, False]) + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + def test_fit_mm(self, a, loc, scale, fix_a, fix_loc, fix_scale): + rng = np.random.default_rng(6762668991392531563) + data = stats.gamma.rvs(a, loc=loc, scale=scale, size=100, + random_state=rng) + + kwds = {} + if fix_a: + kwds['fa'] = a + if fix_loc: + kwds['floc'] = loc + if fix_scale: + 
kwds['fscale'] = scale + nfree = 3 - len(kwds) + + if nfree == 0: + error_msg = "All parameters fixed. There is nothing to optimize." + with pytest.raises(ValueError, match=error_msg): + stats.gamma.fit(data, method='mm', **kwds) + return + + theta = stats.gamma.fit(data, method='mm', **kwds) + dist = stats.gamma(*theta) + if nfree >= 1: + assert_allclose(dist.mean(), np.mean(data)) + if nfree >= 2: + assert_allclose(dist.moment(2), np.mean(data**2)) + if nfree >= 3: + assert_allclose(dist.moment(3), np.mean(data**3)) + +def test_pdf_overflow_gh19616(): + # Confirm that gh19616 (intermediate over/underflows in PDF) is resolved + # Reference value from R GeneralizedHyperbolic library + # library(GeneralizedHyperbolic) + # options(digits=16) + # jitter = 1e-3 + # dnig(1, a=2**0.5 / jitter**2, b=1 / jitter**2) + jitter = 1e-3 + Z = stats.norminvgauss(2**0.5 / jitter**2, 1 / jitter**2, loc=0, scale=1) + assert_allclose(Z.pdf(1.0), 282.0948446666433) + + +class TestDgamma: + def test_pdf(self): + rng = np.random.default_rng(3791303244302340058) + size = 10 # number of points to check + x = rng.normal(scale=10, size=size) + a = rng.uniform(high=10, size=size) + res = stats.dgamma.pdf(x, a) + ref = stats.gamma.pdf(np.abs(x), a) / 2 + assert_allclose(res, ref) + + dist = stats.dgamma(a) + # There was an intermittent failure with assert_equal on Linux - 32 bit + assert_allclose(dist.pdf(x), res, rtol=5e-16) + + # mpmath was used to compute the expected values. + # For x < 0, cdf(x, a) is mp.gammainc(a, -x, mp.inf, regularized=True)/2 + # For x > 0, cdf(x, a) is (1 + mp.gammainc(a, 0, x, regularized=True))/2 + # E.g. 
+ # from mpmath import mp + # mp.dps = 50 + # print(float(mp.gammainc(1, 20, mp.inf, regularized=True)/2)) + # prints + # 1.030576811219279e-09 + @pytest.mark.parametrize('x, a, expected', + [(-20, 1, 1.030576811219279e-09), + (-40, 1, 2.1241771276457944e-18), + (-50, 5, 2.7248509914602648e-17), + (-25, 0.125, 5.333071920958156e-14), + (5, 1, 0.9966310265004573)]) + def test_cdf_ppf_sf_isf_tail(self, x, a, expected): + cdf = stats.dgamma.cdf(x, a) + assert_allclose(cdf, expected, rtol=5e-15) + ppf = stats.dgamma.ppf(expected, a) + assert_allclose(ppf, x, rtol=5e-15) + sf = stats.dgamma.sf(-x, a) + assert_allclose(sf, expected, rtol=5e-15) + isf = stats.dgamma.isf(expected, a) + assert_allclose(isf, -x, rtol=5e-15) + + @pytest.mark.parametrize("a, ref", + [(1.5, 2.0541199559354117), + (1.3, 1.9357296377121247), + (1.1, 1.7856502333412134)]) + def test_entropy(self, a, ref): + # The reference values were calculated with mpmath: + # def entropy_dgamma(a): + # def pdf(x): + # A = mp.one / (mp.mpf(2.) 
* mp.gamma(a)) + # B = mp.fabs(x) ** (a - mp.one) + # C = mp.exp(-mp.fabs(x)) + # h = A * B * C + # return h + # + # return -mp.quad(lambda t: pdf(t) * mp.log(pdf(t)), + # [-mp.inf, mp.inf]) + assert_allclose(stats.dgamma.entropy(a), ref, rtol=1e-14) + + @pytest.mark.parametrize("a, ref", + [(1e-100, -1e+100), + (1e-10, -9999999975.858217), + (1e-5, -99987.37111657023), + (1e4, 6.717222565586032), + (1000000000000000.0, 19.38147391121996), + (1e+100, 117.2413403634669)]) + def test_entropy_entreme_values(self, a, ref): + # The reference values were calculated with mpmath: + # from mpmath import mp + # mp.dps = 500 + # def second_dgamma(a): + # a = mp.mpf(a) + # x_1 = a + mp.log(2) + mp.loggamma(a) + # x_2 = (mp.one - a) * mp.digamma(a) + # h = x_1 + x_2 + # return h + assert_allclose(stats.dgamma.entropy(a), ref, rtol=1e-10) + + def test_entropy_array_input(self): + x = np.array([1, 5, 1e20, 1e-5]) + y = stats.dgamma.entropy(x) + for i in range(len(y)): + assert y[i] == stats.dgamma.entropy(x[i]) + + +class TestChi2: + # regression tests after precision improvements, ticket:1041, not verified + def test_precision(self): + assert_almost_equal(stats.chi2.pdf(1000, 1000), 8.919133934753128e-003, + decimal=14) + assert_almost_equal(stats.chi2.pdf(100, 100), 0.028162503162596778, + decimal=14) + + def test_ppf(self): + # Expected values computed with mpmath. 
+ df = 4.8 + x = stats.chi2.ppf(2e-47, df) + assert_allclose(x, 1.098472479575179840604902808e-19, rtol=1e-10) + x = stats.chi2.ppf(0.5, df) + assert_allclose(x, 4.15231407598589358660093156, rtol=1e-10) + + df = 13 + x = stats.chi2.ppf(2e-77, df) + assert_allclose(x, 1.0106330688195199050507943e-11, rtol=1e-10) + x = stats.chi2.ppf(0.1, df) + assert_allclose(x, 7.041504580095461859307179763, rtol=1e-10) + + # Entropy references values were computed with the following mpmath code + # from mpmath import mp + # mp.dps = 50 + # def chisq_entropy_mpmath(df): + # df = mp.mpf(df) + # half_df = 0.5 * df + # entropy = (half_df + mp.log(2) + mp.log(mp.gamma(half_df)) + + # (mp.one - half_df) * mp.digamma(half_df)) + # return float(entropy) + + @pytest.mark.parametrize('df, ref', + [(1e-4, -19988.980448690163), + (1, 0.7837571104739337), + (100, 4.061397128938114), + (251, 4.525577254045129), + (1e15, 19.034900320939986)]) + def test_entropy(self, df, ref): + assert_allclose(stats.chi2(df).entropy(), ref, rtol=1e-13) + + +class TestGumbelL: + # gh-6228 + def test_cdf_ppf(self): + x = np.linspace(-100, -4) + y = stats.gumbel_l.cdf(x) + xx = stats.gumbel_l.ppf(y) + assert_allclose(x, xx) + + def test_logcdf_logsf(self): + x = np.linspace(-100, -4) + y = stats.gumbel_l.logcdf(x) + z = stats.gumbel_l.logsf(x) + u = np.exp(y) + v = -special.expm1(z) + assert_allclose(u, v) + + def test_sf_isf(self): + x = np.linspace(-20, 5) + y = stats.gumbel_l.sf(x) + xx = stats.gumbel_l.isf(y) + assert_allclose(x, xx) + + @pytest.mark.parametrize('loc', [-1, 1]) + def test_fit_fixed_param(self, loc): + # ensure fixed location is correctly reflected from `gumbel_r.fit` + # See comments at end of gh-12737. 
+ data = stats.gumbel_l.rvs(size=100, loc=loc) + fitted_loc, _ = stats.gumbel_l.fit(data, floc=loc) + assert_equal(fitted_loc, loc) + + +class TestGumbelR: + + def test_sf(self): + # Expected value computed with mpmath: + # >>> import mpmath + # >>> mpmath.mp.dps = 40 + # >>> float(mpmath.mp.one - mpmath.exp(-mpmath.exp(-50))) + # 1.9287498479639178e-22 + assert_allclose(stats.gumbel_r.sf(50), 1.9287498479639178e-22, + rtol=1e-14) + + def test_isf(self): + # Expected value computed with mpmath: + # >>> import mpmath + # >>> mpmath.mp.dps = 40 + # >>> float(-mpmath.log(-mpmath.log(mpmath.mp.one - 1e-17))) + # 39.14394658089878 + assert_allclose(stats.gumbel_r.isf(1e-17), 39.14394658089878, + rtol=1e-14) + + +class TestLevyStable: + @pytest.fixture(autouse=True) + def reset_levy_stable_params(self): + """Setup default parameters for levy_stable generator""" + stats.levy_stable.parameterization = "S1" + stats.levy_stable.cdf_default_method = "piecewise" + stats.levy_stable.pdf_default_method = "piecewise" + stats.levy_stable.quad_eps = stats._levy_stable._QUAD_EPS + + @pytest.fixture + def nolan_pdf_sample_data(self): + """Sample data points for pdf computed with Nolan's stablec + + See - http://fs2.american.edu/jpnolan/www/stable/stable.html + + There's a known limitation of Nolan's executable for alpha < 0.2. 
+ + The data table loaded below is generated from Nolan's stablec + with the following parameter space: + + alpha = 0.1, 0.2, ..., 2.0 + beta = -1.0, -0.9, ..., 1.0 + p = 0.01, 0.05, 0.1, 0.25, 0.35, 0.5, + and the equivalent for the right tail + + Typically inputs for stablec: + + stablec.exe << + 1 # pdf + 1 # Nolan S equivalent to S0 in scipy + .25,2,.25 # alpha + -1,-1,0 # beta + -10,10,1 # x + 1,0 # gamma, delta + 2 # output file + """ + data = np.load( + Path(__file__).parent / + 'data/levy_stable/stable-Z1-pdf-sample-data.npy' + ) + data = np.rec.fromarrays(data.T, names='x,p,alpha,beta,pct') + return data + + @pytest.fixture + def nolan_cdf_sample_data(self): + """Sample data points for cdf computed with Nolan's stablec + + See - http://fs2.american.edu/jpnolan/www/stable/stable.html + + There's a known limitation of Nolan's executable for alpha < 0.2. + + The data table loaded below is generated from Nolan's stablec + with the following parameter space: + + alpha = 0.1, 0.2, ..., 2.0 + beta = -1.0, -0.9, ..., 1.0 + p = 0.01, 0.05, 0.1, 0.25, 0.35, 0.5, + + and the equivalent for the right tail + + Ideally, Nolan's output for CDF values should match the percentile + from where they have been sampled from. Even more so as we extract + percentile x positions from stablec too. However, we note at places + Nolan's stablec will produce absolute errors in order of 1e-5. We + compare against his calculations here. In future, once we less + reliant on Nolan's paper we might switch to comparing directly at + percentiles (those x values being produced from some alternative + means). 
+ + Typically inputs for stablec: + + stablec.exe << + 2 # cdf + 1 # Nolan S equivalent to S0 in scipy + .25,2,.25 # alpha + -1,-1,0 # beta + -10,10,1 # x + 1,0 # gamma, delta + 2 # output file + """ + data = np.load( + Path(__file__).parent / + 'data/levy_stable/stable-Z1-cdf-sample-data.npy' + ) + data = np.rec.fromarrays(data.T, names='x,p,alpha,beta,pct') + return data + + @pytest.fixture + def nolan_loc_scale_sample_data(self): + """Sample data where loc, scale are different from 0, 1 + + Data extracted in similar way to pdf/cdf above using + Nolan's stablec but set to an arbitrary location scale of + (2, 3) for various important parameters alpha, beta and for + parameterisations S0 and S1. + """ + data = np.load( + Path(__file__).parent / + 'data/levy_stable/stable-loc-scale-sample-data.npy' + ) + return data + + @pytest.mark.parametrize( + "sample_size", [ + pytest.param(50), pytest.param(1500, marks=pytest.mark.slow) + ] + ) + @pytest.mark.parametrize("parameterization", ["S0", "S1"]) + @pytest.mark.parametrize( + "alpha,beta", [(1.0, 0), (1.0, -0.5), (1.5, 0), (1.9, 0.5)] + ) + @pytest.mark.parametrize("gamma,delta", [(1, 0), (3, 2)]) + def test_rvs( + self, + parameterization, + alpha, + beta, + gamma, + delta, + sample_size, + ): + stats.levy_stable.parameterization = parameterization + ls = stats.levy_stable( + alpha=alpha, beta=beta, scale=gamma, loc=delta + ) + _, p = stats.kstest( + ls.rvs(size=sample_size, random_state=1234), ls.cdf + ) + assert p > 0.05 + + @pytest.mark.slow + @pytest.mark.parametrize('beta', [0.5, 1]) + def test_rvs_alpha1(self, beta): + """Additional test cases for rvs for alpha equal to 1.""" + np.random.seed(987654321) + alpha = 1.0 + loc = 0.5 + scale = 1.5 + x = stats.levy_stable.rvs(alpha, beta, loc=loc, scale=scale, + size=5000) + stat, p = stats.kstest(x, 'levy_stable', + args=(alpha, beta, loc, scale)) + assert p > 0.01 + + def test_fit(self): + # construct data to have percentiles that match + # example in McCulloch 
1986. + x = [ + -.05413, -.05413, 0., 0., 0., 0., .00533, .00533, .00533, .00533, + .00533, .03354, .03354, .03354, .03354, .03354, .05309, .05309, + .05309, .05309, .05309 + ] + alpha1, beta1, loc1, scale1 = stats.levy_stable._fitstart(x) + assert_allclose(alpha1, 1.48, rtol=0, atol=0.01) + assert_almost_equal(beta1, -.22, 2) + assert_almost_equal(scale1, 0.01717, 4) + assert_almost_equal( + loc1, 0.00233, 2 + ) # to 2 dps due to rounding error in McCulloch86 + + # cover alpha=2 scenario + x2 = x + [.05309, .05309, .05309, .05309, .05309] + alpha2, beta2, loc2, scale2 = stats.levy_stable._fitstart(x2) + assert_equal(alpha2, 2) + assert_equal(beta2, -1) + assert_almost_equal(scale2, .02503, 4) + assert_almost_equal(loc2, .03354, 4) + + @pytest.mark.xfail(reason="Unknown problem with fitstart.") + @pytest.mark.parametrize( + "alpha,beta,delta,gamma", + [ + (1.5, 0.4, 2, 3), + (1.0, 0.4, 2, 3), + ] + ) + @pytest.mark.parametrize( + "parametrization", ["S0", "S1"] + ) + def test_fit_rvs(self, alpha, beta, delta, gamma, parametrization): + """Test that fit agrees with rvs for each parametrization.""" + stats.levy_stable.parametrization = parametrization + data = stats.levy_stable.rvs( + alpha, beta, loc=delta, scale=gamma, size=10000, random_state=1234 + ) + fit = stats.levy_stable._fitstart(data) + alpha_obs, beta_obs, delta_obs, gamma_obs = fit + assert_allclose( + [alpha, beta, delta, gamma], + [alpha_obs, beta_obs, delta_obs, gamma_obs], + rtol=0.01, + ) + + def test_fit_beta_flip(self): + # Confirm that sign of beta affects loc, not alpha or scale. 
+ x = np.array([1, 1, 3, 3, 10, 10, 10, 30, 30, 100, 100]) + alpha1, beta1, loc1, scale1 = stats.levy_stable._fitstart(x) + alpha2, beta2, loc2, scale2 = stats.levy_stable._fitstart(-x) + assert_equal(beta1, 1) + assert loc1 != 0 + assert_almost_equal(alpha2, alpha1) + assert_almost_equal(beta2, -beta1) + assert_almost_equal(loc2, -loc1) + assert_almost_equal(scale2, scale1) + + def test_fit_delta_shift(self): + # Confirm that loc slides up and down if data shifts. + SHIFT = 1 + x = np.array([1, 1, 3, 3, 10, 10, 10, 30, 30, 100, 100]) + alpha1, beta1, loc1, scale1 = stats.levy_stable._fitstart(-x) + alpha2, beta2, loc2, scale2 = stats.levy_stable._fitstart(-x + SHIFT) + assert_almost_equal(alpha2, alpha1) + assert_almost_equal(beta2, beta1) + assert_almost_equal(loc2, loc1 + SHIFT) + assert_almost_equal(scale2, scale1) + + def test_fit_loc_extrap(self): + # Confirm that loc goes out of sample for alpha close to 1. + x = [1, 1, 3, 3, 10, 10, 10, 30, 30, 140, 140] + alpha1, beta1, loc1, scale1 = stats.levy_stable._fitstart(x) + assert alpha1 < 1, f"Expected alpha < 1, got {alpha1}" + assert loc1 < min(x), f"Expected loc < {min(x)}, got {loc1}" + + x2 = [1, 1, 3, 3, 10, 10, 10, 30, 30, 130, 130] + alpha2, beta2, loc2, scale2 = stats.levy_stable._fitstart(x2) + assert alpha2 > 1, f"Expected alpha > 1, got {alpha2}" + assert loc2 > max(x2), f"Expected loc > {max(x2)}, got {loc2}" + + @pytest.mark.parametrize( + "pct_range,alpha_range,beta_range", [ + pytest.param( + [.01, .5, .99], + [.1, 1, 2], + [-1, 0, .8], + ), + pytest.param( + [.01, .05, .5, .95, .99], + [.1, .5, 1, 1.5, 2], + [-.9, -.5, 0, .3, .6, 1], + marks=pytest.mark.slow + ), + pytest.param( + [.01, .05, .1, .25, .35, .5, .65, .75, .9, .95, .99], + np.linspace(0.1, 2, 20), + np.linspace(-1, 1, 21), + marks=pytest.mark.xslow, + ), + ] + ) + def test_pdf_nolan_samples( + self, nolan_pdf_sample_data, pct_range, alpha_range, beta_range + ): + """Test pdf values against Nolan's stablec.exe output""" + data = 
nolan_pdf_sample_data + + # some tests break on linux 32 bit + uname = platform.uname() + is_linux_32 = uname.system == 'Linux' and uname.machine == 'i686' + platform_desc = "/".join( + [uname.system, uname.machine, uname.processor]) + + # fmt: off + # There are a number of cases which fail on some but not all platforms. + # These are excluded by the filters below. TODO: Rewrite tests so that + # the now filtered out test cases are still run but marked in pytest as + # expected to fail. + tests = [ + [ + 'dni', 1e-7, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + ~( + ( + (r['beta'] == 0) & + (r['pct'] == 0.5) + ) | + ( + (r['beta'] >= 0.9) & + (r['alpha'] >= 1.6) & + (r['pct'] == 0.5) + ) | + ( + (r['alpha'] <= 0.4) & + np.isin(r['pct'], [.01, .99]) + ) | + ( + (r['alpha'] <= 0.3) & + np.isin(r['pct'], [.05, .95]) + ) | + ( + (r['alpha'] <= 0.2) & + np.isin(r['pct'], [.1, .9]) + ) | + ( + (r['alpha'] == 0.1) & + np.isin(r['pct'], [.25, .75]) & + np.isin(np.abs(r['beta']), [.5, .6, .7]) + ) | + ( + (r['alpha'] == 0.1) & + np.isin(r['pct'], [.5]) & + np.isin(np.abs(r['beta']), [.1]) + ) | + ( + (r['alpha'] == 0.1) & + np.isin(r['pct'], [.35, .65]) & + np.isin(np.abs(r['beta']), [-.4, -.3, .3, .4, .5]) + ) | + ( + (r['alpha'] == 0.2) & + (r['beta'] == 0.5) & + (r['pct'] == 0.25) + ) | + ( + (r['alpha'] == 0.2) & + (r['beta'] == -0.3) & + (r['pct'] == 0.65) + ) | + ( + (r['alpha'] == 0.2) & + (r['beta'] == 0.3) & + (r['pct'] == 0.35) + ) | + ( + (r['alpha'] == 1.) & + np.isin(r['pct'], [.5]) & + np.isin(np.abs(r['beta']), [.1, .2, .3, .4]) + ) | + ( + (r['alpha'] == 1.) & + np.isin(r['pct'], [.35, .65]) & + np.isin(np.abs(r['beta']), [.8, .9, 1.]) + ) | + ( + (r['alpha'] == 1.) 
& + np.isin(r['pct'], [.01, .99]) & + np.isin(np.abs(r['beta']), [-.1, .1]) + ) | + # various points ok but too sparse to list + (r['alpha'] >= 1.1) + ) + ) + ], + # piecewise generally good accuracy + [ + 'piecewise', 1e-11, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 0.2) & + (r['alpha'] != 1.) + ) + ], + # for alpha = 1. for linux 32 bit optimize.bisect + # has some issues for .01 and .99 percentile + [ + 'piecewise', 1e-11, lambda r: ( + (r['alpha'] == 1.) & + (not is_linux_32) & + np.isin(r['pct'], pct_range) & + (1. in alpha_range) & + np.isin(r['beta'], beta_range) + ) + ], + # for small alpha very slightly reduced accuracy + [ + 'piecewise', 2.5e-10, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] <= 0.2) + ) + ], + # fft accuracy reduces as alpha decreases + [ + 'fft-simpson', 1e-5, lambda r: ( + (r['alpha'] >= 1.9) & + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) + ), + ], + [ + 'fft-simpson', 1e-6, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 1) & + (r['alpha'] < 1.9) + ) + ], + # fft relative errors for alpha < 1, will raise if enabled + # ['fft-simpson', 1e-4, lambda r: r['alpha'] == 0.9], + # ['fft-simpson', 1e-3, lambda r: r['alpha'] == 0.8], + # ['fft-simpson', 1e-2, lambda r: r['alpha'] == 0.7], + # ['fft-simpson', 1e-1, lambda r: r['alpha'] == 0.6], + ] + # fmt: on + for ix, (default_method, rtol, + filter_func) in enumerate(tests): + stats.levy_stable.pdf_default_method = default_method + subdata = data[filter_func(data) + ] if filter_func is not None else data + with suppress_warnings() as sup: + # occurs in FFT methods only + sup.record( + RuntimeWarning, + "Density calculations experimental for FFT method.*" + ) + p = 
stats.levy_stable.pdf( + subdata['x'], + subdata['alpha'], + subdata['beta'], + scale=1, + loc=0 + ) + with np.errstate(over="ignore"): + subdata2 = rec_append_fields( + subdata, + ['calc', 'abserr', 'relerr'], + [ + p, + np.abs(p - subdata['p']), + np.abs(p - subdata['p']) / np.abs(subdata['p']) + ] + ) + failures = subdata2[ + (subdata2['relerr'] >= rtol) | + np.isnan(p) + ] + message = ( + f"pdf test {ix} failed with method '{default_method}' " + f"[platform: {platform_desc}]\n{failures.dtype.names}\n{failures}" + ) + assert_allclose( + p, + subdata['p'], + rtol, + err_msg=message, + verbose=False + ) + + @pytest.mark.parametrize( + "pct_range,alpha_range,beta_range", [ + pytest.param( + [.01, .5, .99], + [.1, 1, 2], + [-1, 0, .8], + ), + pytest.param( + [.01, .05, .5, .95, .99], + [.1, .5, 1, 1.5, 2], + [-.9, -.5, 0, .3, .6, 1], + marks=pytest.mark.slow + ), + pytest.param( + [.01, .05, .1, .25, .35, .5, .65, .75, .9, .95, .99], + np.linspace(0.1, 2, 20), + np.linspace(-1, 1, 21), + marks=pytest.mark.xslow, + ), + ] + ) + def test_cdf_nolan_samples( + self, nolan_cdf_sample_data, pct_range, alpha_range, beta_range + ): + """ Test cdf values against Nolan's stablec.exe output.""" + data = nolan_cdf_sample_data + tests = [ + # piecewise generally good accuracy + [ + 'piecewise', 2e-12, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + ~( + ( + (r['alpha'] == 1.) & + np.isin(r['beta'], [-0.3, -0.2, -0.1]) & + (r['pct'] == 0.01) + ) | + ( + (r['alpha'] == 1.) & + np.isin(r['beta'], [0.1, 0.2, 0.3]) & + (r['pct'] == 0.99) + ) + ) + ) + ], + # for some points with alpha=1, Nolan's STABLE clearly + # loses accuracy + [ + 'piecewise', 5e-2, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + ( + (r['alpha'] == 1.) & + np.isin(r['beta'], [-0.3, -0.2, -0.1]) & + (r['pct'] == 0.01) + ) | + ( + (r['alpha'] == 1.) 
& + np.isin(r['beta'], [0.1, 0.2, 0.3]) & + (r['pct'] == 0.99) + ) + ) + ], + # fft accuracy poor, very poor alpha < 1 + [ + 'fft-simpson', 1e-5, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 1.7) + ) + ], + [ + 'fft-simpson', 1e-4, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 1.5) & + (r['alpha'] <= 1.7) + ) + ], + [ + 'fft-simpson', 1e-3, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 1.3) & + (r['alpha'] <= 1.5) + ) + ], + [ + 'fft-simpson', 1e-2, lambda r: ( + np.isin(r['pct'], pct_range) & + np.isin(r['alpha'], alpha_range) & + np.isin(r['beta'], beta_range) & + (r['alpha'] > 1.0) & + (r['alpha'] <= 1.3) + ) + ], + ] + for ix, (default_method, rtol, + filter_func) in enumerate(tests): + stats.levy_stable.cdf_default_method = default_method + subdata = data[filter_func(data) + ] if filter_func is not None else data + with suppress_warnings() as sup: + sup.record( + RuntimeWarning, + 'Cumulative density calculations experimental for FFT' + + ' method. 
Use piecewise method instead.*' + ) + p = stats.levy_stable.cdf( + subdata['x'], + subdata['alpha'], + subdata['beta'], + scale=1, + loc=0 + ) + with np.errstate(over="ignore"): + subdata2 = rec_append_fields( + subdata, + ['calc', 'abserr', 'relerr'], + [ + p, + np.abs(p - subdata['p']), + np.abs(p - subdata['p']) / np.abs(subdata['p']) + ] + ) + failures = subdata2[ + (subdata2['relerr'] >= rtol) | + np.isnan(p) + ] + message = (f"cdf test {ix} failed with method '{default_method}'\n" + f"{failures.dtype.names}\n{failures}") + assert_allclose( + p, + subdata['p'], + rtol, + err_msg=message, + verbose=False + ) + + @pytest.mark.parametrize("param", [0, 1]) + @pytest.mark.parametrize("case", ["pdf", "cdf"]) + def test_location_scale( + self, nolan_loc_scale_sample_data, param, case + ): + """Tests for pdf and cdf where loc, scale are different from 0, 1 + """ + + uname = platform.uname() + is_linux_32 = uname.system == 'Linux' and "32bit" in platform.architecture()[0] + # Test seems to be unstable (see gh-17839 for a bug report on Debian + # i386), so skip it. + if is_linux_32 and case == 'pdf': + pytest.skip("Test unstable on some platforms; see gh-17839, 17859") + + data = nolan_loc_scale_sample_data + # We only test against piecewise as location/scale transforms + # are same for other methods. 
+ stats.levy_stable.cdf_default_method = "piecewise" + stats.levy_stable.pdf_default_method = "piecewise" + + subdata = data[data["param"] == param] + stats.levy_stable.parameterization = f"S{param}" + + assert case in ["pdf", "cdf"] + function = ( + stats.levy_stable.pdf if case == "pdf" else stats.levy_stable.cdf + ) + + v1 = function( + subdata['x'], subdata['alpha'], subdata['beta'], scale=2, loc=3 + ) + assert_allclose(v1, subdata[case], 1e-5) + + @pytest.mark.parametrize( + "method,decimal_places", + [ + ['dni', 4], + ['piecewise', 4], + ] + ) + def test_pdf_alpha_equals_one_beta_non_zero(self, method, decimal_places): + """ sample points extracted from Tables and Graphs of Stable + Probability Density Functions - Donald R Holt - 1973 - p 187. + """ + xs = np.array( + [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4] + ) + density = np.array( + [ + .3183, .3096, .2925, .2622, .1591, .1587, .1599, .1635, .0637, + .0729, .0812, .0955, .0318, .0390, .0458, .0586, .0187, .0236, + .0285, .0384 + ] + ) + betas = np.array( + [ + 0, .25, .5, 1, 0, .25, .5, 1, 0, .25, .5, 1, 0, .25, .5, 1, 0, + .25, .5, 1 + ] + ) + with np.errstate(all='ignore'), suppress_warnings() as sup: + sup.filter( + category=RuntimeWarning, + message="Density calculation unstable.*" + ) + stats.levy_stable.pdf_default_method = method + # stats.levy_stable.fft_grid_spacing = 0.0001 + pdf = stats.levy_stable.pdf(xs, 1, betas, scale=1, loc=0) + assert_almost_equal( + pdf, density, decimal_places, method + ) + + @pytest.mark.parametrize( + "params,expected", + [ + [(1.48, -.22, 0, 1), (0, np.inf, np.nan, np.nan)], + [(2, .9, 10, 1.5), (10, 4.5, 0, 0)] + ] + ) + def test_stats(self, params, expected): + observed = stats.levy_stable.stats( + params[0], params[1], loc=params[2], scale=params[3], + moments='mvsk' + ) + assert_almost_equal(observed, expected) + + @pytest.mark.parametrize('alpha', [0.25, 0.5, 0.75]) + @pytest.mark.parametrize( + 'function,beta,points,expected', + [ + ( + 
stats.levy_stable.cdf, + 1.0, + np.linspace(-25, 0, 10), + 0.0, + ), + ( + stats.levy_stable.pdf, + 1.0, + np.linspace(-25, 0, 10), + 0.0, + ), + ( + stats.levy_stable.cdf, + -1.0, + np.linspace(0, 25, 10), + 1.0, + ), + ( + stats.levy_stable.pdf, + -1.0, + np.linspace(0, 25, 10), + 0.0, + ) + ] + ) + def test_distribution_outside_support( + self, alpha, function, beta, points, expected + ): + """Ensure the pdf/cdf routines do not return nan outside support. + + This distribution's support becomes truncated in a few special cases: + support is [mu, infty) if alpha < 1 and beta = 1 + support is (-infty, mu] if alpha < 1 and beta = -1 + Otherwise, the support is all reals. Here, mu is zero by default. + """ + assert 0 < alpha < 1 + assert_almost_equal( + function(points, alpha=alpha, beta=beta), + np.full(len(points), expected) + ) + + @pytest.mark.parametrize( + 'x,alpha,beta,expected', + # Reference values from Matlab + # format long + # alphas = [1.7720732804618808, 1.9217001522410235, 1.5654806051633634, + # 1.7420803447784388, 1.5748002527689913]; + # betas = [0.5059373136902996, -0.8779442746685926, -0.4016220341911392, + # -0.38180029468259247, -0.25200194914153684]; + # x0s = [0, 1e-4, -1e-4]; + # for x0 = x0s + # disp("x0 = " + x0) + # for ii = 1:5 + # alpha = alphas(ii); + # beta = betas(ii); + # pd = makedist('Stable','alpha',alpha,'beta',beta,'gam',1,'delta',0); + # % we need to adjust x. It is the same as x = 0 In scipy. + # x = x0 - beta * tan(pi * alpha / 2); + # disp(pd.pdf(x)) + # end + # end + [ + (0, 1.7720732804618808, 0.5059373136902996, 0.278932636798268), + (0, 1.9217001522410235, -0.8779442746685926, 0.281054757202316), + (0, 1.5654806051633634, -0.4016220341911392, 0.271282133194204), + (0, 1.7420803447784388, -0.38180029468259247, 0.280202199244247), + (0, 1.5748002527689913, -0.25200194914153684, 0.280136576218665), + ] + ) + def test_x_equal_zeta( + self, x, alpha, beta, expected + ): + """Test pdf for x equal to zeta. 
+ + With S1 parametrization: x0 = x + zeta if alpha != 1 So, for x = 0, x0 + will be close to zeta. + + When case "x equal zeta" is not handled properly and quad_eps is not + low enough: - pdf may be less than 0 - logpdf is nan + + The points from the parametrize block are found randomly so that PDF is + less than 0. + + Reference values taken from MATLAB + https://www.mathworks.com/help/stats/stable-distribution.html + """ + stats.levy_stable.quad_eps = 1.2e-11 + + assert_almost_equal( + stats.levy_stable.pdf(x, alpha=alpha, beta=beta), + expected, + ) + + @pytest.mark.xfail + @pytest.mark.parametrize( + # See comment for test_x_equal_zeta for script for reference values + 'x,alpha,beta,expected', + [ + (1e-4, 1.7720732804618808, 0.5059373136902996, 0.278929165340670), + (1e-4, 1.9217001522410235, -0.8779442746685926, 0.281056564327953), + (1e-4, 1.5654806051633634, -0.4016220341911392, 0.271252432161167), + (1e-4, 1.7420803447784388, -0.38180029468259247, 0.280205311264134), + (1e-4, 1.5748002527689913, -0.25200194914153684, 0.280140965235426), + (-1e-4, 1.7720732804618808, 0.5059373136902996, 0.278936106741754), + (-1e-4, 1.9217001522410235, -0.8779442746685926, 0.281052948629429), + (-1e-4, 1.5654806051633634, -0.4016220341911392, 0.271275394392385), + (-1e-4, 1.7420803447784388, -0.38180029468259247, 0.280199085645099), + (-1e-4, 1.5748002527689913, -0.25200194914153684, 0.280132185432842), + ] + ) + def test_x_near_zeta( + self, x, alpha, beta, expected + ): + """Test pdf for x near zeta. + + With S1 parametrization: x0 = x + zeta if alpha != 1 So, for x = 0, x0 + will be close to zeta. + + When case "x near zeta" is not handled properly and quad_eps is not + low enough: - pdf may be less than 0 - logpdf is nan + + The points from the parametrize block are found randomly so that PDF is + less than 0. 
+ + Reference values taken from MATLAB + https://www.mathworks.com/help/stats/stable-distribution.html + """ + stats.levy_stable.quad_eps = 1.2e-11 + + assert_almost_equal( + stats.levy_stable.pdf(x, alpha=alpha, beta=beta), + expected, + ) + + +class TestArrayArgument: # test for ticket:992 + def setup_method(self): + np.random.seed(1234) + + def test_noexception(self): + rvs = stats.norm.rvs(loc=(np.arange(5)), scale=np.ones(5), + size=(10, 5)) + assert_equal(rvs.shape, (10, 5)) + + +class TestDocstring: + def test_docstrings(self): + # See ticket #761 + if stats.rayleigh.__doc__ is not None: + assert_("rayleigh" in stats.rayleigh.__doc__.lower()) + if stats.bernoulli.__doc__ is not None: + assert_("bernoulli" in stats.bernoulli.__doc__.lower()) + + def test_no_name_arg(self): + # If name is not given, construction shouldn't fail. See #1508. + stats.rv_continuous() + stats.rv_discrete() + + +def test_args_reduce(): + a = array([1, 3, 2, 1, 2, 3, 3]) + b, c = argsreduce(a > 1, a, 2) + + assert_array_equal(b, [3, 2, 2, 3, 3]) + assert_array_equal(c, [2]) + + b, c = argsreduce(2 > 1, a, 2) + assert_array_equal(b, a) + assert_array_equal(c, [2] * np.size(a)) + + b, c = argsreduce(a > 0, a, 2) + assert_array_equal(b, a) + assert_array_equal(c, [2] * np.size(a)) + + +class TestFitMethod: + skip = ['ncf', 'ksone', 'kstwo'] + + def setup_method(self): + np.random.seed(1234) + + # skip these b/c deprecated, or only loc and scale arguments + fitSkipNonFinite = ['expon', 'norm', 'uniform'] + + @pytest.mark.parametrize('dist,args', distcont) + def test_fit_w_non_finite_data_values(self, dist, args): + """gh-10300""" + if dist in self.fitSkipNonFinite: + pytest.skip("%s fit known to fail or deprecated" % dist) + x = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.nan]) + y = np.array([1.6483, 2.7169, 2.4667, 1.1791, 3.5433, np.inf]) + distfunc = getattr(stats, dist) + assert_raises(ValueError, distfunc.fit, x, fscale=1) + assert_raises(ValueError, distfunc.fit, y, 
fscale=1) + + def test_fix_fit_2args_lognorm(self): + # Regression test for #1551. + np.random.seed(12345) + with np.errstate(all='ignore'): + x = stats.lognorm.rvs(0.25, 0., 20.0, size=20) + expected_shape = np.sqrt(((np.log(x) - np.log(20))**2).mean()) + assert_allclose(np.array(stats.lognorm.fit(x, floc=0, fscale=20)), + [expected_shape, 0, 20], atol=1e-8) + + def test_fix_fit_norm(self): + x = np.arange(1, 6) + + loc, scale = stats.norm.fit(x) + assert_almost_equal(loc, 3) + assert_almost_equal(scale, np.sqrt(2)) + + loc, scale = stats.norm.fit(x, floc=2) + assert_equal(loc, 2) + assert_equal(scale, np.sqrt(3)) + + loc, scale = stats.norm.fit(x, fscale=2) + assert_almost_equal(loc, 3) + assert_equal(scale, 2) + + def test_fix_fit_gamma(self): + x = np.arange(1, 6) + meanlog = np.log(x).mean() + + # A basic test of gamma.fit with floc=0. + floc = 0 + a, loc, scale = stats.gamma.fit(x, floc=floc) + s = np.log(x.mean()) - meanlog + assert_almost_equal(np.log(a) - special.digamma(a), s, decimal=5) + assert_equal(loc, floc) + assert_almost_equal(scale, x.mean()/a, decimal=8) + + # Regression tests for gh-2514. + # The problem was that if `floc=0` was given, any other fixed + # parameters were ignored. + f0 = 1 + floc = 0 + a, loc, scale = stats.gamma.fit(x, f0=f0, floc=floc) + assert_equal(a, f0) + assert_equal(loc, floc) + assert_almost_equal(scale, x.mean()/a, decimal=8) + + f0 = 2 + floc = 0 + a, loc, scale = stats.gamma.fit(x, f0=f0, floc=floc) + assert_equal(a, f0) + assert_equal(loc, floc) + assert_almost_equal(scale, x.mean()/a, decimal=8) + + # loc and scale fixed. + floc = 0 + fscale = 2 + a, loc, scale = stats.gamma.fit(x, floc=floc, fscale=fscale) + assert_equal(loc, floc) + assert_equal(scale, fscale) + c = meanlog - np.log(fscale) + assert_almost_equal(special.digamma(a), c) + + def test_fix_fit_beta(self): + # Test beta.fit when both floc and fscale are given. 
+ + def mlefunc(a, b, x): + # Zeros of this function are critical points of + # the maximum likelihood function. + n = len(x) + s1 = np.log(x).sum() + s2 = np.log(1-x).sum() + psiab = special.psi(a + b) + func = [s1 - n * (-psiab + special.psi(a)), + s2 - n * (-psiab + special.psi(b))] + return func + + # Basic test with floc and fscale given. + x = np.array([0.125, 0.25, 0.5]) + a, b, loc, scale = stats.beta.fit(x, floc=0, fscale=1) + assert_equal(loc, 0) + assert_equal(scale, 1) + assert_allclose(mlefunc(a, b, x), [0, 0], atol=1e-6) + + # Basic test with f0, floc and fscale given. + # This is also a regression test for gh-2514. + x = np.array([0.125, 0.25, 0.5]) + a, b, loc, scale = stats.beta.fit(x, f0=2, floc=0, fscale=1) + assert_equal(a, 2) + assert_equal(loc, 0) + assert_equal(scale, 1) + da, db = mlefunc(a, b, x) + assert_allclose(db, 0, atol=1e-5) + + # Same floc and fscale values as above, but reverse the data + # and fix b (f1). + x2 = 1 - x + a2, b2, loc2, scale2 = stats.beta.fit(x2, f1=2, floc=0, fscale=1) + assert_equal(b2, 2) + assert_equal(loc2, 0) + assert_equal(scale2, 1) + da, db = mlefunc(a2, b2, x2) + assert_allclose(da, 0, atol=1e-5) + # a2 of this test should equal b from above. + assert_almost_equal(a2, b) + + # Check for detection of data out of bounds when floc and fscale + # are given. + assert_raises(ValueError, stats.beta.fit, x, floc=0.5, fscale=1) + y = np.array([0, .5, 1]) + assert_raises(ValueError, stats.beta.fit, y, floc=0, fscale=1) + assert_raises(ValueError, stats.beta.fit, y, floc=0, fscale=1, f0=2) + assert_raises(ValueError, stats.beta.fit, y, floc=0, fscale=1, f1=2) + + # Check that attempting to fix all the parameters raises a ValueError. 
+ assert_raises(ValueError, stats.beta.fit, y, f0=0, f1=1, + floc=2, fscale=3) + + def test_expon_fit(self): + x = np.array([2, 2, 4, 4, 4, 4, 4, 8]) + + loc, scale = stats.expon.fit(x) + assert_equal(loc, 2) # x.min() + assert_equal(scale, 2) # x.mean() - x.min() + + loc, scale = stats.expon.fit(x, fscale=3) + assert_equal(loc, 2) # x.min() + assert_equal(scale, 3) # fscale + + loc, scale = stats.expon.fit(x, floc=0) + assert_equal(loc, 0) # floc + assert_equal(scale, 4) # x.mean() - loc + + def test_lognorm_fit(self): + x = np.array([1.5, 3, 10, 15, 23, 59]) + lnxm1 = np.log(x - 1) + + shape, loc, scale = stats.lognorm.fit(x, floc=1) + assert_allclose(shape, lnxm1.std(), rtol=1e-12) + assert_equal(loc, 1) + assert_allclose(scale, np.exp(lnxm1.mean()), rtol=1e-12) + + shape, loc, scale = stats.lognorm.fit(x, floc=1, fscale=6) + assert_allclose(shape, np.sqrt(((lnxm1 - np.log(6))**2).mean()), + rtol=1e-12) + assert_equal(loc, 1) + assert_equal(scale, 6) + + shape, loc, scale = stats.lognorm.fit(x, floc=1, fix_s=0.75) + assert_equal(shape, 0.75) + assert_equal(loc, 1) + assert_allclose(scale, np.exp(lnxm1.mean()), rtol=1e-12) + + def test_uniform_fit(self): + x = np.array([1.0, 1.1, 1.2, 9.0]) + + loc, scale = stats.uniform.fit(x) + assert_equal(loc, x.min()) + assert_equal(scale, np.ptp(x)) + + loc, scale = stats.uniform.fit(x, floc=0) + assert_equal(loc, 0) + assert_equal(scale, x.max()) + + loc, scale = stats.uniform.fit(x, fscale=10) + assert_equal(loc, 0) + assert_equal(scale, 10) + + assert_raises(ValueError, stats.uniform.fit, x, floc=2.0) + assert_raises(ValueError, stats.uniform.fit, x, fscale=5.0) + + @pytest.mark.slow + @pytest.mark.parametrize("method", ["MLE", "MM"]) + def test_fshapes(self, method): + # take a beta distribution, with shapes='a, b', and make sure that + # fa is equivalent to f0, and fb is equivalent to f1 + a, b = 3., 4. 
+ x = stats.beta.rvs(a, b, size=100, random_state=1234) + res_1 = stats.beta.fit(x, f0=3., method=method) + res_2 = stats.beta.fit(x, fa=3., method=method) + assert_allclose(res_1, res_2, atol=1e-12, rtol=1e-12) + + res_2 = stats.beta.fit(x, fix_a=3., method=method) + assert_allclose(res_1, res_2, atol=1e-12, rtol=1e-12) + + res_3 = stats.beta.fit(x, f1=4., method=method) + res_4 = stats.beta.fit(x, fb=4., method=method) + assert_allclose(res_3, res_4, atol=1e-12, rtol=1e-12) + + res_4 = stats.beta.fit(x, fix_b=4., method=method) + assert_allclose(res_3, res_4, atol=1e-12, rtol=1e-12) + + # cannot specify both positional and named args at the same time + assert_raises(ValueError, stats.beta.fit, x, fa=1, f0=2, method=method) + + # check that attempting to fix all parameters raises a ValueError + assert_raises(ValueError, stats.beta.fit, x, fa=0, f1=1, + floc=2, fscale=3, method=method) + + # check that specifying floc, fscale and fshapes works for + # beta and gamma which override the generic fit method + res_5 = stats.beta.fit(x, fa=3., floc=0, fscale=1, method=method) + aa, bb, ll, ss = res_5 + assert_equal([aa, ll, ss], [3., 0, 1]) + + # gamma distribution + a = 3. + data = stats.gamma.rvs(a, size=100) + aa, ll, ss = stats.gamma.fit(data, fa=a, method=method) + assert_equal(aa, a) + + @pytest.mark.parametrize("method", ["MLE", "MM"]) + def test_extra_params(self, method): + # unknown parameters should raise rather than be silently ignored + dist = stats.exponnorm + data = dist.rvs(K=2, size=100) + dct = dict(enikibeniki=-101) + assert_raises(TypeError, dist.fit, data, **dct, method=method) + + +class TestFrozen: + def setup_method(self): + np.random.seed(1234) + + # Test that a frozen distribution gives the same results as the original + # object. + # + # Only tested for the normal distribution (with loc and scale specified) + # and for the gamma distribution (with a shape parameter specified). 
+ def test_norm(self): + dist = stats.norm + frozen = stats.norm(loc=10.0, scale=3.0) + + result_f = frozen.pdf(20.0) + result = dist.pdf(20.0, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.cdf(20.0) + result = dist.cdf(20.0, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.ppf(0.25) + result = dist.ppf(0.25, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.isf(0.25) + result = dist.isf(0.25, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.sf(10.0) + result = dist.sf(10.0, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.median() + result = dist.median(loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.mean() + result = dist.mean(loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.var() + result = dist.var(loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.std() + result = dist.std(loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.entropy() + result = dist.entropy(loc=10.0, scale=3.0) + assert_equal(result_f, result) + + result_f = frozen.moment(2) + result = dist.moment(2, loc=10.0, scale=3.0) + assert_equal(result_f, result) + + assert_equal(frozen.a, dist.a) + assert_equal(frozen.b, dist.b) + + def test_gamma(self): + a = 2.0 + dist = stats.gamma + frozen = stats.gamma(a) + + result_f = frozen.pdf(20.0) + result = dist.pdf(20.0, a) + assert_equal(result_f, result) + + result_f = frozen.cdf(20.0) + result = dist.cdf(20.0, a) + assert_equal(result_f, result) + + result_f = frozen.ppf(0.25) + result = dist.ppf(0.25, a) + assert_equal(result_f, result) + + result_f = frozen.isf(0.25) + result = dist.isf(0.25, a) + assert_equal(result_f, result) + + result_f = frozen.sf(10.0) + result = dist.sf(10.0, a) + assert_equal(result_f, result) + + result_f = frozen.median() + result = dist.median(a) + assert_equal(result_f, result) + + 
result_f = frozen.mean() + result = dist.mean(a) + assert_equal(result_f, result) + + result_f = frozen.var() + result = dist.var(a) + assert_equal(result_f, result) + + result_f = frozen.std() + result = dist.std(a) + assert_equal(result_f, result) + + result_f = frozen.entropy() + result = dist.entropy(a) + assert_equal(result_f, result) + + result_f = frozen.moment(2) + result = dist.moment(2, a) + assert_equal(result_f, result) + + assert_equal(frozen.a, frozen.dist.a) + assert_equal(frozen.b, frozen.dist.b) + + def test_regression_ticket_1293(self): + # Create a frozen distribution. + frozen = stats.lognorm(1) + # Call one of its methods that does not take any keyword arguments. + m1 = frozen.moment(2) + # Now call a method that takes a keyword argument. + frozen.stats(moments='mvsk') + # Call moment(2) again. + # After calling stats(), the following was raising an exception. + # So this test passes if the following does not raise an exception. + m2 = frozen.moment(2) + # The following should also be true, of course. But it is not + # the focus of this test. 
+ assert_equal(m1, m2) + + def test_ab(self): + # test that the support of a frozen distribution + # (i) remains frozen even if it changes for the original one + # (ii) is actually correct if the shape parameters are such that + # the values of [a, b] are not the default [0, inf] + # take a genpareto as an example where the support + # depends on the value of the shape parameter: + # for c > 0: a, b = 0, inf + # for c < 0: a, b = 0, -1/c + + c = -0.1 + rv = stats.genpareto(c=c) + a, b = rv.dist._get_support(c) + assert_equal([a, b], [0., 10.]) + + c = 0.1 + stats.genpareto.pdf(0, c=c) + assert_equal(rv.dist._get_support(c), [0, np.inf]) + + c = -0.1 + rv = stats.genpareto(c=c) + a, b = rv.dist._get_support(c) + assert_equal([a, b], [0., 10.]) + + c = 0.1 + stats.genpareto.pdf(0, c) # this should NOT change genpareto.b + assert_equal((rv.dist.a, rv.dist.b), stats.genpareto._get_support(c)) + + rv1 = stats.genpareto(c=0.1) + assert_(rv1.dist is not rv.dist) + + # c >= 0: a, b = [0, inf] + for c in [1., 0.]: + c = np.asarray(c) + rv = stats.genpareto(c=c) + a, b = rv.a, rv.b + assert_equal(a, 0.) + assert_(np.isposinf(b)) + + # c < 0: a=0, b=1/|c| + c = np.asarray(-2.) + a, b = stats.genpareto._get_support(c) + assert_allclose([a, b], [0., 0.5]) + + def test_rv_frozen_in_namespace(self): + # Regression test for gh-3522 + assert_(hasattr(stats.distributions, 'rv_frozen')) + + def test_random_state(self): + # only check that the random_state attribute exists, + frozen = stats.norm() + assert_(hasattr(frozen, 'random_state')) + + # ... that it can be set, + frozen.random_state = 42 + assert_equal(frozen.random_state.get_state(), + np.random.RandomState(42).get_state()) + + # ... 
and that .rvs method accepts it as an argument + rndm = np.random.RandomState(1234) + frozen.rvs(size=8, random_state=rndm) + + def test_pickling(self): + # test that a frozen instance pickles and unpickles + # (this method is a clone of common_tests.check_pickling) + beta = stats.beta(2.3098496451481823, 0.62687954300963677) + poiss = stats.poisson(3.) + sample = stats.rv_discrete(values=([0, 1, 2, 3], + [0.1, 0.2, 0.3, 0.4])) + + for distfn in [beta, poiss, sample]: + distfn.random_state = 1234 + distfn.rvs(size=8) + s = pickle.dumps(distfn) + r0 = distfn.rvs(size=8) + + unpickled = pickle.loads(s) + r1 = unpickled.rvs(size=8) + assert_equal(r0, r1) + + # also smoke test some methods + medians = [distfn.ppf(0.5), unpickled.ppf(0.5)] + assert_equal(medians[0], medians[1]) + assert_equal(distfn.cdf(medians[0]), + unpickled.cdf(medians[1])) + + def test_expect(self): + # smoke test the expect method of the frozen distribution + # only take a gamma w/loc and scale and poisson with loc specified + def func(x): + return x + + gm = stats.gamma(a=2, loc=3, scale=4) + with np.errstate(invalid="ignore", divide="ignore"): + gm_val = gm.expect(func, lb=1, ub=2, conditional=True) + gamma_val = stats.gamma.expect(func, args=(2,), loc=3, scale=4, + lb=1, ub=2, conditional=True) + assert_allclose(gm_val, gamma_val) + + p = stats.poisson(3, loc=4) + p_val = p.expect(func) + poisson_val = stats.poisson.expect(func, args=(3,), loc=4) + assert_allclose(p_val, poisson_val) + + +class TestExpect: + # Test for expect method. 
+ # + # Uses normal distribution and beta distribution for finite bounds, and + # hypergeom for discrete distribution with finite support + def test_norm(self): + v = stats.norm.expect(lambda x: (x-5)*(x-5), loc=5, scale=2) + assert_almost_equal(v, 4, decimal=14) + + m = stats.norm.expect(lambda x: (x), loc=5, scale=2) + assert_almost_equal(m, 5, decimal=14) + + lb = stats.norm.ppf(0.05, loc=5, scale=2) + ub = stats.norm.ppf(0.95, loc=5, scale=2) + prob90 = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub) + assert_almost_equal(prob90, 0.9, decimal=14) + + prob90c = stats.norm.expect(lambda x: 1, loc=5, scale=2, lb=lb, ub=ub, + conditional=True) + assert_almost_equal(prob90c, 1., decimal=14) + + def test_beta(self): + # case with finite support interval + v = stats.beta.expect(lambda x: (x-19/3.)*(x-19/3.), args=(10, 5), + loc=5, scale=2) + assert_almost_equal(v, 1./18., decimal=13) + + m = stats.beta.expect(lambda x: x, args=(10, 5), loc=5., scale=2.) + assert_almost_equal(m, 19/3., decimal=13) + + ub = stats.beta.ppf(0.95, 10, 10, loc=5, scale=2) + lb = stats.beta.ppf(0.05, 10, 10, loc=5, scale=2) + prob90 = stats.beta.expect(lambda x: 1., args=(10, 10), loc=5., + scale=2., lb=lb, ub=ub, conditional=False) + assert_almost_equal(prob90, 0.9, decimal=13) + + prob90c = stats.beta.expect(lambda x: 1, args=(10, 10), loc=5, + scale=2, lb=lb, ub=ub, conditional=True) + assert_almost_equal(prob90c, 1., decimal=13) + + def test_hypergeom(self): + # test case with finite bounds + + # without specifying bounds + m_true, v_true = stats.hypergeom.stats(20, 10, 8, loc=5.) + m = stats.hypergeom.expect(lambda x: x, args=(20, 10, 8), loc=5.) + assert_almost_equal(m, m_true, decimal=13) + + v = stats.hypergeom.expect(lambda x: (x-9.)**2, args=(20, 10, 8), + loc=5.) 
+ assert_almost_equal(v, v_true, decimal=14) + + # with bounds, bounds equal to shifted support + v_bounds = stats.hypergeom.expect(lambda x: (x-9.)**2, + args=(20, 10, 8), + loc=5., lb=5, ub=13) + assert_almost_equal(v_bounds, v_true, decimal=14) + + # drop boundary points + prob_true = 1-stats.hypergeom.pmf([5, 13], 20, 10, 8, loc=5).sum() + prob_bounds = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), + loc=5., lb=6, ub=12) + assert_almost_equal(prob_bounds, prob_true, decimal=13) + + # conditional + prob_bc = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), loc=5., + lb=6, ub=12, conditional=True) + assert_almost_equal(prob_bc, 1, decimal=14) + + # check simple integral + prob_b = stats.hypergeom.expect(lambda x: 1, args=(20, 10, 8), + lb=0, ub=8) + assert_almost_equal(prob_b, 1, decimal=13) + + def test_poisson(self): + # poisson, use lower bound only + prob_bounds = stats.poisson.expect(lambda x: 1, args=(2,), lb=3, + conditional=False) + prob_b_true = 1-stats.poisson.cdf(2, 2) + assert_almost_equal(prob_bounds, prob_b_true, decimal=14) + + prob_lb = stats.poisson.expect(lambda x: 1, args=(2,), lb=2, + conditional=True) + assert_almost_equal(prob_lb, 1, decimal=14) + + def test_genhalflogistic(self): + # genhalflogistic, changes upper bound of support in _argcheck + # regression test for gh-2622 + halflog = stats.genhalflogistic + # check consistency when calling expect twice with the same input + res1 = halflog.expect(args=(1.5,)) + halflog.expect(args=(0.5,)) + res2 = halflog.expect(args=(1.5,)) + assert_almost_equal(res1, res2, decimal=14) + + def test_rice_overflow(self): + # rice.pdf(999, 0.74) was inf since special.i0 silentyly overflows + # check that using i0e fixes it + assert_(np.isfinite(stats.rice.pdf(999, 0.74))) + + assert_(np.isfinite(stats.rice.expect(lambda x: 1, args=(0.74,)))) + assert_(np.isfinite(stats.rice.expect(lambda x: 2, args=(0.74,)))) + assert_(np.isfinite(stats.rice.expect(lambda x: 3, args=(0.74,)))) + + def 
test_logser(self): + # test a discrete distribution with infinite support and loc + p, loc = 0.3, 3 + res_0 = stats.logser.expect(lambda k: k, args=(p,)) + # check against the correct answer (sum of a geom series) + assert_allclose(res_0, + p / (p - 1.) / np.log(1. - p), atol=1e-15) + + # now check it with `loc` + res_l = stats.logser.expect(lambda k: k, args=(p,), loc=loc) + assert_allclose(res_l, res_0 + loc, atol=1e-15) + + def test_skellam(self): + # Use a discrete distribution w/ bi-infinite support. Compute two first + # moments and compare to known values (cf skellam.stats) + p1, p2 = 18, 22 + m1 = stats.skellam.expect(lambda x: x, args=(p1, p2)) + m2 = stats.skellam.expect(lambda x: x**2, args=(p1, p2)) + assert_allclose(m1, p1 - p2, atol=1e-12) + assert_allclose(m2 - m1**2, p1 + p2, atol=1e-12) + + def test_randint(self): + # Use a discrete distribution w/ parameter-dependent support, which + # is larger than the default chunksize + lo, hi = 0, 113 + res = stats.randint.expect(lambda x: x, (lo, hi)) + assert_allclose(res, + sum(_ for _ in range(lo, hi)) / (hi - lo), atol=1e-15) + + def test_zipf(self): + # Test that there is no infinite loop even if the sum diverges + assert_warns(RuntimeWarning, stats.zipf.expect, + lambda x: x**2, (2,)) + + def test_discrete_kwds(self): + # check that discrete expect accepts keywords to control the summation + n0 = stats.poisson.expect(lambda x: 1, args=(2,)) + n1 = stats.poisson.expect(lambda x: 1, args=(2,), + maxcount=1001, chunksize=32, tolerance=1e-8) + assert_almost_equal(n0, n1, decimal=14) + + def test_moment(self): + # test the .moment() method: compute a higher moment and compare to + # a known value + def poiss_moment5(mu): + return mu**5 + 10*mu**4 + 25*mu**3 + 15*mu**2 + mu + + for mu in [5, 7]: + m5 = stats.poisson.moment(5, mu) + assert_allclose(m5, poiss_moment5(mu), rtol=1e-10) + + def test_challenging_cases_gh8928(self): + # Several cases where `expect` failed to produce a correct result were + # 
reported in gh-8928. Check that these cases have been resolved. + assert_allclose(stats.norm.expect(loc=36, scale=1.0), 36) + assert_allclose(stats.norm.expect(loc=40, scale=1.0), 40) + assert_allclose(stats.norm.expect(loc=10, scale=0.1), 10) + assert_allclose(stats.gamma.expect(args=(148,)), 148) + assert_allclose(stats.logistic.expect(loc=85), 85) + + def test_lb_ub_gh15855(self): + # Make sure changes to `expect` made in gh15855 treat lb/ub correctly + dist = stats.uniform + ref = dist.mean(loc=10, scale=5) # 12.5 + # moment over whole distribution + assert_allclose(dist.expect(loc=10, scale=5), ref) + # moment over whole distribution, lb and ub outside of support + assert_allclose(dist.expect(loc=10, scale=5, lb=9, ub=16), ref) + # moment over 60% of distribution, [lb, ub] centered within support + assert_allclose(dist.expect(loc=10, scale=5, lb=11, ub=14), ref*0.6) + # moment over truncated distribution, essentially + assert_allclose(dist.expect(loc=10, scale=5, lb=11, ub=14, + conditional=True), ref) + # moment over 40% of distribution, [lb, ub] not centered within support + assert_allclose(dist.expect(loc=10, scale=5, lb=11, ub=13), 12*0.4) + # moment with lb > ub + assert_allclose(dist.expect(loc=10, scale=5, lb=13, ub=11), -12*0.4) + # moment with lb > ub, conditional + assert_allclose(dist.expect(loc=10, scale=5, lb=13, ub=11, + conditional=True), 12) + + +class TestNct: + def test_nc_parameter(self): + # Parameter values c<=0 were not enabled (gh-2402). 
+ # For negative values c and for c=0 results of rv.cdf(0) below were nan + rv = stats.nct(5, 0) + assert_equal(rv.cdf(0), 0.5) + rv = stats.nct(5, -1) + assert_almost_equal(rv.cdf(0), 0.841344746069, decimal=10) + + def test_broadcasting(self): + res = stats.nct.pdf(5, np.arange(4, 7)[:, None], + np.linspace(0.1, 1, 4)) + expected = array([[0.00321886, 0.00557466, 0.00918418, 0.01442997], + [0.00217142, 0.00395366, 0.00683888, 0.01126276], + [0.00153078, 0.00291093, 0.00525206, 0.00900815]]) + assert_allclose(res, expected, rtol=1e-5) + + def test_variance_gh_issue_2401(self): + # Computation of the variance of a non-central t-distribution resulted + # in a TypeError: ufunc 'isinf' not supported for the input types, + # and the inputs could not be safely coerced to any supported types + # according to the casting rule 'safe' + rv = stats.nct(4, 0) + assert_equal(rv.var(), 2.0) + + def test_nct_inf_moments(self): + # n-th moment of nct only exists for df > n + m, v, s, k = stats.nct.stats(df=0.9, nc=0.3, moments='mvsk') + assert_equal([m, v, s, k], [np.nan, np.nan, np.nan, np.nan]) + + m, v, s, k = stats.nct.stats(df=1.9, nc=0.3, moments='mvsk') + assert_(np.isfinite(m)) + assert_equal([v, s, k], [np.nan, np.nan, np.nan]) + + m, v, s, k = stats.nct.stats(df=3.1, nc=0.3, moments='mvsk') + assert_(np.isfinite([m, v, s]).all()) + assert_equal(k, np.nan) + + def test_nct_stats_large_df_values(self): + # previously gamma function was used which lost precision at df=345 + # cf. https://github.com/scipy/scipy/issues/12919 for details + nct_mean_df_1000 = stats.nct.mean(1000, 2) + nct_stats_df_1000 = stats.nct.stats(1000, 2) + # These expected values were computed with mpmath. 
They were also + # verified with the Wolfram Alpha expressions: + # Mean[NoncentralStudentTDistribution[1000, 2]] + # Var[NoncentralStudentTDistribution[1000, 2]] + expected_stats_df_1000 = [2.0015015641422464, 1.0040115288163005] + assert_allclose(nct_mean_df_1000, expected_stats_df_1000[0], + rtol=1e-10) + assert_allclose(nct_stats_df_1000, expected_stats_df_1000, + rtol=1e-10) + # and a bigger df value + nct_mean = stats.nct.mean(100000, 2) + nct_stats = stats.nct.stats(100000, 2) + # These expected values were computed with mpmath. + expected_stats = [2.0000150001562518, 1.0000400011500288] + assert_allclose(nct_mean, expected_stats[0], rtol=1e-10) + assert_allclose(nct_stats, expected_stats, rtol=1e-9) + + def test_cdf_large_nc(self): + # gh-17916 reported a crash with large `nc` values + assert_allclose(stats.nct.cdf(2, 2, float(2**16)), 0) + + +class TestRecipInvGauss: + + def test_pdf_endpoint(self): + p = stats.recipinvgauss.pdf(0, 0.6) + assert p == 0.0 + + def test_logpdf_endpoint(self): + logp = stats.recipinvgauss.logpdf(0, 0.6) + assert logp == -np.inf + + def test_cdf_small_x(self): + # The expected value was computer with mpmath: + # + # import mpmath + # + # mpmath.mp.dps = 100 + # + # def recipinvgauss_cdf_mp(x, mu): + # x = mpmath.mpf(x) + # mu = mpmath.mpf(mu) + # trm1 = 1/mu - x + # trm2 = 1/mu + x + # isqx = 1/mpmath.sqrt(x) + # return (mpmath.ncdf(-isqx*trm1) + # - mpmath.exp(2/mu)*mpmath.ncdf(-isqx*trm2)) + # + p = stats.recipinvgauss.cdf(0.05, 0.5) + expected = 6.590396159501331e-20 + assert_allclose(p, expected, rtol=1e-14) + + def test_sf_large_x(self): + # The expected value was computed with mpmath; see test_cdf_small. + p = stats.recipinvgauss.sf(80, 0.5) + expected = 2.699819200556787e-18 + assert_allclose(p, expected, 5e-15) + + +class TestRice: + def test_rice_zero_b(self): + # rice distribution should work with b=0, cf gh-2164 + x = [0.2, 1., 5.] 
+ assert_(np.isfinite(stats.rice.pdf(x, b=0.)).all()) + assert_(np.isfinite(stats.rice.logpdf(x, b=0.)).all()) + assert_(np.isfinite(stats.rice.cdf(x, b=0.)).all()) + assert_(np.isfinite(stats.rice.logcdf(x, b=0.)).all()) + + q = [0.1, 0.1, 0.5, 0.9] + assert_(np.isfinite(stats.rice.ppf(q, b=0.)).all()) + + mvsk = stats.rice.stats(0, moments='mvsk') + assert_(np.isfinite(mvsk).all()) + + # furthermore, pdf is continuous as b\to 0 + # rice.pdf(x, b\to 0) = x exp(-x^2/2) + O(b^2) + # see e.g. Abramovich & Stegun 9.6.7 & 9.6.10 + b = 1e-8 + assert_allclose(stats.rice.pdf(x, 0), stats.rice.pdf(x, b), + atol=b, rtol=0) + + def test_rice_rvs(self): + rvs = stats.rice.rvs + assert_equal(rvs(b=3.).size, 1) + assert_equal(rvs(b=3., size=(3, 5)).shape, (3, 5)) + + def test_rice_gh9836(self): + # test that gh-9836 is resolved; previously jumped to 1 at the end + + cdf = stats.rice.cdf(np.arange(10, 160, 10), np.arange(10, 160, 10)) + # Generated in R + # library(VGAM) + # options(digits=16) + # x = seq(10, 150, 10) + # print(price(x, sigma=1, vee=x)) + cdf_exp = [0.4800278103504522, 0.4900233218590353, 0.4933500379379548, + 0.4950128317658719, 0.4960103776798502, 0.4966753655438764, + 0.4971503395812474, 0.4975065620443196, 0.4977836197921638, + 0.4980052636649550, 0.4981866072661382, 0.4983377260666599, + 0.4984655952615694, 0.4985751970541413, 0.4986701850071265] + assert_allclose(cdf, cdf_exp) + + probabilities = np.arange(0.1, 1, 0.1) + ppf = stats.rice.ppf(probabilities, 500/4, scale=4) + # Generated in R + # library(VGAM) + # options(digits=16) + # p = seq(0.1, .9, by = .1) + # print(qrice(p, vee = 500, sigma = 4)) + ppf_exp = [494.8898762347361, 496.6495690858350, 497.9184315188069, + 499.0026277378915, 500.0159999146250, 501.0293721352668, + 502.1135684981884, 503.3824312270405, 505.1421247157822] + assert_allclose(ppf, ppf_exp) + + ppf = scipy.stats.rice.ppf(0.5, np.arange(10, 150, 10)) + # Generated in R + # library(VGAM) + # options(digits=16) + # b <- seq(10, 140, 
10) + # print(qrice(0.5, vee = b, sigma = 1)) + ppf_exp = [10.04995862522287, 20.02499480078302, 30.01666512465732, + 40.01249934924363, 50.00999966676032, 60.00833314046875, + 70.00714273568241, 80.00624991862573, 90.00555549840364, + 100.00499995833597, 110.00454542324384, 120.00416664255323, + 130.00384613488120, 140.00357141338748] + assert_allclose(ppf, ppf_exp) + + +class TestErlang: + def setup_method(self): + np.random.seed(1234) + + def test_erlang_runtimewarning(self): + # erlang should generate a RuntimeWarning if a non-integer + # shape parameter is used. + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + + # The non-integer shape parameter 1.3 should trigger a + # RuntimeWarning + assert_raises(RuntimeWarning, + stats.erlang.rvs, 1.3, loc=0, scale=1, size=4) + + # Calling the fit method with `f0` set to an integer should + # *not* trigger a RuntimeWarning. It should return the same + # values as gamma.fit(...). + data = [0.5, 1.0, 2.0, 4.0] + result_erlang = stats.erlang.fit(data, f0=1) + result_gamma = stats.gamma.fit(data, f0=1) + assert_allclose(result_erlang, result_gamma, rtol=1e-3) + + def test_gh_pr_10949_argcheck(self): + assert_equal(stats.erlang.pdf(0.5, a=[1, -1]), + stats.gamma.pdf(0.5, a=[1, -1])) + + +class TestRayleigh: + def setup_method(self): + np.random.seed(987654321) + + # gh-6227 + def test_logpdf(self): + y = stats.rayleigh.logpdf(50) + assert_allclose(y, -1246.0879769945718) + + def test_logsf(self): + y = stats.rayleigh.logsf(50) + assert_allclose(y, -1250) + + @pytest.mark.parametrize("rvs_loc,rvs_scale", [(0.85373171, 0.86932204), + (0.20558821, 0.61621008)]) + def test_fit(self, rvs_loc, rvs_scale): + data = stats.rayleigh.rvs(size=250, loc=rvs_loc, scale=rvs_scale) + + def scale_mle(data, floc): + return (np.sum((data - floc) ** 2) / (2 * len(data))) ** .5 + + # when `floc` is provided, `scale` is found with an analytical formula + scale_expect = scale_mle(data, rvs_loc) + loc, scale = 
stats.rayleigh.fit(data, floc=rvs_loc) + assert_equal(loc, rvs_loc) + assert_equal(scale, scale_expect) + + # when `fscale` is fixed, superclass fit is used to determine `loc`. + loc, scale = stats.rayleigh.fit(data, fscale=.6) + assert_equal(scale, .6) + + # with both parameters free, one dimensional optimization is done + # over a new function that takes into account the dependent relation + # of `scale` to `loc`. + loc, scale = stats.rayleigh.fit(data) + # test that `scale` is defined by its relation to `loc` + assert_equal(scale, scale_mle(data, loc)) + + @pytest.mark.parametrize("rvs_loc,rvs_scale", [[0.74, 0.01], + [0.08464463, 0.12069025]]) + def test_fit_comparison_super_method(self, rvs_loc, rvs_scale): + # test that the objective function result of the analytical MLEs is + # less than or equal to that of the numerically optimized estimate + data = stats.rayleigh.rvs(size=250, loc=rvs_loc, scale=rvs_scale) + _assert_less_or_close_loglike(stats.rayleigh, data) + + def test_fit_warnings(self): + assert_fit_warnings(stats.rayleigh) + + def test_fit_gh17088(self): + # `rayleigh.fit` could return a location that was inconsistent with + # the data. See gh-17088. + rng = np.random.default_rng(456) + loc, scale, size = 50, 600, 500 + rvs = stats.rayleigh.rvs(loc, scale, size=size, random_state=rng) + loc_fit, _ = stats.rayleigh.fit(rvs) + assert loc_fit < np.min(rvs) + loc_fit, scale_fit = stats.rayleigh.fit(rvs, fscale=scale) + assert loc_fit < np.min(rvs) + assert scale_fit == scale + + +class TestExponWeib: + + def test_pdf_logpdf(self): + # Regression test for gh-3508. + x = 0.1 + a = 1.0 + c = 100.0 + p = stats.exponweib.pdf(x, a, c) + logp = stats.exponweib.logpdf(x, a, c) + # Expected values were computed with mpmath. + assert_allclose([p, logp], + [1.0000000000000054e-97, -223.35075402042244]) + + def test_a_is_1(self): + # For issue gh-3508. + # Check that when a=1, the pdf and logpdf methods of exponweib are the + # same as those of weibull_min. 
+ x = np.logspace(-4, -1, 4) + a = 1 + c = 100 + + p = stats.exponweib.pdf(x, a, c) + expected = stats.weibull_min.pdf(x, c) + assert_allclose(p, expected) + + logp = stats.exponweib.logpdf(x, a, c) + expected = stats.weibull_min.logpdf(x, c) + assert_allclose(logp, expected) + + def test_a_is_1_c_is_1(self): + # When a = 1 and c = 1, the distribution is exponential. + x = np.logspace(-8, 1, 10) + a = 1 + c = 1 + + p = stats.exponweib.pdf(x, a, c) + expected = stats.expon.pdf(x) + assert_allclose(p, expected) + + logp = stats.exponweib.logpdf(x, a, c) + expected = stats.expon.logpdf(x) + assert_allclose(logp, expected) + + # Reference values were computed with mpmath, e.g: + # + # from mpmath import mp + # + # def mp_sf(x, a, c): + # x = mp.mpf(x) + # a = mp.mpf(a) + # c = mp.mpf(c) + # return -mp.powm1(-mp.expm1(-x**c)), a) + # + # mp.dps = 100 + # print(float(mp_sf(1, 2.5, 0.75))) + # + # prints + # + # 0.6823127476985246 + # + @pytest.mark.parametrize( + 'x, a, c, ref', + [(1, 2.5, 0.75, 0.6823127476985246), + (50, 2.5, 0.75, 1.7056666054719663e-08), + (125, 2.5, 0.75, 1.4534393150714602e-16), + (250, 2.5, 0.75, 1.2391389689773512e-27), + (250, 0.03125, 0.75, 1.548923711221689e-29), + (3, 0.03125, 3.0, 5.873527551689983e-14), + (2e80, 10.0, 0.02, 2.9449084156902135e-17)] + ) + def test_sf(self, x, a, c, ref): + sf = stats.exponweib.sf(x, a, c) + assert_allclose(sf, ref, rtol=1e-14) + + # Reference values were computed with mpmath, e.g. 
+ # + # from mpmath import mp + # + # def mp_isf(p, a, c): + # p = mp.mpf(p) + # a = mp.mpf(a) + # c = mp.mpf(c) + # return (-mp.log(-mp.expm1(mp.log1p(-p)/a)))**(1/c) + # + # mp.dps = 100 + # print(float(mp_isf(0.25, 2.5, 0.75))) + # + # prints + # + # 2.8946008178158924 + # + @pytest.mark.parametrize( + 'p, a, c, ref', + [(0.25, 2.5, 0.75, 2.8946008178158924), + (3e-16, 2.5, 0.75, 121.77966713102938), + (1e-12, 1, 2, 5.256521769756932), + (2e-13, 0.03125, 3, 2.953915059484589), + (5e-14, 10.0, 0.02, 7.57094886384687e+75)] + ) + def test_isf(self, p, a, c, ref): + isf = stats.exponweib.isf(p, a, c) + assert_allclose(isf, ref, rtol=5e-14) + + +class TestFatigueLife: + + def test_sf_tail(self): + # Expected value computed with mpmath: + # import mpmath + # mpmath.mp.dps = 80 + # x = mpmath.mpf(800.0) + # c = mpmath.mpf(2.5) + # s = float(1 - mpmath.ncdf(1/c * (mpmath.sqrt(x) + # - 1/mpmath.sqrt(x)))) + # print(s) + # Output: + # 6.593376447038406e-30 + s = stats.fatiguelife.sf(800.0, 2.5) + assert_allclose(s, 6.593376447038406e-30, rtol=1e-13) + + def test_isf_tail(self): + # See test_sf_tail for the mpmath code. + p = 6.593376447038406e-30 + q = stats.fatiguelife.isf(p, 2.5) + assert_allclose(q, 800.0, rtol=1e-13) + + +class TestWeibull: + + def test_logpdf(self): + # gh-6217 + y = stats.weibull_min.logpdf(0, 1) + assert_equal(y, 0) + + def test_with_maxima_distrib(self): + # Tests for weibull_min and weibull_max. + # The expected values were computed using the symbolic algebra + # program 'maxima' with the package 'distrib', which has + # 'pdf_weibull' and 'cdf_weibull'. 
The mapping between the + # scipy and maxima functions is as follows: + # ----------------------------------------------------------------- + # scipy maxima + # --------------------------------- ------------------------------ + # weibull_min.pdf(x, a, scale=b) pdf_weibull(x, a, b) + # weibull_min.logpdf(x, a, scale=b) log(pdf_weibull(x, a, b)) + # weibull_min.cdf(x, a, scale=b) cdf_weibull(x, a, b) + # weibull_min.logcdf(x, a, scale=b) log(cdf_weibull(x, a, b)) + # weibull_min.sf(x, a, scale=b) 1 - cdf_weibull(x, a, b) + # weibull_min.logsf(x, a, scale=b) log(1 - cdf_weibull(x, a, b)) + # + # weibull_max.pdf(x, a, scale=b) pdf_weibull(-x, a, b) + # weibull_max.logpdf(x, a, scale=b) log(pdf_weibull(-x, a, b)) + # weibull_max.cdf(x, a, scale=b) 1 - cdf_weibull(-x, a, b) + # weibull_max.logcdf(x, a, scale=b) log(1 - cdf_weibull(-x, a, b)) + # weibull_max.sf(x, a, scale=b) cdf_weibull(-x, a, b) + # weibull_max.logsf(x, a, scale=b) log(cdf_weibull(-x, a, b)) + # ----------------------------------------------------------------- + x = 1.5 + a = 2.0 + b = 3.0 + + # weibull_min + + p = stats.weibull_min.pdf(x, a, scale=b) + assert_allclose(p, np.exp(-0.25)/3) + + lp = stats.weibull_min.logpdf(x, a, scale=b) + assert_allclose(lp, -0.25 - np.log(3)) + + c = stats.weibull_min.cdf(x, a, scale=b) + assert_allclose(c, -special.expm1(-0.25)) + + lc = stats.weibull_min.logcdf(x, a, scale=b) + assert_allclose(lc, np.log(-special.expm1(-0.25))) + + s = stats.weibull_min.sf(x, a, scale=b) + assert_allclose(s, np.exp(-0.25)) + + ls = stats.weibull_min.logsf(x, a, scale=b) + assert_allclose(ls, -0.25) + + # Also test using a large value x, for which computing the survival + # function using the CDF would result in 0. 
+ s = stats.weibull_min.sf(30, 2, scale=3) + assert_allclose(s, np.exp(-100)) + + ls = stats.weibull_min.logsf(30, 2, scale=3) + assert_allclose(ls, -100) + + # weibull_max + x = -1.5 + + p = stats.weibull_max.pdf(x, a, scale=b) + assert_allclose(p, np.exp(-0.25)/3) + + lp = stats.weibull_max.logpdf(x, a, scale=b) + assert_allclose(lp, -0.25 - np.log(3)) + + c = stats.weibull_max.cdf(x, a, scale=b) + assert_allclose(c, np.exp(-0.25)) + + lc = stats.weibull_max.logcdf(x, a, scale=b) + assert_allclose(lc, -0.25) + + s = stats.weibull_max.sf(x, a, scale=b) + assert_allclose(s, -special.expm1(-0.25)) + + ls = stats.weibull_max.logsf(x, a, scale=b) + assert_allclose(ls, np.log(-special.expm1(-0.25))) + + # Also test using a value of x close to 0, for which computing the + # survival function using the CDF would result in 0. + s = stats.weibull_max.sf(-1e-9, 2, scale=3) + assert_allclose(s, -special.expm1(-1/9000000000000000000)) + + ls = stats.weibull_max.logsf(-1e-9, 2, scale=3) + assert_allclose(ls, np.log(-special.expm1(-1/9000000000000000000))) + + @pytest.mark.parametrize('scale', [1.0, 0.1]) + def test_delta_cdf(self, scale): + # Expected value computed with mpmath: + # + # def weibull_min_sf(x, k, scale): + # x = mpmath.mpf(x) + # k = mpmath.mpf(k) + # scale =mpmath.mpf(scale) + # return mpmath.exp(-(x/scale)**k) + # + # >>> import mpmath + # >>> mpmath.mp.dps = 60 + # >>> sf1 = weibull_min_sf(7.5, 3, 1) + # >>> sf2 = weibull_min_sf(8.0, 3, 1) + # >>> float(sf1 - sf2) + # 6.053624060118734e-184 + # + delta = stats.weibull_min._delta_cdf(scale*7.5, scale*8, 3, + scale=scale) + assert_allclose(delta, 6.053624060118734e-184) + + def test_fit_min(self): + rng = np.random.default_rng(5985959307161735394) + + c, loc, scale = 2, 3.5, 0.5 # arbitrary, valid parameters + dist = stats.weibull_min(c, loc, scale) + rvs = dist.rvs(size=100, random_state=rng) + + # test that MLE still honors guesses and fixed parameters + c2, loc2, scale2 = stats.weibull_min.fit(rvs, 1.5, 
floc=3) + c3, loc3, scale3 = stats.weibull_min.fit(rvs, 1.6, floc=3) + assert loc2 == loc3 == 3 # fixed parameter is respected + assert c2 != c3 # different guess -> (slightly) different outcome + # quality of fit is tested elsewhere + + # test that MoM honors fixed parameters, accepts (but ignores) guesses + c4, loc4, scale4 = stats.weibull_min.fit(rvs, 3, fscale=3, method='mm') + assert scale4 == 3 + # because scale was fixed, only the mean and skewness will be matched + dist4 = stats.weibull_min(c4, loc4, scale4) + res = dist4.stats(moments='ms') + ref = np.mean(rvs), stats.skew(rvs) + assert_allclose(res, ref) + + # reference values were computed via mpmath + # from mpmath import mp + # def weibull_sf_mpmath(x, c): + # x = mp.mpf(x) + # c = mp.mpf(c) + # return float(mp.exp(-x**c)) + + @pytest.mark.parametrize('x, c, ref', [(50, 1, 1.9287498479639178e-22), + (1000, 0.8, + 8.131269637872743e-110)]) + def test_sf_isf(self, x, c, ref): + assert_allclose(stats.weibull_min.sf(x, c), ref, rtol=5e-14) + assert_allclose(stats.weibull_min.isf(ref, c), x, rtol=5e-14) + + +class TestDweibull: + def test_entropy(self): + # Test that dweibull entropy follows that of weibull_min. + # (Generic tests check that the dweibull entropy is consistent + # with its PDF. As for accuracy, dweibull entropy should be just + # as accurate as weibull_min entropy. Checks of accuracy against + # a reference need only be applied to the fundamental distribution - + # weibull_min.) 
+ rng = np.random.default_rng(8486259129157041777) + c = 10**rng.normal(scale=100, size=10) + res = stats.dweibull.entropy(c) + ref = stats.weibull_min.entropy(c) - np.log(0.5) + assert_allclose(res, ref, rtol=1e-15) + + def test_sf(self): + # test that for positive values the dweibull survival function is half + # the weibull_min survival function + rng = np.random.default_rng(8486259129157041777) + c = 10**rng.normal(scale=1, size=10) + x = 10 * rng.uniform() + res = stats.dweibull.sf(x, c) + ref = 0.5 * stats.weibull_min.sf(x, c) + assert_allclose(res, ref, rtol=1e-15) + + +class TestTruncWeibull: + + def test_pdf_bounds(self): + # test bounds + y = stats.truncweibull_min.pdf([0.1, 2.0], 2.0, 0.11, 1.99) + assert_equal(y, [0.0, 0.0]) + + def test_logpdf(self): + y = stats.truncweibull_min.logpdf(2.0, 1.0, 2.0, np.inf) + assert_equal(y, 0.0) + + # hand calculation + y = stats.truncweibull_min.logpdf(2.0, 1.0, 2.0, 4.0) + assert_allclose(y, 0.14541345786885884) + + def test_ppf_bounds(self): + # test bounds + y = stats.truncweibull_min.ppf([0.0, 1.0], 2.0, 0.1, 2.0) + assert_equal(y, [0.1, 2.0]) + + def test_cdf_to_ppf(self): + q = [0., 0.1, .25, 0.50, 0.75, 0.90, 1.] + x = stats.truncweibull_min.ppf(q, 2., 0., 3.) + q_out = stats.truncweibull_min.cdf(x, 2., 0., 3.) + assert_allclose(q, q_out) + + def test_sf_to_isf(self): + q = [0., 0.1, .25, 0.50, 0.75, 0.90, 1.] + x = stats.truncweibull_min.isf(q, 2., 0., 3.) + q_out = stats.truncweibull_min.sf(x, 2., 0., 3.) + assert_allclose(q, q_out) + + def test_munp(self): + c = 2. + a = 1. + b = 3. + + def xnpdf(x, n): + return x**n*stats.truncweibull_min.pdf(x, c, a, b) + + m0 = stats.truncweibull_min.moment(0, c, a, b) + assert_equal(m0, 1.) 
+ + m1 = stats.truncweibull_min.moment(1, c, a, b) + m1_expected, _ = quad(lambda x: xnpdf(x, 1), a, b) + assert_allclose(m1, m1_expected) + + m2 = stats.truncweibull_min.moment(2, c, a, b) + m2_expected, _ = quad(lambda x: xnpdf(x, 2), a, b) + assert_allclose(m2, m2_expected) + + m3 = stats.truncweibull_min.moment(3, c, a, b) + m3_expected, _ = quad(lambda x: xnpdf(x, 3), a, b) + assert_allclose(m3, m3_expected) + + m4 = stats.truncweibull_min.moment(4, c, a, b) + m4_expected, _ = quad(lambda x: xnpdf(x, 4), a, b) + assert_allclose(m4, m4_expected) + + def test_reference_values(self): + a = 1. + b = 3. + c = 2. + x_med = np.sqrt(1 - np.log(0.5 + np.exp(-(8. + np.log(2.))))) + + cdf = stats.truncweibull_min.cdf(x_med, c, a, b) + assert_allclose(cdf, 0.5) + + lc = stats.truncweibull_min.logcdf(x_med, c, a, b) + assert_allclose(lc, -np.log(2.)) + + ppf = stats.truncweibull_min.ppf(0.5, c, a, b) + assert_allclose(ppf, x_med) + + sf = stats.truncweibull_min.sf(x_med, c, a, b) + assert_allclose(sf, 0.5) + + ls = stats.truncweibull_min.logsf(x_med, c, a, b) + assert_allclose(ls, -np.log(2.)) + + isf = stats.truncweibull_min.isf(0.5, c, a, b) + assert_allclose(isf, x_med) + + def test_compare_weibull_min(self): + # Verify that the truncweibull_min distribution gives the same results + # as the original weibull_min + x = 1.5 + c = 2.0 + a = 0.0 + b = np.inf + scale = 3.0 + + p = stats.weibull_min.pdf(x, c, scale=scale) + p_trunc = stats.truncweibull_min.pdf(x, c, a, b, scale=scale) + assert_allclose(p, p_trunc) + + lp = stats.weibull_min.logpdf(x, c, scale=scale) + lp_trunc = stats.truncweibull_min.logpdf(x, c, a, b, scale=scale) + assert_allclose(lp, lp_trunc) + + cdf = stats.weibull_min.cdf(x, c, scale=scale) + cdf_trunc = stats.truncweibull_min.cdf(x, c, a, b, scale=scale) + assert_allclose(cdf, cdf_trunc) + + lc = stats.weibull_min.logcdf(x, c, scale=scale) + lc_trunc = stats.truncweibull_min.logcdf(x, c, a, b, scale=scale) + assert_allclose(lc, lc_trunc) + + s = 
stats.weibull_min.sf(x, c, scale=scale) + s_trunc = stats.truncweibull_min.sf(x, c, a, b, scale=scale) + assert_allclose(s, s_trunc) + + ls = stats.weibull_min.logsf(x, c, scale=scale) + ls_trunc = stats.truncweibull_min.logsf(x, c, a, b, scale=scale) + assert_allclose(ls, ls_trunc) + + # # Also test using a large value x, for which computing the survival + # # function using the CDF would result in 0. + s = stats.truncweibull_min.sf(30, 2, a, b, scale=3) + assert_allclose(s, np.exp(-100)) + + ls = stats.truncweibull_min.logsf(30, 2, a, b, scale=3) + assert_allclose(ls, -100) + + def test_compare_weibull_min2(self): + # Verify that the truncweibull_min distribution PDF and CDF results + # are the same as those calculated from truncating weibull_min + c, a, b = 2.5, 0.25, 1.25 + x = np.linspace(a, b, 100) + + pdf1 = stats.truncweibull_min.pdf(x, c, a, b) + cdf1 = stats.truncweibull_min.cdf(x, c, a, b) + + norm = stats.weibull_min.cdf(b, c) - stats.weibull_min.cdf(a, c) + pdf2 = stats.weibull_min.pdf(x, c) / norm + cdf2 = (stats.weibull_min.cdf(x, c) - stats.weibull_min.cdf(a, c))/norm + + np.testing.assert_allclose(pdf1, pdf2) + np.testing.assert_allclose(cdf1, cdf2) + + +class TestRdist: + def test_rdist_cdf_gh1285(self): + # check workaround in rdist._cdf for issue gh-1285. 
+ distfn = stats.rdist + values = [0.001, 0.5, 0.999] + assert_almost_equal(distfn.cdf(distfn.ppf(values, 541.0), 541.0), + values, decimal=5) + + def test_rdist_beta(self): + # rdist is a special case of stats.beta + x = np.linspace(-0.99, 0.99, 10) + c = 2.7 + assert_almost_equal(0.5*stats.beta(c/2, c/2).pdf((x + 1)/2), + stats.rdist(c).pdf(x)) + + # reference values were computed via mpmath + # from mpmath import mp + # mp.dps = 200 + # def rdist_sf_mpmath(x, c): + # x = mp.mpf(x) + # c = mp.mpf(c) + # return float(mp.betainc(c/2, c/2, (x+1)/2, mp.one, regularized=True)) + @pytest.mark.parametrize( + "x, c, ref", + [ + (0.0001, 541, 0.49907251345565845), + (0.1, 241, 0.06000788166249205), + (0.5, 441, 1.0655898106047832e-29), + (0.8, 341, 6.025478373732215e-78), + ] + ) + def test_rdist_sf(self, x, c, ref): + assert_allclose(stats.rdist.sf(x, c), ref, rtol=5e-14) + + +class TestTrapezoid: + def test_reduces_to_triang(self): + modes = [0, 0.3, 0.5, 1] + for mode in modes: + x = [0, mode, 1] + assert_almost_equal(stats.trapezoid.pdf(x, mode, mode), + stats.triang.pdf(x, mode)) + assert_almost_equal(stats.trapezoid.cdf(x, mode, mode), + stats.triang.cdf(x, mode)) + + def test_reduces_to_uniform(self): + x = np.linspace(0, 1, 10) + assert_almost_equal(stats.trapezoid.pdf(x, 0, 1), stats.uniform.pdf(x)) + assert_almost_equal(stats.trapezoid.cdf(x, 0, 1), stats.uniform.cdf(x)) + + def test_cases(self): + # edge cases + assert_almost_equal(stats.trapezoid.pdf(0, 0, 0), 2) + assert_almost_equal(stats.trapezoid.pdf(1, 1, 1), 2) + assert_almost_equal(stats.trapezoid.pdf(0.5, 0, 0.8), + 1.11111111111111111) + assert_almost_equal(stats.trapezoid.pdf(0.5, 0.2, 1.0), + 1.11111111111111111) + + # straightforward case + assert_almost_equal(stats.trapezoid.pdf(0.1, 0.2, 0.8), 0.625) + assert_almost_equal(stats.trapezoid.pdf(0.5, 0.2, 0.8), 1.25) + assert_almost_equal(stats.trapezoid.pdf(0.9, 0.2, 0.8), 0.625) + + assert_almost_equal(stats.trapezoid.cdf(0.1, 0.2, 0.8), 0.03125) + 
assert_almost_equal(stats.trapezoid.cdf(0.2, 0.2, 0.8), 0.125) + assert_almost_equal(stats.trapezoid.cdf(0.5, 0.2, 0.8), 0.5) + assert_almost_equal(stats.trapezoid.cdf(0.9, 0.2, 0.8), 0.96875) + assert_almost_equal(stats.trapezoid.cdf(1.0, 0.2, 0.8), 1.0) + + def test_moments_and_entropy(self): + # issue #11795: improve precision of trapezoid stats + # Apply formulas from Wikipedia for the following parameters: + a, b, c, d = -3, -1, 2, 3 # => 1/3, 5/6, -3, 6 + p1, p2, loc, scale = (b-a) / (d-a), (c-a) / (d-a), a, d-a + h = 2 / (d+c-b-a) + + def moment(n): + return (h * ((d**(n+2) - c**(n+2)) / (d-c) + - (b**(n+2) - a**(n+2)) / (b-a)) / + (n+1) / (n+2)) + + mean = moment(1) + var = moment(2) - mean**2 + entropy = 0.5 * (d-c+b-a) / (d+c-b-a) + np.log(0.5 * (d+c-b-a)) + assert_almost_equal(stats.trapezoid.mean(p1, p2, loc, scale), + mean, decimal=13) + assert_almost_equal(stats.trapezoid.var(p1, p2, loc, scale), + var, decimal=13) + assert_almost_equal(stats.trapezoid.entropy(p1, p2, loc, scale), + entropy, decimal=13) + + # Check boundary cases where scipy d=0 or d=1. + assert_almost_equal(stats.trapezoid.mean(0, 0, -3, 6), -1, decimal=13) + assert_almost_equal(stats.trapezoid.mean(0, 1, -3, 6), 0, decimal=13) + assert_almost_equal(stats.trapezoid.var(0, 1, -3, 6), 3, decimal=13) + + def test_trapezoid_vect(self): + # test that array-valued shapes and arguments are handled + c = np.array([0.1, 0.2, 0.3]) + d = np.array([0.5, 0.6])[:, None] + x = np.array([0.15, 0.25, 0.9]) + v = stats.trapezoid.pdf(x, c, d) + + cc, dd, xx = np.broadcast_arrays(c, d, x) + + res = np.empty(xx.size, dtype=xx.dtype) + ind = np.arange(xx.size) + for i, x1, c1, d1 in zip(ind, xx.ravel(), cc.ravel(), dd.ravel()): + res[i] = stats.trapezoid.pdf(x1, c1, d1) + + assert_allclose(v, res.reshape(v.shape), atol=1e-15) + + # Check that the stats() method supports vector arguments. 
+ v = np.asarray(stats.trapezoid.stats(c, d, moments="mvsk")) + cc, dd = np.broadcast_arrays(c, d) + res = np.empty((cc.size, 4)) # 4 stats returned per value + ind = np.arange(cc.size) + for i, c1, d1 in zip(ind, cc.ravel(), dd.ravel()): + res[i] = stats.trapezoid.stats(c1, d1, moments="mvsk") + + assert_allclose(v, res.T.reshape(v.shape), atol=1e-15) + + def test_trapz(self): + # Basic test for alias + x = np.linspace(0, 1, 10) + assert_almost_equal(stats.trapz.pdf(x, 0, 1), stats.uniform.pdf(x)) + + +class TestTriang: + def test_edge_cases(self): + with np.errstate(all='raise'): + assert_equal(stats.triang.pdf(0, 0), 2.) + assert_equal(stats.triang.pdf(0.5, 0), 1.) + assert_equal(stats.triang.pdf(1, 0), 0.) + + assert_equal(stats.triang.pdf(0, 1), 0) + assert_equal(stats.triang.pdf(0.5, 1), 1.) + assert_equal(stats.triang.pdf(1, 1), 2) + + assert_equal(stats.triang.cdf(0., 0.), 0.) + assert_equal(stats.triang.cdf(0.5, 0.), 0.75) + assert_equal(stats.triang.cdf(1.0, 0.), 1.0) + + assert_equal(stats.triang.cdf(0., 1.), 0.) 
+ assert_equal(stats.triang.cdf(0.5, 1.), 0.25) + assert_equal(stats.triang.cdf(1., 1.), 1) + + +class TestMaxwell: + + # reference values were computed with wolfram alpha + # erfc(x/sqrt(2)) + sqrt(2/pi) * x * e^(-x^2/2) + + @pytest.mark.parametrize("x, ref", + [(20, 2.2138865931011177e-86), + (0.01, 0.999999734046458435)]) + def test_sf(self, x, ref): + assert_allclose(stats.maxwell.sf(x), ref, rtol=1e-14) + + # reference values were computed with wolfram alpha + # sqrt(2) * sqrt(Q^(-1)(3/2, q)) + + @pytest.mark.parametrize("q, ref", + [(0.001, 4.033142223656157022), + (0.9999847412109375, 0.0385743284050381), + (2**-55, 8.95564974719481)]) + def test_isf(self, q, ref): + assert_allclose(stats.maxwell.isf(q), ref, rtol=1e-15) + + +class TestMielke: + def test_moments(self): + k, s = 4.642, 0.597 + # n-th moment exists only if n < s + assert_equal(stats.mielke(k, s).moment(1), np.inf) + assert_equal(stats.mielke(k, 1.0).moment(1), np.inf) + assert_(np.isfinite(stats.mielke(k, 1.01).moment(1))) + + def test_burr_equivalence(self): + x = np.linspace(0.01, 100, 50) + k, s = 2.45, 5.32 + assert_allclose(stats.burr.pdf(x, s, k/s), stats.mielke.pdf(x, k, s)) + + +class TestBurr: + def test_endpoints_7491(self): + # gh-7491 + # Compute the pdf at the left endpoint dst.a. + data = [ + [stats.fisk, (1,), 1], + [stats.burr, (0.5, 2), 1], + [stats.burr, (1, 1), 1], + [stats.burr, (2, 0.5), 1], + [stats.burr12, (1, 0.5), 0.5], + [stats.burr12, (1, 1), 1.0], + [stats.burr12, (1, 2), 2.0]] + + ans = [_f.pdf(_f.a, *_args) for _f, _args, _ in data] + correct = [_correct_ for _f, _args, _correct_ in data] + assert_array_almost_equal(ans, correct) + + ans = [_f.logpdf(_f.a, *_args) for _f, _args, _ in data] + correct = [np.log(_correct_) for _f, _args, _correct_ in data] + assert_array_almost_equal(ans, correct) + + def test_burr_stats_9544(self): + # gh-9544. Test from gh-9978 + c, d = 5.0, 3 + mean, variance = stats.burr(c, d).stats() + # mean = sc.beta(3 + 1/5, 1. 
- 1/5) * 3 = 1.4110263... + # var = sc.beta(3 + 2 / 5, 1. - 2 / 5) * 3 - + # (sc.beta(3 + 1 / 5, 1. - 1 / 5) * 3) ** 2 + mean_hc, variance_hc = 1.4110263183925857, 0.22879948026191643 + assert_allclose(mean, mean_hc) + assert_allclose(variance, variance_hc) + + def test_burr_nan_mean_var_9544(self): + # gh-9544. Test from gh-9978 + c, d = 0.5, 3 + mean, variance = stats.burr(c, d).stats() + assert_(np.isnan(mean)) + assert_(np.isnan(variance)) + c, d = 1.5, 3 + mean, variance = stats.burr(c, d).stats() + assert_(np.isfinite(mean)) + assert_(np.isnan(variance)) + + c, d = 0.5, 3 + e1, e2, e3, e4 = stats.burr._munp(np.array([1, 2, 3, 4]), c, d) + assert_(np.isnan(e1)) + assert_(np.isnan(e2)) + assert_(np.isnan(e3)) + assert_(np.isnan(e4)) + c, d = 1.5, 3 + e1, e2, e3, e4 = stats.burr._munp([1, 2, 3, 4], c, d) + assert_(np.isfinite(e1)) + assert_(np.isnan(e2)) + assert_(np.isnan(e3)) + assert_(np.isnan(e4)) + c, d = 2.5, 3 + e1, e2, e3, e4 = stats.burr._munp([1, 2, 3, 4], c, d) + assert_(np.isfinite(e1)) + assert_(np.isfinite(e2)) + assert_(np.isnan(e3)) + assert_(np.isnan(e4)) + c, d = 3.5, 3 + e1, e2, e3, e4 = stats.burr._munp([1, 2, 3, 4], c, d) + assert_(np.isfinite(e1)) + assert_(np.isfinite(e2)) + assert_(np.isfinite(e3)) + assert_(np.isnan(e4)) + c, d = 4.5, 3 + e1, e2, e3, e4 = stats.burr._munp([1, 2, 3, 4], c, d) + assert_(np.isfinite(e1)) + assert_(np.isfinite(e2)) + assert_(np.isfinite(e3)) + assert_(np.isfinite(e4)) + + def test_burr_isf(self): + # reference values were computed via the reference distribution, e.g. 
+ # mp.dps = 100 + # Burr(c=5, d=3).isf([0.1, 1e-10, 1e-20, 1e-40]) + c, d = 5.0, 3.0 + q = [0.1, 1e-10, 1e-20, 1e-40] + ref = [1.9469686558286508, 124.57309395989076, 12457.309396155173, + 124573093.96155174] + assert_allclose(stats.burr.isf(q, c, d), ref, rtol=1e-14) + + +class TestBurr12: + + @pytest.mark.parametrize('scale, expected', + [(1.0, 2.3283064359965952e-170), + (3.5, 5.987114417447875e-153)]) + def test_delta_cdf(self, scale, expected): + # Expected value computed with mpmath: + # + # def burr12sf(x, c, d, scale): + # x = mpmath.mpf(x) + # c = mpmath.mpf(c) + # d = mpmath.mpf(d) + # scale = mpmath.mpf(scale) + # return (mpmath.mp.one + (x/scale)**c)**(-d) + # + # >>> import mpmath + # >>> mpmath.mp.dps = 60 + # >>> float(burr12sf(2e5, 4, 8, 1) - burr12sf(4e5, 4, 8, 1)) + # 2.3283064359965952e-170 + # >>> float(burr12sf(2e5, 4, 8, 3.5) - burr12sf(4e5, 4, 8, 3.5)) + # 5.987114417447875e-153 + # + delta = stats.burr12._delta_cdf(2e5, 4e5, 4, 8, scale=scale) + assert_allclose(delta, expected, rtol=1e-13) + + def test_moments_edge(self): + # gh-18838 reported that burr12 moments could be invalid; see above. + # Check that this is resolved in an edge case where c*d == n, and + # compare the results against those produced by Mathematica, e.g. + # `SinghMaddalaDistribution[2, 2, 1]` at Wolfram Alpha. + c, d = 2, 2 + mean = np.pi/4 + var = 1 - np.pi**2/16 + skew = np.pi**3/(32*var**1.5) + kurtosis = np.nan + ref = [mean, var, skew, kurtosis] + res = stats.burr12(c, d).stats('mvsk') + assert_allclose(res, ref, rtol=1e-14) + + +class TestStudentizedRange: + # For alpha = .05, .01, and .001, and for each value of + # v = [1, 3, 10, 20, 120, inf], a Q was picked from each table for + # k = [2, 8, 14, 20]. + + # these arrays are written with `k` as column, and `v` as rows. 
+ # Q values are taken from table 3: + # https://www.jstor.org/stable/2237810 + q05 = [17.97, 45.40, 54.33, 59.56, + 4.501, 8.853, 10.35, 11.24, + 3.151, 5.305, 6.028, 6.467, + 2.950, 4.768, 5.357, 5.714, + 2.800, 4.363, 4.842, 5.126, + 2.772, 4.286, 4.743, 5.012] + q01 = [90.03, 227.2, 271.8, 298.0, + 8.261, 15.64, 18.22, 19.77, + 4.482, 6.875, 7.712, 8.226, + 4.024, 5.839, 6.450, 6.823, + 3.702, 5.118, 5.562, 5.827, + 3.643, 4.987, 5.400, 5.645] + q001 = [900.3, 2272, 2718, 2980, + 18.28, 34.12, 39.69, 43.05, + 6.487, 9.352, 10.39, 11.03, + 5.444, 7.313, 7.966, 8.370, + 4.772, 6.039, 6.448, 6.695, + 4.654, 5.823, 6.191, 6.411] + qs = np.concatenate((q05, q01, q001)) + ps = [.95, .99, .999] + vs = [1, 3, 10, 20, 120, np.inf] + ks = [2, 8, 14, 20] + + data = list(zip(product(ps, vs, ks), qs)) + + # A small selection of large-v cases generated with R's `ptukey` + # Each case is in the format (q, k, v, r_result) + r_data = [ + (0.1, 3, 9001, 0.002752818526842), + (1, 10, 1000, 0.000526142388912), + (1, 3, np.inf, 0.240712641229283), + (4, 3, np.inf, 0.987012338626815), + (1, 10, np.inf, 0.000519869467083), + ] + + def test_cdf_against_tables(self): + for pvk, q in self.data: + p_expected, v, k = pvk + res_p = stats.studentized_range.cdf(q, k, v) + assert_allclose(res_p, p_expected, rtol=1e-4) + + @pytest.mark.slow + def test_ppf_against_tables(self): + for pvk, q_expected in self.data: + p, v, k = pvk + res_q = stats.studentized_range.ppf(p, k, v) + assert_allclose(res_q, q_expected, rtol=5e-4) + + path_prefix = os.path.dirname(__file__) + relative_path = "data/studentized_range_mpmath_ref.json" + with open(os.path.join(path_prefix, relative_path)) as file: + pregenerated_data = json.load(file) + + @pytest.mark.parametrize("case_result", pregenerated_data["cdf_data"]) + def test_cdf_against_mp(self, case_result): + src_case = case_result["src_case"] + mp_result = case_result["mp_result"] + qkv = src_case["q"], src_case["k"], src_case["v"] + res = 
stats.studentized_range.cdf(*qkv) + + assert_allclose(res, mp_result, + atol=src_case["expected_atol"], + rtol=src_case["expected_rtol"]) + + @pytest.mark.parametrize("case_result", pregenerated_data["pdf_data"]) + def test_pdf_against_mp(self, case_result): + src_case = case_result["src_case"] + mp_result = case_result["mp_result"] + qkv = src_case["q"], src_case["k"], src_case["v"] + res = stats.studentized_range.pdf(*qkv) + + assert_allclose(res, mp_result, + atol=src_case["expected_atol"], + rtol=src_case["expected_rtol"]) + + @pytest.mark.slow + @pytest.mark.xfail_on_32bit("intermittent RuntimeWarning: invalid value.") + @pytest.mark.parametrize("case_result", pregenerated_data["moment_data"]) + def test_moment_against_mp(self, case_result): + src_case = case_result["src_case"] + mp_result = case_result["mp_result"] + mkv = src_case["m"], src_case["k"], src_case["v"] + + # Silence invalid value encountered warnings. Actual problems will be + # caught by the result comparison. + with np.errstate(invalid='ignore'): + res = stats.studentized_range.moment(*mkv) + + assert_allclose(res, mp_result, + atol=src_case["expected_atol"], + rtol=src_case["expected_rtol"]) + + def test_pdf_integration(self): + k, v = 3, 10 + # Test whether PDF integration is 1 like it should be. + res = quad(stats.studentized_range.pdf, 0, np.inf, args=(k, v)) + assert_allclose(res[0], 1) + + @pytest.mark.xslow + def test_pdf_against_cdf(self): + k, v = 3, 10 + + # Test whether the integrated PDF matches the CDF using cumulative + # integration. Use a small step size to reduce error due to the + # summation. This is slow, but tests the results well. 
+ x = np.arange(0, 10, step=0.01) + + y_cdf = stats.studentized_range.cdf(x, k, v)[1:] + y_pdf_raw = stats.studentized_range.pdf(x, k, v) + y_pdf_cumulative = cumulative_trapezoid(y_pdf_raw, x) + + # Because of error caused by the summation, use a relatively large rtol + assert_allclose(y_pdf_cumulative, y_cdf, rtol=1e-4) + + @pytest.mark.parametrize("r_case_result", r_data) + def test_cdf_against_r(self, r_case_result): + # Test large `v` values using R + q, k, v, r_res = r_case_result + with np.errstate(invalid='ignore'): + res = stats.studentized_range.cdf(q, k, v) + assert_allclose(res, r_res) + + @pytest.mark.slow + @pytest.mark.xfail_on_32bit("intermittent RuntimeWarning: invalid value.") + def test_moment_vectorization(self): + # Test moment broadcasting. Calls `_munp` directly because + # `rv_continuous.moment` is broken at time of writing. See gh-12192 + + # Silence invalid value encountered warnings. Actual problems will be + # caught by the result comparison. + with np.errstate(invalid='ignore'): + m = stats.studentized_range._munp([1, 2], [4, 5], [10, 11]) + + assert_allclose(m.shape, (2,)) + + with pytest.raises(ValueError, match="...could not be broadcast..."): + stats.studentized_range._munp(1, [4, 5], [10, 11, 12]) + + @pytest.mark.xslow + def test_fitstart_valid(self): + with suppress_warnings() as sup, np.errstate(invalid="ignore"): + # the integration warning message may differ + sup.filter(IntegrationWarning) + k, df, _, _ = stats.studentized_range._fitstart([1, 2, 3]) + assert_(stats.studentized_range._argcheck(k, df)) + + def test_infinite_df(self): + # Check that the CDF and PDF infinite and normal integrators + # roughly match for a high df case + res = stats.studentized_range.pdf(3, 10, np.inf) + res_finite = stats.studentized_range.pdf(3, 10, 99999) + assert_allclose(res, res_finite, atol=1e-4, rtol=1e-4) + + res = stats.studentized_range.cdf(3, 10, np.inf) + res_finite = stats.studentized_range.cdf(3, 10, 99999) + assert_allclose(res, 
res_finite, atol=1e-4, rtol=1e-4) + + def test_df_cutoff(self): + # Test that the CDF and PDF properly switch integrators at df=100,000. + # The infinite integrator should be different enough that it fails + # an allclose assertion. Also sanity check that using the same + # integrator does pass the allclose with a 1-df difference, which + # should be tiny. + + res = stats.studentized_range.pdf(3, 10, 100000) + res_finite = stats.studentized_range.pdf(3, 10, 99999) + res_sanity = stats.studentized_range.pdf(3, 10, 99998) + assert_raises(AssertionError, assert_allclose, res, res_finite, + atol=1e-6, rtol=1e-6) + assert_allclose(res_finite, res_sanity, atol=1e-6, rtol=1e-6) + + res = stats.studentized_range.cdf(3, 10, 100000) + res_finite = stats.studentized_range.cdf(3, 10, 99999) + res_sanity = stats.studentized_range.cdf(3, 10, 99998) + assert_raises(AssertionError, assert_allclose, res, res_finite, + atol=1e-6, rtol=1e-6) + assert_allclose(res_finite, res_sanity, atol=1e-6, rtol=1e-6) + + def test_clipping(self): + # The result of this computation was -9.9253938401489e-14 on some + # systems. The correct result is very nearly zero, but should not be + # negative. 
+ q, k, v = 34.6413996195345746, 3, 339 + p = stats.studentized_range.sf(q, k, v) + assert_allclose(p, 0, atol=1e-10) + assert p >= 0 + + +def test_540_567(): + # test for nan returned in tickets 540, 567 + assert_almost_equal(stats.norm.cdf(-1.7624320982), 0.03899815971089126, + decimal=10, err_msg='test_540_567') + assert_almost_equal(stats.norm.cdf(-1.7624320983), 0.038998159702449846, + decimal=10, err_msg='test_540_567') + assert_almost_equal(stats.norm.cdf(1.38629436112, loc=0.950273420309, + scale=0.204423758009), + 0.98353464004309321, + decimal=10, err_msg='test_540_567') + + +def test_regression_ticket_1326(): + # adjust to avoid nan with 0*log(0) + assert_almost_equal(stats.chi2.pdf(0.0, 2), 0.5, 14) + + +def test_regression_tukey_lambda(): + # Make sure that Tukey-Lambda distribution correctly handles + # non-positive lambdas. + x = np.linspace(-5.0, 5.0, 101) + + with np.errstate(divide='ignore'): + for lam in [0.0, -1.0, -2.0, np.array([[-1.0], [0.0], [-2.0]])]: + p = stats.tukeylambda.pdf(x, lam) + assert_((p != 0.0).all()) + assert_(~np.isnan(p).all()) + + lam = np.array([[-1.0], [0.0], [2.0]]) + p = stats.tukeylambda.pdf(x, lam) + + assert_(~np.isnan(p).all()) + assert_((p[0] != 0.0).all()) + assert_((p[1] != 0.0).all()) + assert_((p[2] != 0.0).any()) + assert_((p[2] == 0.0).any()) + + +@pytest.mark.skipif(DOCSTRINGS_STRIPPED, reason="docstrings stripped") +def test_regression_ticket_1421(): + assert_('pdf(x, mu, loc=0, scale=1)' not in stats.poisson.__doc__) + assert_('pmf(x,' in stats.poisson.__doc__) + + +def test_nan_arguments_gh_issue_1362(): + with np.errstate(invalid='ignore'): + assert_(np.isnan(stats.t.logcdf(1, np.nan))) + assert_(np.isnan(stats.t.cdf(1, np.nan))) + assert_(np.isnan(stats.t.logsf(1, np.nan))) + assert_(np.isnan(stats.t.sf(1, np.nan))) + assert_(np.isnan(stats.t.pdf(1, np.nan))) + assert_(np.isnan(stats.t.logpdf(1, np.nan))) + assert_(np.isnan(stats.t.ppf(1, np.nan))) + assert_(np.isnan(stats.t.isf(1, np.nan))) + + 
assert_(np.isnan(stats.bernoulli.logcdf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.cdf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.logsf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.sf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.pmf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.logpmf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.ppf(np.nan, 0.5))) + assert_(np.isnan(stats.bernoulli.isf(np.nan, 0.5))) + + +def test_frozen_fit_ticket_1536(): + np.random.seed(5678) + true = np.array([0.25, 0., 0.5]) + x = stats.lognorm.rvs(true[0], true[1], true[2], size=100) + + with np.errstate(divide='ignore'): + params = np.array(stats.lognorm.fit(x, floc=0.)) + + assert_almost_equal(params, true, decimal=2) + + params = np.array(stats.lognorm.fit(x, fscale=0.5, loc=0)) + assert_almost_equal(params, true, decimal=2) + + params = np.array(stats.lognorm.fit(x, f0=0.25, loc=0)) + assert_almost_equal(params, true, decimal=2) + + params = np.array(stats.lognorm.fit(x, f0=0.25, floc=0)) + assert_almost_equal(params, true, decimal=2) + + np.random.seed(5678) + loc = 1 + floc = 0.9 + x = stats.norm.rvs(loc, 2., size=100) + params = np.array(stats.norm.fit(x, floc=floc)) + expected = np.array([floc, np.sqrt(((x-floc)**2).mean())]) + assert_almost_equal(params, expected, decimal=4) + + +def test_regression_ticket_1530(): + # Check the starting value works for Cauchy distribution fit. + np.random.seed(654321) + rvs = stats.cauchy.rvs(size=100) + params = stats.cauchy.fit(rvs) + expected = (0.045, 1.142) + assert_almost_equal(params, expected, decimal=1) + + +def test_gh_pr_4806(): + # Check starting values for Cauchy distribution fit. + np.random.seed(1234) + x = np.random.randn(42) + for offset in 10000.0, 1222333444.0: + loc, scale = stats.cauchy.fit(x + offset) + assert_allclose(loc, offset, atol=1.0) + assert_allclose(scale, 0.6, atol=1.0) + + +def test_tukeylambda_stats_ticket_1545(): + # Some test for the variance and kurtosis of the Tukey Lambda distr. 
+ # See test_tukeylamdba_stats.py for more tests. + + mv = stats.tukeylambda.stats(0, moments='mvsk') + # Known exact values: + expected = [0, np.pi**2/3, 0, 1.2] + assert_almost_equal(mv, expected, decimal=10) + + mv = stats.tukeylambda.stats(3.13, moments='mvsk') + # 'expected' computed with mpmath. + expected = [0, 0.0269220858861465102, 0, -0.898062386219224104] + assert_almost_equal(mv, expected, decimal=10) + + mv = stats.tukeylambda.stats(0.14, moments='mvsk') + # 'expected' computed with mpmath. + expected = [0, 2.11029702221450250, 0, -0.02708377353223019456] + assert_almost_equal(mv, expected, decimal=10) + + +def test_poisson_logpmf_ticket_1436(): + assert_(np.isfinite(stats.poisson.logpmf(1500, 200))) + + +def test_powerlaw_stats(): + """Test the powerlaw stats function. + + This unit test is also a regression test for ticket 1548. + + The exact values are: + mean: + mu = a / (a + 1) + variance: + sigma**2 = a / ((a + 2) * (a + 1) ** 2) + skewness: + One formula (see https://en.wikipedia.org/wiki/Skewness) is + gamma_1 = (E[X**3] - 3*mu*E[X**2] + 2*mu**3) / sigma**3 + A short calculation shows that E[X**k] is a / (a + k), so gamma_1 + can be implemented as + n = a/(a+3) - 3*(a/(a+1))*a/(a+2) + 2*(a/(a+1))**3 + d = sqrt(a/((a+2)*(a+1)**2)) ** 3 + gamma_1 = n/d + Either by simplifying, or by a direct calculation of mu_3 / sigma**3, + one gets the more concise formula: + gamma_1 = -2.0 * ((a - 1) / (a + 3)) * sqrt((a + 2) / a) + kurtosis: (See https://en.wikipedia.org/wiki/Kurtosis) + The excess kurtosis is + gamma_2 = mu_4 / sigma**4 - 3 + A bit of calculus and algebra (sympy helps) shows that + mu_4 = 3*a*(3*a**2 - a + 2) / ((a+1)**4 * (a+2) * (a+3) * (a+4)) + so + gamma_2 = 3*(3*a**2 - a + 2) * (a+2) / (a*(a+3)*(a+4)) - 3 + which can be rearranged to + gamma_2 = 6 * (a**3 - a**2 - 6*a + 2) / (a*(a+3)*(a+4)) + """ + cases = [(1.0, (0.5, 1./12, 0.0, -1.2)), + (2.0, (2./3, 2./36, -0.56568542494924734, -0.6))] + for a, exact_mvsk in cases: + mvsk = 
stats.powerlaw.stats(a, moments="mvsk") + assert_array_almost_equal(mvsk, exact_mvsk) + + +def test_powerlaw_edge(): + # Regression test for gh-3986. + p = stats.powerlaw.logpdf(0, 1) + assert_equal(p, 0.0) + + +def test_exponpow_edge(): + # Regression test for gh-3982. + p = stats.exponpow.logpdf(0, 1) + assert_equal(p, 0.0) + + # Check pdf and logpdf at x = 0 for other values of b. + p = stats.exponpow.pdf(0, [0.25, 1.0, 1.5]) + assert_equal(p, [np.inf, 1.0, 0.0]) + p = stats.exponpow.logpdf(0, [0.25, 1.0, 1.5]) + assert_equal(p, [np.inf, 0.0, -np.inf]) + + +def test_gengamma_edge(): + # Regression test for gh-3985. + p = stats.gengamma.pdf(0, 1, 1) + assert_equal(p, 1.0) + + +@pytest.mark.parametrize("a, c, ref, tol", + [(1500000.0, 1, 8.529426144018633, 1e-15), + (1e+30, 1, 35.95771492811536, 1e-15), + (1e+100, 1, 116.54819318290696, 1e-15), + (3e3, 1, 5.422011196659015, 1e-13), + (3e6, -1e100, -236.29663213396054, 1e-15), + (3e60, 1e-100, 1.3925371786831085e+102, 1e-15)]) +def test_gengamma_extreme_entropy(a, c, ref, tol): + # The reference values were calculated with mpmath: + # from mpmath import mp + # mp.dps = 500 + # + # def gen_entropy(a, c): + # a, c = mp.mpf(a), mp.mpf(c) + # val = mp.digamma(a) + # h = (a * (mp.one - val) + val/c + mp.loggamma(a) - mp.log(abs(c))) + # return float(h) + assert_allclose(stats.gengamma.entropy(a, c), ref, rtol=tol) + + +def test_gengamma_endpoint_with_neg_c(): + p = stats.gengamma.pdf(0, 1, -1) + assert p == 0.0 + logp = stats.gengamma.logpdf(0, 1, -1) + assert logp == -np.inf + + +def test_gengamma_munp(): + # Regression tests for gh-4724. + p = stats.gengamma._munp(-2, 200, 1.) + assert_almost_equal(p, 1./199/198) + + p = stats.gengamma._munp(-2, 10, 1.) + assert_almost_equal(p, 1./9/8) + + +def test_ksone_fit_freeze(): + # Regression test for ticket #1638. 
+ d = np.array( + [-0.18879233, 0.15734249, 0.18695107, 0.27908787, -0.248649, + -0.2171497, 0.12233512, 0.15126419, 0.03119282, 0.4365294, + 0.08930393, -0.23509903, 0.28231224, -0.09974875, -0.25196048, + 0.11102028, 0.1427649, 0.10176452, 0.18754054, 0.25826724, + 0.05988819, 0.0531668, 0.21906056, 0.32106729, 0.2117662, + 0.10886442, 0.09375789, 0.24583286, -0.22968366, -0.07842391, + -0.31195432, -0.21271196, 0.1114243, -0.13293002, 0.01331725, + -0.04330977, -0.09485776, -0.28434547, 0.22245721, -0.18518199, + -0.10943985, -0.35243174, 0.06897665, -0.03553363, -0.0701746, + -0.06037974, 0.37670779, -0.21684405]) + + with np.errstate(invalid='ignore'): + with suppress_warnings() as sup: + sup.filter(IntegrationWarning, + "The maximum number of subdivisions .50. has been " + "achieved.") + sup.filter(RuntimeWarning, + "floating point number truncated to an integer") + stats.ksone.fit(d) + + +def test_norm_logcdf(): + # Test precision of the logcdf of the normal distribution. + # This precision was enhanced in ticket 1614. 
+ x = -np.asarray(list(range(0, 120, 4))) + # Values from R + expected = [-0.69314718, -10.36010149, -35.01343716, -75.41067300, + -131.69539607, -203.91715537, -292.09872100, -396.25241451, + -516.38564863, -652.50322759, -804.60844201, -972.70364403, + -1156.79057310, -1356.87055173, -1572.94460885, -1805.01356068, + -2053.07806561, -2317.13866238, -2597.19579746, -2893.24984493, + -3205.30112136, -3533.34989701, -3877.39640444, -4237.44084522, + -4613.48339520, -5005.52420869, -5413.56342187, -5837.60115548, + -6277.63751711, -6733.67260303] + + assert_allclose(stats.norm().logcdf(x), expected, atol=1e-8) + + # also test the complex-valued code path + assert_allclose(stats.norm().logcdf(x + 1e-14j).real, expected, atol=1e-8) + + # test the accuracy: d(logcdf)/dx = pdf / cdf \equiv exp(logpdf - logcdf) + deriv = (stats.norm.logcdf(x + 1e-10j)/1e-10).imag + deriv_expected = np.exp(stats.norm.logpdf(x) - stats.norm.logcdf(x)) + assert_allclose(deriv, deriv_expected, atol=1e-10) + + +def test_levy_cdf_ppf(): + # Test levy.cdf, including small arguments. + x = np.array([1000, 1.0, 0.5, 0.1, 0.01, 0.001]) + + # Expected values were calculated separately with mpmath. + # E.g. + # >>> mpmath.mp.dps = 100 + # >>> x = mpmath.mp.mpf('0.01') + # >>> cdf = mpmath.erfc(mpmath.sqrt(1/(2*x))) + expected = np.array([0.9747728793699604, + 0.3173105078629141, + 0.1572992070502851, + 0.0015654022580025495, + 1.523970604832105e-23, + 1.795832784800726e-219]) + + y = stats.levy.cdf(x) + assert_allclose(y, expected, rtol=1e-10) + + # ppf(expected) should get us back to x. + xx = stats.levy.ppf(expected) + assert_allclose(xx, x, rtol=1e-13) + + +def test_levy_sf(): + # Large values, far into the tail of the distribution. + x = np.array([1e15, 1e25, 1e35, 1e50]) + # Expected values were calculated with mpmath. 
+ expected = np.array([2.5231325220201597e-08, + 2.52313252202016e-13, + 2.52313252202016e-18, + 7.978845608028653e-26]) + y = stats.levy.sf(x) + assert_allclose(y, expected, rtol=1e-14) + + +# The expected values for levy.isf(p) were calculated with mpmath. +# For loc=0 and scale=1, the inverse SF can be computed with +# +# import mpmath +# +# def levy_invsf(p): +# return 1/(2*mpmath.erfinv(p)**2) +# +# For example, with mpmath.mp.dps set to 60, float(levy_invsf(1e-20)) +# returns 6.366197723675814e+39. +# +@pytest.mark.parametrize('p, expected_isf', + [(1e-20, 6.366197723675814e+39), + (1e-8, 6366197723675813.0), + (0.375, 4.185810119346273), + (0.875, 0.42489442055310134), + (0.999, 0.09235685880262713), + (0.9999999962747097, 0.028766845244146945)]) +def test_levy_isf(p, expected_isf): + x = stats.levy.isf(p) + assert_allclose(x, expected_isf, atol=5e-15) + + +def test_levy_l_sf(): + # Test levy_l.sf for small arguments. + x = np.array([-0.016, -0.01, -0.005, -0.0015]) + # Expected values were calculated with mpmath. + expected = np.array([2.6644463892359302e-15, + 1.523970604832107e-23, + 2.0884875837625492e-45, + 5.302850374626878e-147]) + y = stats.levy_l.sf(x) + assert_allclose(y, expected, rtol=1e-13) + + +def test_levy_l_isf(): + # Test roundtrip sf(isf(p)), including a small input value. 
+ p = np.array([3.0e-15, 0.25, 0.99]) + x = stats.levy_l.isf(p) + q = stats.levy_l.sf(x) + assert_allclose(q, p, rtol=5e-14) + + +def test_hypergeom_interval_1802(): + # these two had endless loops + assert_equal(stats.hypergeom.interval(.95, 187601, 43192, 757), + (152.0, 197.0)) + assert_equal(stats.hypergeom.interval(.945, 187601, 43192, 757), + (152.0, 197.0)) + # this was working also before + assert_equal(stats.hypergeom.interval(.94, 187601, 43192, 757), + (153.0, 196.0)) + + # degenerate case .a == .b + assert_equal(stats.hypergeom.ppf(0.02, 100, 100, 8), 8) + assert_equal(stats.hypergeom.ppf(1, 100, 100, 8), 8) + + +def test_distribution_too_many_args(): + np.random.seed(1234) + + # Check that a TypeError is raised when too many args are given to a method + # Regression test for ticket 1815. + x = np.linspace(0.1, 0.7, num=5) + assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, loc=1.0) + assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, 4, loc=1.0) + assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, 4, 5) + assert_raises(TypeError, stats.gamma.pdf, x, 2, 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.rvs, 2., 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.cdf, x, 2., 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.ppf, x, 2., 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.stats, 2., 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.entropy, 2., 3, loc=1.0, scale=0.5) + assert_raises(TypeError, stats.gamma.fit, x, 2., 3, loc=1.0, scale=0.5) + + # These should not give errors + stats.gamma.pdf(x, 2, 3) # loc=3 + stats.gamma.pdf(x, 2, 3, 4) # loc=3, scale=4 + stats.gamma.stats(2., 3) + stats.gamma.stats(2., 3, 4) + stats.gamma.stats(2., 3, 4, 'mv') + stats.gamma.rvs(2., 3, 4, 5) + stats.gamma.fit(stats.gamma.rvs(2., size=7), 2.) 
+ + # Also for a discrete distribution + stats.geom.pmf(x, 2, loc=3) # no error, loc=3 + assert_raises(TypeError, stats.geom.pmf, x, 2, 3, 4) + assert_raises(TypeError, stats.geom.pmf, x, 2, 3, loc=4) + + # And for distributions with 0, 2 and 3 args respectively + assert_raises(TypeError, stats.expon.pdf, x, 3, loc=1.0) + assert_raises(TypeError, stats.exponweib.pdf, x, 3, 4, 5, loc=1.0) + assert_raises(TypeError, stats.exponweib.pdf, x, 3, 4, 5, 0.1, 0.1) + assert_raises(TypeError, stats.ncf.pdf, x, 3, 4, 5, 6, loc=1.0) + assert_raises(TypeError, stats.ncf.pdf, x, 3, 4, 5, 6, 1.0, scale=0.5) + stats.ncf.pdf(x, 3, 4, 5, 6, 1.0) # 3 args, plus loc/scale + + +def test_ncx2_tails_ticket_955(): + # Trac #955 -- check that the cdf computed by special functions + # matches the integrated pdf + a = stats.ncx2.cdf(np.arange(20, 25, 0.2), 2, 1.07458615e+02) + b = stats.ncx2._cdfvec(np.arange(20, 25, 0.2), 2, 1.07458615e+02) + assert_allclose(a, b, rtol=1e-3, atol=0) + + +def test_ncx2_tails_pdf(): + # ncx2.pdf does not return nans in extreme tails(example from gh-1577) + # NB: this is to check that nan_to_num is not needed in ncx2.pdf + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + assert_equal(stats.ncx2.pdf(1, np.arange(340, 350), 2), 0) + logval = stats.ncx2.logpdf(1, np.arange(340, 350), 2) + + assert_(np.isneginf(logval).all()) + + # Verify logpdf has extended precision when pdf underflows to 0 + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + assert_equal(stats.ncx2.pdf(10000, 3, 12), 0) + assert_allclose(stats.ncx2.logpdf(10000, 3, 12), -4662.444377524883) + + +@pytest.mark.parametrize('method, expected', [ + ('cdf', np.array([2.497951336e-09, 3.437288941e-10])), + ('pdf', np.array([1.238579980e-07, 1.710041145e-08])), + ('logpdf', np.array([-15.90413011, -17.88416331])), + ('ppf', np.array([4.865182052, 7.017182271])) +]) +def test_ncx2_zero_nc(method, expected): + # gh-5441 + # ncx2 with nc=0 
is identical to chi2 + # Comparison to R (v3.5.1) + # > options(digits=10) + # > pchisq(0.1, df=10, ncp=c(0,4)) + # > dchisq(0.1, df=10, ncp=c(0,4)) + # > dchisq(0.1, df=10, ncp=c(0,4), log=TRUE) + # > qchisq(0.1, df=10, ncp=c(0,4)) + + result = getattr(stats.ncx2, method)(0.1, nc=[0, 4], df=10) + assert_allclose(result, expected, atol=1e-15) + + +def test_ncx2_zero_nc_rvs(): + # gh-5441 + # ncx2 with nc=0 is identical to chi2 + result = stats.ncx2.rvs(df=10, nc=0, random_state=1) + expected = stats.chi2.rvs(df=10, random_state=1) + assert_allclose(result, expected, atol=1e-15) + + +def test_ncx2_gh12731(): + # test that gh-12731 is resolved; previously these were all 0.5 + nc = 10**np.arange(5, 10) + assert_equal(stats.ncx2.cdf(1e4, df=1, nc=nc), 0) + + +def test_ncx2_gh8665(): + # test that gh-8665 is resolved; previously this tended to nonzero value + x = np.array([4.99515382e+00, 1.07617327e+01, 2.31854502e+01, + 4.99515382e+01, 1.07617327e+02, 2.31854502e+02, + 4.99515382e+02, 1.07617327e+03, 2.31854502e+03, + 4.99515382e+03, 1.07617327e+04, 2.31854502e+04, + 4.99515382e+04]) + nu, lam = 20, 499.51538166556196 + + sf = stats.ncx2.sf(x, df=nu, nc=lam) + # computed in R. 
Couldn't find a survival function implementation + # options(digits=16) + # x <- c(4.99515382e+00, 1.07617327e+01, 2.31854502e+01, 4.99515382e+01, + # 1.07617327e+02, 2.31854502e+02, 4.99515382e+02, 1.07617327e+03, + # 2.31854502e+03, 4.99515382e+03, 1.07617327e+04, 2.31854502e+04, + # 4.99515382e+04) + # nu <- 20 + # lam <- 499.51538166556196 + # 1 - pchisq(x, df = nu, ncp = lam) + sf_expected = [1.0000000000000000, 1.0000000000000000, 1.0000000000000000, + 1.0000000000000000, 1.0000000000000000, 0.9999999999999888, + 0.6646525582135460, 0.0000000000000000, 0.0000000000000000, + 0.0000000000000000, 0.0000000000000000, 0.0000000000000000, + 0.0000000000000000] + assert_allclose(sf, sf_expected, atol=1e-12) + + +def test_ncx2_gh11777(): + # regression test for gh-11777: + # At high values of degrees of freedom df, ensure the pdf of ncx2 does + # not get clipped to zero when the non-centrality parameter is + # sufficiently less than df + df = 6700 + nc = 5300 + x = np.linspace(stats.ncx2.ppf(0.001, df, nc), + stats.ncx2.ppf(0.999, df, nc), num=10000) + ncx2_pdf = stats.ncx2.pdf(x, df, nc) + gauss_approx = stats.norm.pdf(x, df + nc, np.sqrt(2 * df + 4 * nc)) + # use huge tolerance as we're only looking for obvious discrepancy + assert_allclose(ncx2_pdf, gauss_approx, atol=1e-4) + + +# Expected values for foldcauchy.sf were computed with mpmath: +# +# from mpmath import mp +# mp.dps = 60 +# def foldcauchy_sf(x, c): +# x = mp.mpf(x) +# c = mp.mpf(c) +# return mp.one - (mp.atan(x - c) + mp.atan(x + c))/mp.pi +# +# E.g. 
#
# >>> float(foldcauchy_sf(2, 1))
# 0.35241638234956674
#
@pytest.mark.parametrize('x, c, expected',
                         [(2, 1, 0.35241638234956674),
                          (2, 2, 0.5779791303773694),
                          (1e13, 1, 6.366197723675813e-14),
                          (2e16, 1, 3.183098861837907e-17),
                          (1e13, 2e11, 6.368745221764519e-14),
                          (0.125, 200, 0.999998010612169)])
def test_foldcauchy_sf(x, c, expected):
    """Compare ``stats.foldcauchy.sf(x, c)`` with an mpmath reference value.

    The parametrized cases are the reference values described in the
    comment block above; agreement is required to a relative tolerance
    of 2e-15.
    """
    assert_allclose(stats.foldcauchy.sf(x, c), expected, rtol=2e-15)


# The same mpmath code shown in the comments above test_foldcauchy_sf()
# is used to create these expected values.
@pytest.mark.parametrize('x, expected',
                         [(2, 0.2951672353008665),
                          (1e13, 6.366197723675813e-14),
                          (2e16, 3.183098861837907e-17),
                          (5e80, 1.2732395447351629e-81)])
def test_halfcauchy_sf(x, expected):
    """Compare ``stats.halfcauchy.sf(x)`` with an mpmath reference value.

    Agreement is required to a relative tolerance of 2e-15.
    """
    assert_allclose(stats.halfcauchy.sf(x), expected, rtol=2e-15)


# Expected value computed with mpmath:
#     expected = mp.cot(mp.pi*p/2)
@pytest.mark.parametrize('p, expected',
                         [(0.9999995, 7.853981633329977e-07),
                          (0.975, 0.039290107007669675),
                          (0.5, 1.0),
                          (0.01, 63.65674116287158),
                          (1e-14, 63661977236758.13),
                          (5e-80, 1.2732395447351627e+79)])
def test_halfcauchy_isf(p, expected):
    """Compare ``stats.halfcauchy.isf(p)`` with an mpmath reference value.

    Uses the default tolerances of ``assert_allclose``.
    """
    assert_allclose(stats.halfcauchy.isf(p), expected)


def test_foldnorm_zero():
    """Check that ``foldnorm`` accepts the shape parameter value ``c=0``."""
    # Parameter value c=0 was not enabled, see gh-2399.
    frozen = stats.foldnorm(0, scale=1)
    # The cdf at 0 previously resulted in: nan
    assert_equal(frozen.cdf(0), 0)


# Expected values for foldnorm.sf were computed with mpmath:
#
#    from mpmath import mp
#    mp.dps = 60
#    def foldnorm_sf(x, c):
#        x = mp.mpf(x)
#        c = mp.mpf(c)
#        return mp.ncdf(-x+c) + mp.ncdf(-x-c)
#
# E.g.
+# +# >>> float(foldnorm_sf(2, 1)) +# 0.16000515196308715 +# +@pytest.mark.parametrize('x, c, expected', + [(2, 1, 0.16000515196308715), + (20, 1, 8.527223952630977e-81), + (10, 15, 0.9999997133484281), + (25, 15, 7.619853024160525e-24)]) +def test_foldnorm_sf(x, c, expected): + sf = stats.foldnorm.sf(x, c) + assert_allclose(sf, expected, 1e-14) + + +def test_stats_shapes_argcheck(): + # stats method was failing for vector shapes if some of the values + # were outside of the allowed range, see gh-2678 + mv3 = stats.invgamma.stats([0.0, 0.5, 1.0], 1, 0.5) # 0 is not a legal `a` + mv2 = stats.invgamma.stats([0.5, 1.0], 1, 0.5) + mv2_augmented = tuple(np.r_[np.nan, _] for _ in mv2) + assert_equal(mv2_augmented, mv3) + + # -1 is not a legal shape parameter + mv3 = stats.lognorm.stats([2, 2.4, -1]) + mv2 = stats.lognorm.stats([2, 2.4]) + mv2_augmented = tuple(np.r_[_, np.nan] for _ in mv2) + assert_equal(mv2_augmented, mv3) + + # FIXME: this is only a quick-and-dirty test of a quick-and-dirty bugfix. + # stats method with multiple shape parameters is not properly vectorized + # anyway, so some distributions may or may not fail. + + +# Test subclassing distributions w/ explicit shapes + +class _distr_gen(stats.rv_continuous): + def _pdf(self, x, a): + return 42 + + +class _distr2_gen(stats.rv_continuous): + def _cdf(self, x, a): + return 42 * a + x + + +class _distr3_gen(stats.rv_continuous): + def _pdf(self, x, a, b): + return a + b + + def _cdf(self, x, a): + # Different # of shape params from _pdf, to be able to check that + # inspection catches the inconsistency. + return 42 * a + x + + +class _distr6_gen(stats.rv_continuous): + # Two shape parameters (both _pdf and _cdf defined, consistent shapes.) + def _pdf(self, x, a, b): + return a*x + b + + def _cdf(self, x, a, b): + return 42 * a + x + + +class TestSubclassingExplicitShapes: + # Construct a distribution w/ explicit shapes parameter and test it. 
+ + def test_correct_shapes(self): + dummy_distr = _distr_gen(name='dummy', shapes='a') + assert_equal(dummy_distr.pdf(1, a=1), 42) + + def test_wrong_shapes_1(self): + dummy_distr = _distr_gen(name='dummy', shapes='A') + assert_raises(TypeError, dummy_distr.pdf, 1, **dict(a=1)) + + def test_wrong_shapes_2(self): + dummy_distr = _distr_gen(name='dummy', shapes='a, b, c') + dct = dict(a=1, b=2, c=3) + assert_raises(TypeError, dummy_distr.pdf, 1, **dct) + + def test_shapes_string(self): + # shapes must be a string + dct = dict(name='dummy', shapes=42) + assert_raises(TypeError, _distr_gen, **dct) + + def test_shapes_identifiers_1(self): + # shapes must be a comma-separated list of valid python identifiers + dct = dict(name='dummy', shapes='(!)') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_identifiers_2(self): + dct = dict(name='dummy', shapes='4chan') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_identifiers_3(self): + dct = dict(name='dummy', shapes='m(fti)') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_identifiers_nodefaults(self): + dct = dict(name='dummy', shapes='a=2') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_args(self): + dct = dict(name='dummy', shapes='*args') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_kwargs(self): + dct = dict(name='dummy', shapes='**kwargs') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_keywords(self): + # python keywords cannot be used for shape parameters + dct = dict(name='dummy', shapes='a, b, c, lambda') + assert_raises(SyntaxError, _distr_gen, **dct) + + def test_shapes_signature(self): + # test explicit shapes which agree w/ the signature of _pdf + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, a): + return stats.norm._pdf(x) * a + + dist = _dist_gen(shapes='a') + assert_equal(dist.pdf(0.5, a=2), stats.norm.pdf(0.5)*2) + + def test_shapes_signature_inconsistent(self): + # test 
explicit shapes which do not agree w/ the signature of _pdf + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, a): + return stats.norm._pdf(x) * a + + dist = _dist_gen(shapes='a, b') + assert_raises(TypeError, dist.pdf, 0.5, **dict(a=1, b=2)) + + def test_star_args(self): + # test _pdf with only starargs + # NB: **kwargs of pdf will never reach _pdf + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, *args): + extra_kwarg = args[0] + return stats.norm._pdf(x) * extra_kwarg + + dist = _dist_gen(shapes='extra_kwarg') + assert_equal(dist.pdf(0.5, extra_kwarg=33), stats.norm.pdf(0.5)*33) + assert_equal(dist.pdf(0.5, 33), stats.norm.pdf(0.5)*33) + assert_raises(TypeError, dist.pdf, 0.5, **dict(xxx=33)) + + def test_star_args_2(self): + # test _pdf with named & starargs + # NB: **kwargs of pdf will never reach _pdf + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, offset, *args): + extra_kwarg = args[0] + return stats.norm._pdf(x) * extra_kwarg + offset + + dist = _dist_gen(shapes='offset, extra_kwarg') + assert_equal(dist.pdf(0.5, offset=111, extra_kwarg=33), + stats.norm.pdf(0.5)*33 + 111) + assert_equal(dist.pdf(0.5, 111, 33), + stats.norm.pdf(0.5)*33 + 111) + + def test_extra_kwarg(self): + # **kwargs to _pdf are ignored. + # this is a limitation of the framework (_pdf(x, *goodargs)) + class _distr_gen(stats.rv_continuous): + def _pdf(self, x, *args, **kwargs): + # _pdf should handle *args, **kwargs itself. Here "handling" + # is ignoring *args and looking for ``extra_kwarg`` and using + # that. 
+ extra_kwarg = kwargs.pop('extra_kwarg', 1) + return stats.norm._pdf(x) * extra_kwarg + + dist = _distr_gen(shapes='extra_kwarg') + assert_equal(dist.pdf(1, extra_kwarg=3), stats.norm.pdf(1)) + + def test_shapes_empty_string(self): + # shapes='' is equivalent to shapes=None + class _dist_gen(stats.rv_continuous): + def _pdf(self, x): + return stats.norm.pdf(x) + + dist = _dist_gen(shapes='') + assert_equal(dist.pdf(0.5), stats.norm.pdf(0.5)) + + +class TestSubclassingNoShapes: + # Construct a distribution w/o explicit shapes parameter and test it. + + def test_only__pdf(self): + dummy_distr = _distr_gen(name='dummy') + assert_equal(dummy_distr.pdf(1, a=1), 42) + + def test_only__cdf(self): + # _pdf is determined from _cdf by taking numerical derivative + dummy_distr = _distr2_gen(name='dummy') + assert_almost_equal(dummy_distr.pdf(1, a=1), 1) + + @pytest.mark.skipif(DOCSTRINGS_STRIPPED, reason="docstring stripped") + def test_signature_inspection(self): + # check that _pdf signature inspection works correctly, and is used in + # the class docstring + dummy_distr = _distr_gen(name='dummy') + assert_equal(dummy_distr.numargs, 1) + assert_equal(dummy_distr.shapes, 'a') + res = re.findall(r'logpdf\(x, a, loc=0, scale=1\)', + dummy_distr.__doc__) + assert_(len(res) == 1) + + @pytest.mark.skipif(DOCSTRINGS_STRIPPED, reason="docstring stripped") + def test_signature_inspection_2args(self): + # same for 2 shape params and both _pdf and _cdf defined + dummy_distr = _distr6_gen(name='dummy') + assert_equal(dummy_distr.numargs, 2) + assert_equal(dummy_distr.shapes, 'a, b') + res = re.findall(r'logpdf\(x, a, b, loc=0, scale=1\)', + dummy_distr.__doc__) + assert_(len(res) == 1) + + def test_signature_inspection_2args_incorrect_shapes(self): + # both _pdf and _cdf defined, but shapes are inconsistent: raises + assert_raises(TypeError, _distr3_gen, name='dummy') + + def test_defaults_raise(self): + # default arguments should raise + class _dist_gen(stats.rv_continuous): + def 
_pdf(self, x, a=42): + return 42 + assert_raises(TypeError, _dist_gen, **dict(name='dummy')) + + def test_starargs_raise(self): + # without explicit shapes, *args are not allowed + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, a, *args): + return 42 + assert_raises(TypeError, _dist_gen, **dict(name='dummy')) + + def test_kwargs_raise(self): + # without explicit shapes, **kwargs are not allowed + class _dist_gen(stats.rv_continuous): + def _pdf(self, x, a, **kwargs): + return 42 + assert_raises(TypeError, _dist_gen, **dict(name='dummy')) + + +@pytest.mark.skipif(DOCSTRINGS_STRIPPED, reason="docstring stripped") +def test_docstrings(): + badones = [r',\s*,', r'\(\s*,', r'^\s*:'] + for distname in stats.__all__: + dist = getattr(stats, distname) + if isinstance(dist, (stats.rv_discrete, stats.rv_continuous)): + for regex in badones: + assert_(re.search(regex, dist.__doc__) is None) + + +def test_infinite_input(): + assert_almost_equal(stats.skellam.sf(np.inf, 10, 11), 0) + assert_almost_equal(stats.ncx2._cdf(np.inf, 8, 0.1), 1) + + +def test_lomax_accuracy(): + # regression test for gh-4033 + p = stats.lomax.ppf(stats.lomax.cdf(1e-100, 1), 1) + assert_allclose(p, 1e-100) + + +def test_truncexpon_accuracy(): + # regression test for gh-4035 + p = stats.truncexpon.ppf(stats.truncexpon.cdf(1e-100, 1), 1) + assert_allclose(p, 1e-100) + + +def test_rayleigh_accuracy(): + # regression test for gh-4034 + p = stats.rayleigh.isf(stats.rayleigh.sf(9, 1), 1) + assert_almost_equal(p, 9.0, decimal=15) + + +def test_genextreme_give_no_warnings(): + """regression test for gh-6219""" + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + + stats.genextreme.cdf(.5, 0) + stats.genextreme.pdf(.5, 0) + stats.genextreme.ppf(.5, 0) + stats.genextreme.logpdf(-np.inf, 0.0) + number_of_warnings_thrown = len(w) + assert_equal(number_of_warnings_thrown, 0) + + +def test_genextreme_entropy(): + # regression test for gh-5181 + euler_gamma = 
0.5772156649015329 + + h = stats.genextreme.entropy(-1.0) + assert_allclose(h, 2*euler_gamma + 1, rtol=1e-14) + + h = stats.genextreme.entropy(0) + assert_allclose(h, euler_gamma + 1, rtol=1e-14) + + h = stats.genextreme.entropy(1.0) + assert_equal(h, 1) + + h = stats.genextreme.entropy(-2.0, scale=10) + assert_allclose(h, euler_gamma*3 + np.log(10) + 1, rtol=1e-14) + + h = stats.genextreme.entropy(10) + assert_allclose(h, -9*euler_gamma + 1, rtol=1e-14) + + h = stats.genextreme.entropy(-10) + assert_allclose(h, 11*euler_gamma + 1, rtol=1e-14) + + +def test_genextreme_sf_isf(): + # Expected values were computed using mpmath: + # + # import mpmath + # + # def mp_genextreme_sf(x, xi, mu=0, sigma=1): + # # Formula from wikipedia, which has a sign convention for xi that + # # is the opposite of scipy's shape parameter. + # if xi != 0: + # t = mpmath.power(1 + ((x - mu)/sigma)*xi, -1/xi) + # else: + # t = mpmath.exp(-(x - mu)/sigma) + # return 1 - mpmath.exp(-t) + # + # >>> mpmath.mp.dps = 1000 + # >>> s = mp_genextreme_sf(mpmath.mp.mpf("1e8"), mpmath.mp.mpf("0.125")) + # >>> float(s) + # 1.6777205262585625e-57 + # >>> s = mp_genextreme_sf(mpmath.mp.mpf("7.98"), mpmath.mp.mpf("-0.125")) + # >>> float(s) + # 1.52587890625e-21 + # >>> s = mp_genextreme_sf(mpmath.mp.mpf("7.98"), mpmath.mp.mpf("0")) + # >>> float(s) + # 0.00034218086528426593 + + x = 1e8 + s = stats.genextreme.sf(x, -0.125) + assert_allclose(s, 1.6777205262585625e-57) + x2 = stats.genextreme.isf(s, -0.125) + assert_allclose(x2, x) + + x = 7.98 + s = stats.genextreme.sf(x, 0.125) + assert_allclose(s, 1.52587890625e-21) + x2 = stats.genextreme.isf(s, 0.125) + assert_allclose(x2, x) + + x = 7.98 + s = stats.genextreme.sf(x, 0) + assert_allclose(s, 0.00034218086528426593) + x2 = stats.genextreme.isf(s, 0) + assert_allclose(x2, x) + + +def test_burr12_ppf_small_arg(): + prob = 1e-16 + quantile = stats.burr12.ppf(prob, 2, 3) + # The expected quantile was computed using mpmath: + # >>> import mpmath + # >>> 
mpmath.mp.dps = 100 + # >>> prob = mpmath.mpf('1e-16') + # >>> c = mpmath.mpf(2) + # >>> d = mpmath.mpf(3) + # >>> float(((1-prob)**(-1/d) - 1)**(1/c)) + # 5.7735026918962575e-09 + assert_allclose(quantile, 5.7735026918962575e-09) + + +def test_crystalball_function(): + """ + All values are calculated using the independent implementation of the + ROOT framework (see https://root.cern.ch/). + Corresponding ROOT code is given in the comments. + """ + X = np.linspace(-5.0, 5.0, 21)[:-1] + + # for(float x = -5.0; x < 5.0; x+=0.5) + # std::cout << ROOT::Math::crystalball_pdf(x, 1.0, 2.0, 1.0) << ", "; + calculated = stats.crystalball.pdf(X, beta=1.0, m=2.0) + expected = np.array([0.0202867, 0.0241428, 0.0292128, 0.0360652, 0.045645, + 0.059618, 0.0811467, 0.116851, 0.18258, 0.265652, + 0.301023, 0.265652, 0.18258, 0.097728, 0.0407391, + 0.013226, 0.00334407, 0.000658486, 0.000100982, + 1.20606e-05]) + assert_allclose(expected, calculated, rtol=0.001) + + # for(float x = -5.0; x < 5.0; x+=0.5) + # std::cout << ROOT::Math::crystalball_pdf(x, 2.0, 3.0, 1.0) << ", "; + calculated = stats.crystalball.pdf(X, beta=2.0, m=3.0) + expected = np.array([0.0019648, 0.00279754, 0.00417592, 0.00663121, + 0.0114587, 0.0223803, 0.0530497, 0.12726, 0.237752, + 0.345928, 0.391987, 0.345928, 0.237752, 0.12726, + 0.0530497, 0.0172227, 0.00435458, 0.000857469, + 0.000131497, 1.57051e-05]) + assert_allclose(expected, calculated, rtol=0.001) + + # for(float x = -5.0; x < 5.0; x+=0.5) { + # std::cout << ROOT::Math::crystalball_pdf(x, 2.0, 3.0, 2.0, 0.5); + # std::cout << ", "; + # } + calculated = stats.crystalball.pdf(X, beta=2.0, m=3.0, loc=0.5, scale=2.0) + expected = np.array([0.00785921, 0.0111902, 0.0167037, 0.0265249, + 0.0423866, 0.0636298, 0.0897324, 0.118876, 0.147944, + 0.172964, 0.189964, 0.195994, 0.189964, 0.172964, + 0.147944, 0.118876, 0.0897324, 0.0636298, 0.0423866, + 0.0265249]) + assert_allclose(expected, calculated, rtol=0.001) + + # for(float x = -5.0; x < 5.0; x+=0.5) + # 
std::cout << ROOT::Math::crystalball_cdf(x, 1.0, 2.0, 1.0) << ", "; + calculated = stats.crystalball.cdf(X, beta=1.0, m=2.0) + expected = np.array([0.12172, 0.132785, 0.146064, 0.162293, 0.18258, + 0.208663, 0.24344, 0.292128, 0.36516, 0.478254, + 0.622723, 0.767192, 0.880286, 0.94959, 0.982834, + 0.995314, 0.998981, 0.999824, 0.999976, 0.999997]) + assert_allclose(expected, calculated, rtol=0.001) + + # for(float x = -5.0; x < 5.0; x+=0.5) + # std::cout << ROOT::Math::crystalball_cdf(x, 2.0, 3.0, 1.0) << ", "; + calculated = stats.crystalball.cdf(X, beta=2.0, m=3.0) + expected = np.array([0.00442081, 0.00559509, 0.00730787, 0.00994682, + 0.0143234, 0.0223803, 0.0397873, 0.0830763, 0.173323, + 0.320592, 0.508717, 0.696841, 0.844111, 0.934357, + 0.977646, 0.993899, 0.998674, 0.999771, 0.999969, + 0.999997]) + assert_allclose(expected, calculated, rtol=0.001) + + # for(float x = -5.0; x < 5.0; x+=0.5) { + # std::cout << ROOT::Math::crystalball_cdf(x, 2.0, 3.0, 2.0, 0.5); + # std::cout << ", "; + # } + calculated = stats.crystalball.cdf(X, beta=2.0, m=3.0, loc=0.5, scale=2.0) + expected = np.array([0.0176832, 0.0223803, 0.0292315, 0.0397873, 0.0567945, + 0.0830763, 0.121242, 0.173323, 0.24011, 0.320592, + 0.411731, 0.508717, 0.605702, 0.696841, 0.777324, + 0.844111, 0.896192, 0.934357, 0.960639, 0.977646]) + assert_allclose(expected, calculated, rtol=0.001) + + +def test_crystalball_function_moments(): + """ + All values are calculated using the pdf formula and the integrate function + of Mathematica + """ + # The Last two (alpha, n) pairs test the special case n == alpha**2 + beta = np.array([2.0, 1.0, 3.0, 2.0, 3.0]) + m = np.array([3.0, 3.0, 2.0, 4.0, 9.0]) + + # The distribution should be correctly normalised + expected_0th_moment = np.array([1.0, 1.0, 1.0, 1.0, 1.0]) + calculated_0th_moment = stats.crystalball._munp(0, beta, m) + assert_allclose(expected_0th_moment, calculated_0th_moment, rtol=0.001) + + # calculated using wolframalpha.com + # e.g. 
for beta = 2 and m = 3 we calculate the norm like this: + # integrate exp(-x^2/2) from -2 to infinity + + # integrate (3/2)^3*exp(-2^2/2)*(3/2-2-x)^(-3) from -infinity to -2 + norm = np.array([2.5511, 3.01873, 2.51065, 2.53983, 2.507410455]) + + a = np.array([-0.21992, -3.03265, np.inf, -0.135335, -0.003174]) + expected_1th_moment = a / norm + calculated_1th_moment = stats.crystalball._munp(1, beta, m) + assert_allclose(expected_1th_moment, calculated_1th_moment, rtol=0.001) + + a = np.array([np.inf, np.inf, np.inf, 3.2616, 2.519908]) + expected_2th_moment = a / norm + calculated_2th_moment = stats.crystalball._munp(2, beta, m) + assert_allclose(expected_2th_moment, calculated_2th_moment, rtol=0.001) + + a = np.array([np.inf, np.inf, np.inf, np.inf, -0.0577668]) + expected_3th_moment = a / norm + calculated_3th_moment = stats.crystalball._munp(3, beta, m) + assert_allclose(expected_3th_moment, calculated_3th_moment, rtol=0.001) + + a = np.array([np.inf, np.inf, np.inf, np.inf, 7.78468]) + expected_4th_moment = a / norm + calculated_4th_moment = stats.crystalball._munp(4, beta, m) + assert_allclose(expected_4th_moment, calculated_4th_moment, rtol=0.001) + + a = np.array([np.inf, np.inf, np.inf, np.inf, -1.31086]) + expected_5th_moment = a / norm + calculated_5th_moment = stats.crystalball._munp(5, beta, m) + assert_allclose(expected_5th_moment, calculated_5th_moment, rtol=0.001) + + +def test_crystalball_entropy(): + # regression test for gh-13602 + cb = stats.crystalball(2, 3) + res1 = cb.entropy() + # -20000 and 30 are negative and positive infinity, respectively + lo, hi, N = -20000, 30, 200000 + x = np.linspace(lo, hi, N) + res2 = trapezoid(entr(cb.pdf(x)), x) + assert_allclose(res1, res2, rtol=1e-7) + + +def test_invweibull_fit(): + """ + Test fitting invweibull to data. 
+ + Here is a the same calculation in R: + + > library(evd) + > library(fitdistrplus) + > x = c(1, 1.25, 2, 2.5, 2.8, 3, 3.8, 4, 5, 8, 10, 12, 64, 99) + > result = fitdist(x, 'frechet', control=list(reltol=1e-13), + + fix.arg=list(loc=0), start=list(shape=2, scale=3)) + > result + Fitting of the distribution ' frechet ' by maximum likelihood + Parameters: + estimate Std. Error + shape 1.048482 0.2261815 + scale 3.099456 0.8292887 + Fixed parameters: + value + loc 0 + + """ + + def optimizer(func, x0, args=(), disp=0): + return fmin(func, x0, args=args, disp=disp, xtol=1e-12, ftol=1e-12) + + x = np.array([1, 1.25, 2, 2.5, 2.8, 3, 3.8, 4, 5, 8, 10, 12, 64, 99]) + c, loc, scale = stats.invweibull.fit(x, floc=0, optimizer=optimizer) + assert_allclose(c, 1.048482, rtol=5e-6) + assert loc == 0 + assert_allclose(scale, 3.099456, rtol=5e-6) + + +# Expected values were computed with mpmath. +@pytest.mark.parametrize('x, c, expected', + [(3, 1.5, 0.175064510070713299327), + (2000, 1.5, 1.11802773877318715787e-5), + (2000, 9.25, 2.92060308832269637092e-31), + (1e15, 1.5, 3.16227766016837933199884e-23)]) +def test_invweibull_sf(x, c, expected): + computed = stats.invweibull.sf(x, c) + assert_allclose(computed, expected, rtol=1e-15) + + +# Expected values were computed with mpmath. +@pytest.mark.parametrize('p, c, expected', + [(0.5, 2.5, 1.15789669836468183976), + (3e-18, 5, 3195.77171838060906447)]) +def test_invweibull_isf(p, c, expected): + computed = stats.invweibull.isf(p, c) + assert_allclose(computed, expected, rtol=1e-15) + + +@pytest.mark.parametrize( + 'df1,df2,x', + [(2, 2, [-0.5, 0.2, 1.0, 2.3]), + (4, 11, [-0.5, 0.2, 1.0, 2.3]), + (7, 17, [1, 2, 3, 4, 5])] +) +def test_ncf_edge_case(df1, df2, x): + # Test for edge case described in gh-11660. + # Non-central Fisher distribution when nc = 0 + # should be the same as Fisher distribution. 
+ nc = 0 + expected_cdf = stats.f.cdf(x, df1, df2) + calculated_cdf = stats.ncf.cdf(x, df1, df2, nc) + assert_allclose(expected_cdf, calculated_cdf, rtol=1e-14) + + # when ncf_gen._skip_pdf will be used instead of generic pdf, + # this additional test will be useful. + expected_pdf = stats.f.pdf(x, df1, df2) + calculated_pdf = stats.ncf.pdf(x, df1, df2, nc) + assert_allclose(expected_pdf, calculated_pdf, rtol=1e-6) + + +def test_ncf_variance(): + # Regression test for gh-10658 (incorrect variance formula for ncf). + # The correct value of ncf.var(2, 6, 4), 42.75, can be verified with, for + # example, Wolfram Alpha with the expression + # Variance[NoncentralFRatioDistribution[2, 6, 4]] + # or with the implementation of the noncentral F distribution in the C++ + # library Boost. + v = stats.ncf.var(2, 6, 4) + assert_allclose(v, 42.75, rtol=1e-14) + + +def test_ncf_cdf_spotcheck(): + # Regression test for gh-15582 testing against values from R/MATLAB + # Generate check_val from R or MATLAB as follows: + # R: pf(20, df1 = 6, df2 = 33, ncp = 30.4) = 0.998921 + # MATLAB: ncfcdf(20, 6, 33, 30.4) = 0.998921 + scipy_val = stats.ncf.cdf(20, 6, 33, 30.4) + check_val = 0.998921 + assert_allclose(check_val, np.round(scipy_val, decimals=6)) + + +@pytest.mark.skipif(sys.maxsize <= 2**32, + reason="On some 32-bit the warning is not raised") +def test_ncf_ppf_issue_17026(): + # Regression test for gh-17026 + x = np.linspace(0, 1, 600) + x[0] = 1e-16 + par = (0.1, 2, 5, 0, 1) + with pytest.warns(RuntimeWarning): + q = stats.ncf.ppf(x, *par) + q0 = [stats.ncf.ppf(xi, *par) for xi in x] + assert_allclose(q, q0) + + +class TestHistogram: + def setup_method(self): + np.random.seed(1234) + + # We have 8 bins + # [1,2), [2,3), [3,4), [4,5), [5,6), [6,7), [7,8), [8,9) + # But actually np.histogram will put the last 9 also in the [8,9) bin! + # Therefore there is a slight difference below for the last bin, from + # what you might have expected. 
+ histogram = np.histogram([1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 8, 8, 9], bins=8) + self.template = stats.rv_histogram(histogram) + + data = stats.norm.rvs(loc=1.0, scale=2.5, size=10000, random_state=123) + norm_histogram = np.histogram(data, bins=50) + self.norm_template = stats.rv_histogram(norm_histogram) + + def test_pdf(self): + values = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, + 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5]) + pdf_values = np.asarray([0.0/25.0, 0.0/25.0, 1.0/25.0, 1.0/25.0, + 2.0/25.0, 2.0/25.0, 3.0/25.0, 3.0/25.0, + 4.0/25.0, 4.0/25.0, 5.0/25.0, 5.0/25.0, + 4.0/25.0, 4.0/25.0, 3.0/25.0, 3.0/25.0, + 3.0/25.0, 3.0/25.0, 0.0/25.0, 0.0/25.0]) + assert_allclose(self.template.pdf(values), pdf_values) + + # Test explicitly the corner cases: + # As stated above the pdf in the bin [8,9) is greater than + # one would naively expect because np.histogram putted the 9 + # into the [8,9) bin. + assert_almost_equal(self.template.pdf(8.0), 3.0/25.0) + assert_almost_equal(self.template.pdf(8.5), 3.0/25.0) + # 9 is outside our defined bins [8,9) hence the pdf is already 0 + # for a continuous distribution this is fine, because a single value + # does not have a finite probability! 
+ assert_almost_equal(self.template.pdf(9.0), 0.0/25.0) + assert_almost_equal(self.template.pdf(10.0), 0.0/25.0) + + x = np.linspace(-2, 2, 10) + assert_allclose(self.norm_template.pdf(x), + stats.norm.pdf(x, loc=1.0, scale=2.5), rtol=0.1) + + def test_cdf_ppf(self): + values = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, + 5.0, 5.5, 6.0, 6.5, 7.0, 7.5, 8.0, 8.5, 9.0, 9.5]) + cdf_values = np.asarray([0.0/25.0, 0.0/25.0, 0.0/25.0, 0.5/25.0, + 1.0/25.0, 2.0/25.0, 3.0/25.0, 4.5/25.0, + 6.0/25.0, 8.0/25.0, 10.0/25.0, 12.5/25.0, + 15.0/25.0, 17.0/25.0, 19.0/25.0, 20.5/25.0, + 22.0/25.0, 23.5/25.0, 25.0/25.0, 25.0/25.0]) + assert_allclose(self.template.cdf(values), cdf_values) + # First three and last two values in cdf_value are not unique + assert_allclose(self.template.ppf(cdf_values[2:-1]), values[2:-1]) + + # Test of cdf and ppf are inverse functions + x = np.linspace(1.0, 9.0, 100) + assert_allclose(self.template.ppf(self.template.cdf(x)), x) + x = np.linspace(0.0, 1.0, 100) + assert_allclose(self.template.cdf(self.template.ppf(x)), x) + + x = np.linspace(-2, 2, 10) + assert_allclose(self.norm_template.cdf(x), + stats.norm.cdf(x, loc=1.0, scale=2.5), rtol=0.1) + + def test_rvs(self): + N = 10000 + sample = self.template.rvs(size=N, random_state=123) + assert_equal(np.sum(sample < 1.0), 0.0) + assert_allclose(np.sum(sample <= 2.0), 1.0/25.0 * N, rtol=0.2) + assert_allclose(np.sum(sample <= 2.5), 2.0/25.0 * N, rtol=0.2) + assert_allclose(np.sum(sample <= 3.0), 3.0/25.0 * N, rtol=0.1) + assert_allclose(np.sum(sample <= 3.5), 4.5/25.0 * N, rtol=0.1) + assert_allclose(np.sum(sample <= 4.0), 6.0/25.0 * N, rtol=0.1) + assert_allclose(np.sum(sample <= 4.5), 8.0/25.0 * N, rtol=0.1) + assert_allclose(np.sum(sample <= 5.0), 10.0/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 5.5), 12.5/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 6.0), 15.0/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 6.5), 17.0/25.0 * N, rtol=0.05) + 
assert_allclose(np.sum(sample <= 7.0), 19.0/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 7.5), 20.5/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 8.0), 22.0/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 8.5), 23.5/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 9.0), 25.0/25.0 * N, rtol=0.05) + assert_allclose(np.sum(sample <= 9.0), 25.0/25.0 * N, rtol=0.05) + assert_equal(np.sum(sample > 9.0), 0.0) + + def test_munp(self): + for n in range(4): + assert_allclose(self.norm_template._munp(n), + stats.norm(1.0, 2.5).moment(n), rtol=0.05) + + def test_entropy(self): + assert_allclose(self.norm_template.entropy(), + stats.norm.entropy(loc=1.0, scale=2.5), rtol=0.05) + + +def test_histogram_non_uniform(): + # Tests rv_histogram works even for non-uniform bin widths + counts, bins = ([1, 1], [0, 1, 1001]) + + dist = stats.rv_histogram((counts, bins), density=False) + np.testing.assert_allclose(dist.pdf([0.5, 200]), [0.5, 0.0005]) + assert dist.median() == 1 + + dist = stats.rv_histogram((counts, bins), density=True) + np.testing.assert_allclose(dist.pdf([0.5, 200]), 1/1001) + assert dist.median() == 1001/2 + + # Omitting density produces a warning for non-uniform bins... + message = "Bin widths are not constant. Assuming..." + with pytest.warns(RuntimeWarning, match=message): + dist = stats.rv_histogram((counts, bins)) + assert dist.median() == 1001/2 # default is like `density=True` + + # ... 
but not for uniform bins + dist = stats.rv_histogram((counts, [0, 1, 2])) + assert dist.median() == 1 + + +class TestLogUniform: + def test_alias(self): + # This test makes sure that "reciprocal" and "loguniform" are + # aliases of the same distribution and that both are log-uniform + rng = np.random.default_rng(98643218961) + rv = stats.loguniform(10 ** -3, 10 ** 0) + rvs = rv.rvs(size=10000, random_state=rng) + + rng = np.random.default_rng(98643218961) + rv2 = stats.reciprocal(10 ** -3, 10 ** 0) + rvs2 = rv2.rvs(size=10000, random_state=rng) + + assert_allclose(rvs2, rvs) + + vals, _ = np.histogram(np.log10(rvs), bins=10) + assert 900 <= vals.min() <= vals.max() <= 1100 + assert np.abs(np.median(vals) - 1000) <= 10 + + @pytest.mark.parametrize("method", ['mle', 'mm']) + def test_fit_override(self, method): + # loguniform is overparameterized, so check that fit override enforces + # scale=1 unless fscale is provided by the user + rng = np.random.default_rng(98643218961) + rvs = stats.loguniform.rvs(0.1, 1, size=1000, random_state=rng) + + a, b, loc, scale = stats.loguniform.fit(rvs, method=method) + assert scale == 1 + + a, b, loc, scale = stats.loguniform.fit(rvs, fscale=2, method=method) + assert scale == 2 + + def test_overflow(self): + # original formulation had overflow issues; check that this is resolved + # Extensive accuracy tests elsewhere, no need to test all methods + rng = np.random.default_rng(7136519550773909093) + a, b = 1e-200, 1e200 + dist = stats.loguniform(a, b) + + # test roundtrip error + cdf = rng.uniform(0, 1, size=1000) + assert_allclose(dist.cdf(dist.ppf(cdf)), cdf) + rvs = dist.rvs(size=1000) + assert_allclose(dist.ppf(dist.cdf(rvs)), rvs) + + # test a property of the pdf (and that there is no overflow) + x = 10.**np.arange(-200, 200) + pdf = dist.pdf(x) # no overflow + assert_allclose(pdf[:-1]/pdf[1:], 10) + + # check munp against wikipedia reference + mean = (b - a)/(np.log(b) - np.log(a)) + assert_allclose(dist.mean(), mean) + + 
+class TestArgus: + def test_argus_rvs_large_chi(self): + # test that the algorithm can handle large values of chi + x = stats.argus.rvs(50, size=500, random_state=325) + assert_almost_equal(stats.argus(50).mean(), x.mean(), decimal=4) + + @pytest.mark.parametrize('chi, random_state', [ + [0.1, 325], # chi <= 0.5: rejection method case 1 + [1.3, 155], # 0.5 < chi <= 1.8: rejection method case 2 + [3.5, 135] # chi > 1.8: transform conditional Gamma distribution + ]) + def test_rvs(self, chi, random_state): + x = stats.argus.rvs(chi, size=500, random_state=random_state) + _, p = stats.kstest(x, "argus", (chi, )) + assert_(p > 0.05) + + @pytest.mark.parametrize('chi', [1e-9, 1e-6]) + def test_rvs_small_chi(self, chi): + # test for gh-11699 => rejection method case 1 can even handle chi=0 + # the CDF of the distribution for chi=0 is 1 - (1 - x**2)**(3/2) + # test rvs against distribution of limit chi=0 + r = stats.argus.rvs(chi, size=500, random_state=890981) + _, p = stats.kstest(r, lambda x: 1 - (1 - x**2)**(3/2)) + assert_(p > 0.05) + + # Expected values were computed with mpmath. + @pytest.mark.parametrize('chi, expected_mean', + [(1, 0.6187026683551835), + (10, 0.984805536783744), + (40, 0.9990617659702923), + (60, 0.9995831885165300), + (99, 0.9998469348663028)]) + def test_mean(self, chi, expected_mean): + m = stats.argus.mean(chi, scale=1) + assert_allclose(m, expected_mean, rtol=1e-13) + + # Expected values were computed with mpmath. + @pytest.mark.parametrize('chi, expected_var, rtol', + [(1, 0.05215651254197807, 1e-13), + (10, 0.00015805472008165595, 1e-11), + (40, 5.877763210262901e-07, 1e-8), + (60, 1.1590179389611416e-07, 1e-8), + (99, 1.5623277006064666e-08, 1e-8)]) + def test_var(self, chi, expected_var, rtol): + v = stats.argus.var(chi, scale=1) + assert_allclose(v, expected_var, rtol=rtol) + + # Expected values were computed with mpmath (code: see gh-13370). 
+ @pytest.mark.parametrize('chi, expected, rtol', + [(0.9, 0.07646314974436118, 1e-14), + (0.5, 0.015429797891863365, 1e-14), + (0.1, 0.0001325825293278049, 1e-14), + (0.01, 1.3297677078224565e-07, 1e-15), + (1e-3, 1.3298072023958999e-10, 1e-14), + (1e-4, 1.3298075973486862e-13, 1e-14), + (1e-6, 1.32980760133771e-19, 1e-14), + (1e-9, 1.329807601338109e-28, 1e-15)]) + def test_argus_phi_small_chi(self, chi, expected, rtol): + assert_allclose(_argus_phi(chi), expected, rtol=rtol) + + # Expected values were computed with mpmath (code: see gh-13370). + @pytest.mark.parametrize( + 'chi, expected', + [(0.5, (0.28414073302940573, 1.2742227939992954, 1.2381254688255896)), + (0.2, (0.296172952995264, 1.2951290588110516, 1.1865767100877576)), + (0.1, (0.29791447523536274, 1.29806307956989, 1.1793168289857412)), + (0.01, (0.2984904104866452, 1.2990283628160553, 1.1769268414080531)), + (1e-3, (0.298496172925224, 1.2990380082487925, 1.176902956021053)), + (1e-4, (0.29849623054991836, 1.2990381047023793, 1.1769027171686324)), + (1e-6, (0.2984962311319278, 1.2990381056765605, 1.1769027147562232)), + (1e-9, (0.298496231131986, 1.299038105676658, 1.1769027147559818))]) + def test_pdf_small_chi(self, chi, expected): + x = np.array([0.1, 0.5, 0.9]) + assert_allclose(stats.argus.pdf(x, chi), expected, rtol=1e-13) + + # Expected values were computed with mpmath (code: see gh-13370). 
+ @pytest.mark.parametrize( + 'chi, expected', + [(0.5, (0.9857660526895221, 0.6616565930168475, 0.08796070398429937)), + (0.2, (0.9851555052359501, 0.6514666238985464, 0.08362690023746594)), + (0.1, (0.9850670974995661, 0.6500061310508574, 0.08302050640683846)), + (0.01, (0.9850378582451867, 0.6495239242251358, 0.08282109244852445)), + (1e-3, (0.9850375656906663, 0.6495191015522573, 0.08281910005231098)), + (1e-4, (0.9850375627651049, 0.6495190533254682, 0.08281908012852317)), + (1e-6, (0.9850375627355568, 0.6495190528383777, 0.08281907992729293)), + (1e-9, (0.9850375627355538, 0.649519052838329, 0.0828190799272728))]) + def test_sf_small_chi(self, chi, expected): + x = np.array([0.1, 0.5, 0.9]) + assert_allclose(stats.argus.sf(x, chi), expected, rtol=1e-14) + + # Expected values were computed with mpmath (code: see gh-13370). + @pytest.mark.parametrize( + 'chi, expected', + [(0.5, (0.0142339473104779, 0.3383434069831524, 0.9120392960157007)), + (0.2, (0.014844494764049919, 0.34853337610145363, 0.916373099762534)), + (0.1, (0.014932902500433911, 0.34999386894914264, 0.9169794935931616)), + (0.01, (0.014962141754813293, 0.35047607577486417, 0.9171789075514756)), + (1e-3, (0.01496243430933372, 0.35048089844774266, 0.917180899947689)), + (1e-4, (0.014962437234895118, 0.3504809466745317, 0.9171809198714769)), + (1e-6, (0.01496243726444329, 0.3504809471616223, 0.9171809200727071)), + (1e-9, (0.014962437264446245, 0.350480947161671, 0.9171809200727272))]) + def test_cdf_small_chi(self, chi, expected): + x = np.array([0.1, 0.5, 0.9]) + assert_allclose(stats.argus.cdf(x, chi), expected, rtol=1e-12) + + # Expected values were computed with mpmath (code: see gh-13370). 
+ @pytest.mark.parametrize( + 'chi, expected, rtol', + [(0.5, (0.5964284712757741, 0.052890651988588604), 1e-12), + (0.101, (0.5893490968089076, 0.053017469847275685), 1e-11), + (0.1, (0.5893431757009437, 0.05301755449499372), 1e-13), + (0.01, (0.5890515677940915, 0.05302167905837031), 1e-13), + (1e-3, (0.5890486520005177, 0.053021719862088104), 1e-13), + (1e-4, (0.5890486228426105, 0.0530217202700811), 1e-13), + (1e-6, (0.5890486225481156, 0.05302172027420182), 1e-13), + (1e-9, (0.5890486225480862, 0.05302172027420224), 1e-13)]) + def test_stats_small_chi(self, chi, expected, rtol): + val = stats.argus.stats(chi, moments='mv') + assert_allclose(val, expected, rtol=rtol) + + +class TestNakagami: + + def test_logpdf(self): + # Test nakagami logpdf for an input where the PDF is smaller + # than can be represented with 64 bit floating point. + # The expected value of logpdf was computed with mpmath: + # + # def logpdf(x, nu): + # x = mpmath.mpf(x) + # nu = mpmath.mpf(nu) + # return (mpmath.log(2) + nu*mpmath.log(nu) - + # mpmath.loggamma(nu) + (2*nu - 1)*mpmath.log(x) - + # nu*x**2) + # + nu = 2.5 + x = 25 + logp = stats.nakagami.logpdf(x, nu) + assert_allclose(logp, -1546.9253055607549) + + def test_sf_isf(self): + # Test nakagami sf and isf when the survival function + # value is very small. + # The expected value of the survival function was computed + # with mpmath: + # + # def sf(x, nu): + # x = mpmath.mpf(x) + # nu = mpmath.mpf(nu) + # return mpmath.gammainc(nu, nu*x*x, regularized=True) + # + nu = 2.5 + x0 = 5.0 + sf = stats.nakagami.sf(x0, nu) + assert_allclose(sf, 2.736273158588307e-25, rtol=1e-13) + # Check round trip back to x0. 
+ x1 = stats.nakagami.isf(sf, nu) + assert_allclose(x1, x0, rtol=1e-13) + + @pytest.mark.parametrize("m, ref", + [(5, -0.097341814372152), + (0.5, 0.7257913526447274), + (10, -0.43426184310934907)]) + def test_entropy(self, m, ref): + # from sympy import * + # from mpmath import mp + # import numpy as np + # v, x = symbols('v, x', real=True, positive=True) + # pdf = 2 * v ** v / gamma(v) * x ** (2 * v - 1) * exp(-v * x ** 2) + # h = simplify(simplify(integrate(-pdf * log(pdf), (x, 0, oo)))) + # entropy = lambdify(v, h, 'mpmath') + # mp.dps = 200 + # nu = 5 + # ref = np.float64(entropy(mp.mpf(nu))) + # print(ref) + assert_allclose(stats.nakagami.entropy(m), ref, rtol=1.1e-14) + + @pytest.mark.parametrize("m, ref", + [(1e-100, -5.0e+99), (1e-10, -4999999965.442979), + (9.999e6, -7.333206478668433), (1.001e7, -7.3337562313259825), + (1e10, -10.787134112333835), (1e100, -114.40346329705756)]) + def test_extreme_nu(self, m, ref): + assert_allclose(stats.nakagami.entropy(m), ref) + + def test_entropy_overflow(self): + assert np.isfinite(stats.nakagami._entropy(1e100)) + assert np.isfinite(stats.nakagami._entropy(1e-100)) + + @pytest.mark.parametrize("nu, ref", + [(1e10, 0.9999999999875), + (1e3, 0.9998750078173821), + (1e-10, 1.772453850659802e-05)]) + def test_mean(self, nu, ref): + # reference values were computed with mpmath + # from mpmath import mp + # mp.dps = 500 + # nu = mp.mpf(1e10) + # float(mp.rf(nu, mp.mpf(0.5))/mp.sqrt(nu)) + assert_allclose(stats.nakagami.mean(nu), ref, rtol=1e-12) + + @pytest.mark.xfail(reason="Fit of nakagami not reliable, see gh-10908.") + @pytest.mark.parametrize('nu', [1.6, 2.5, 3.9]) + @pytest.mark.parametrize('loc', [25.0, 10, 35]) + @pytest.mark.parametrize('scale', [13, 5, 20]) + def test_fit(self, nu, loc, scale): + # Regression test for gh-13396 (21/27 cases failed previously) + # The first tuple of the parameters' values is discussed in gh-10908 + N = 100 + samples = stats.nakagami.rvs(size=N, nu=nu, loc=loc, + scale=scale, 
random_state=1337) + nu_est, loc_est, scale_est = stats.nakagami.fit(samples) + assert_allclose(nu_est, nu, rtol=0.2) + assert_allclose(loc_est, loc, rtol=0.2) + assert_allclose(scale_est, scale, rtol=0.2) + + def dlogl_dnu(nu, loc, scale): + return ((-2*nu + 1) * np.sum(1/(samples - loc)) + + 2*nu/scale**2 * np.sum(samples - loc)) + + def dlogl_dloc(nu, loc, scale): + return (N * (1 + np.log(nu) - polygamma(0, nu)) + + 2 * np.sum(np.log((samples - loc) / scale)) + - np.sum(((samples - loc) / scale)**2)) + + def dlogl_dscale(nu, loc, scale): + return (- 2 * N * nu / scale + + 2 * nu / scale ** 3 * np.sum((samples - loc) ** 2)) + + assert_allclose(dlogl_dnu(nu_est, loc_est, scale_est), 0, atol=1e-3) + assert_allclose(dlogl_dloc(nu_est, loc_est, scale_est), 0, atol=1e-3) + assert_allclose(dlogl_dscale(nu_est, loc_est, scale_est), 0, atol=1e-3) + + @pytest.mark.parametrize('loc', [25.0, 10, 35]) + @pytest.mark.parametrize('scale', [13, 5, 20]) + def test_fit_nu(self, loc, scale): + # For nu = 0.5, we have analytical values for + # the MLE of the loc and the scale + nu = 0.5 + n = 100 + samples = stats.nakagami.rvs(size=n, nu=nu, loc=loc, + scale=scale, random_state=1337) + nu_est, loc_est, scale_est = stats.nakagami.fit(samples, f0=nu) + + # Analytical values + loc_theo = np.min(samples) + scale_theo = np.sqrt(np.mean((samples - loc_est) ** 2)) + + assert_allclose(nu_est, nu, rtol=1e-7) + assert_allclose(loc_est, loc_theo, rtol=1e-7) + assert_allclose(scale_est, scale_theo, rtol=1e-7) + + +class TestWrapCauchy: + + def test_cdf_shape_broadcasting(self): + # Regression test for gh-13791. + # Check that wrapcauchy.cdf broadcasts the shape parameter + # correctly. 
+ c = np.array([[0.03, 0.25], [0.5, 0.75]]) + x = np.array([[1.0], [4.0]]) + p = stats.wrapcauchy.cdf(x, c) + assert p.shape == (2, 2) + scalar_values = [stats.wrapcauchy.cdf(x1, c1) + for (x1, c1) in np.nditer((x, c))] + assert_allclose(p.ravel(), scalar_values, rtol=1e-13) + + def test_cdf_center(self): + p = stats.wrapcauchy.cdf(np.pi, 0.03) + assert_allclose(p, 0.5, rtol=1e-14) + + def test_cdf(self): + x1 = 1.0 # less than pi + x2 = 4.0 # greater than pi + c = 0.75 + p = stats.wrapcauchy.cdf([x1, x2], c) + cr = (1 + c)/(1 - c) + assert_allclose(p[0], np.arctan(cr*np.tan(x1/2))/np.pi) + assert_allclose(p[1], 1 - np.arctan(cr*np.tan(np.pi - x2/2))/np.pi) + + +def test_rvs_no_size_error(): + # _rvs methods must have parameter `size`; see gh-11394 + class rvs_no_size_gen(stats.rv_continuous): + def _rvs(self): + return 1 + + rvs_no_size = rvs_no_size_gen(name='rvs_no_size') + + with assert_raises(TypeError, match=r"_rvs\(\) got (an|\d) unexpected"): + rvs_no_size.rvs() + + +@pytest.mark.parametrize('distname, args', invdistdiscrete + invdistcont) +def test_support_gh13294_regression(distname, args): + if distname in skip_test_support_gh13294_regression: + pytest.skip(f"skipping test for the support method for " + f"distribution {distname}.") + dist = getattr(stats, distname) + # test support method with invalid arguments + if isinstance(dist, stats.rv_continuous): + # test with valid scale + if len(args) != 0: + a0, b0 = dist.support(*args) + assert_equal(a0, np.nan) + assert_equal(b0, np.nan) + # test with invalid scale + # For some distributions, that take no parameters, + # the case of only invalid scale occurs and hence, + # it is implicitly tested in this test case. 
+ loc1, scale1 = 0, -1 + a1, b1 = dist.support(*args, loc1, scale1) + assert_equal(a1, np.nan) + assert_equal(b1, np.nan) + else: + a, b = dist.support(*args) + assert_equal(a, np.nan) + assert_equal(b, np.nan) + + +def test_support_broadcasting_gh13294_regression(): + a0, b0 = stats.norm.support([0, 0, 0, 1], [1, 1, 1, -1]) + ex_a0 = np.array([-np.inf, -np.inf, -np.inf, np.nan]) + ex_b0 = np.array([np.inf, np.inf, np.inf, np.nan]) + assert_equal(a0, ex_a0) + assert_equal(b0, ex_b0) + assert a0.shape == ex_a0.shape + assert b0.shape == ex_b0.shape + + a1, b1 = stats.norm.support([], []) + ex_a1, ex_b1 = np.array([]), np.array([]) + assert_equal(a1, ex_a1) + assert_equal(b1, ex_b1) + assert a1.shape == ex_a1.shape + assert b1.shape == ex_b1.shape + + a2, b2 = stats.norm.support([0, 0, 0, 1], [-1]) + ex_a2 = np.array(4*[np.nan]) + ex_b2 = np.array(4*[np.nan]) + assert_equal(a2, ex_a2) + assert_equal(b2, ex_b2) + assert a2.shape == ex_a2.shape + assert b2.shape == ex_b2.shape + + +def test_stats_broadcasting_gh14953_regression(): + # test case in gh14953 + loc = [0., 0.] + scale = [[1.], [2.], [3.]] + assert_equal(stats.norm.var(loc, scale), [[1., 1.], [4., 4.], [9., 9.]]) + # test some edge cases + loc = np.empty((0, )) + scale = np.empty((1, 0)) + assert stats.norm.var(loc, scale).shape == (1, 0) + + +# Check a few values of the cosine distribution's cdf, sf, ppf and +# isf methods. Expected values were computed with mpmath. 
+ +@pytest.mark.parametrize('x, expected', + [(-3.14159, 4.956444476505336e-19), + (3.14, 0.9999999998928399)]) +def test_cosine_cdf_sf(x, expected): + assert_allclose(stats.cosine.cdf(x), expected) + assert_allclose(stats.cosine.sf(-x), expected) + + +@pytest.mark.parametrize('p, expected', + [(1e-6, -3.1080612413765905), + (1e-17, -3.141585429601399), + (0.975, 2.1447547020964923)]) +def test_cosine_ppf_isf(p, expected): + assert_allclose(stats.cosine.ppf(p), expected) + assert_allclose(stats.cosine.isf(p), -expected) + + +def test_cosine_logpdf_endpoints(): + logp = stats.cosine.logpdf([-np.pi, np.pi]) + # reference value calculated using mpmath assuming `np.cos(-1)` is four + # floating point numbers too high. See gh-18382. + assert_array_less(logp, -37.18838327496655) + + +def test_distr_params_lists(): + # distribution objects are extra distributions added in + # test_discrete_basic. All other distributions are strings (names) + # and so we only choose those to compare whether both lists match. + discrete_distnames = {name for name, _ in distdiscrete + if isinstance(name, str)} + invdiscrete_distnames = {name for name, _ in invdistdiscrete} + assert discrete_distnames == invdiscrete_distnames + + cont_distnames = {name for name, _ in distcont} + invcont_distnames = {name for name, _ in invdistcont} + assert cont_distnames == invcont_distnames + + +def test_moment_order_4(): + # gh-13655 reported that if a distribution has a `_stats` method that + # accepts the `moments` parameter, then if the distribution's `moment` + # method is called with `order=4`, the faster/more accurate`_stats` gets + # called, but the results aren't used, and the generic `_munp` method is + # called to calculate the moment anyway. This tests that the issue has + # been fixed. 
+ # stats.skewnorm._stats accepts the `moments` keyword + stats.skewnorm._stats(a=0, moments='k') # no failure = has `moments` + # When `moment` is called, `_stats` is used, so the moment is very accurate + # (exactly equal to Pearson's kurtosis of the normal distribution, 3) + assert stats.skewnorm.moment(order=4, a=0) == 3.0 + # At the time of gh-13655, skewnorm._munp() used the generic method + # to compute its result, which was inefficient and not very accurate. + # At that time, the following assertion would fail. skewnorm._munp() + # has since been made more accurate and efficient, so now this test + # is expected to pass. + assert stats.skewnorm._munp(4, 0) == 3.0 + + +class TestRelativisticBW: + @pytest.fixture + def ROOT_pdf_sample_data(self): + """Sample data points for pdf computed with CERN's ROOT + + See - https://root.cern/ + + Uses ROOT.TMath.BreitWignerRelativistic, available in ROOT + versions 6.27+ + + pdf calculated for Z0 Boson, W Boson, and Higgs Boson for + x in `np.linspace(0, 200, 401)`. 
+ """ + data = np.load( + Path(__file__).parent / + 'data/rel_breitwigner_pdf_sample_data_ROOT.npy' + ) + data = np.rec.fromarrays(data.T, names='x,pdf,rho,gamma') + return data + + @pytest.mark.parametrize( + "rho,gamma,rtol", [ + (36.545206797050334, 2.4952, 5e-14), # Z0 Boson + (38.55107913669065, 2.085, 1e-14), # W Boson + (96292.3076923077, 0.0013, 5e-13), # Higgs Boson + ] + ) + def test_pdf_against_ROOT(self, ROOT_pdf_sample_data, rho, gamma, rtol): + data = ROOT_pdf_sample_data[ + (ROOT_pdf_sample_data['rho'] == rho) + & (ROOT_pdf_sample_data['gamma'] == gamma) + ] + x, pdf = data['x'], data['pdf'] + assert_allclose( + pdf, stats.rel_breitwigner.pdf(x, rho, scale=gamma), rtol=rtol + ) + + @pytest.mark.parametrize("rho, Gamma, rtol", [ + (36.545206797050334, 2.4952, 5e-13), # Z0 Boson + (38.55107913669065, 2.085, 5e-13), # W Boson + (96292.3076923077, 0.0013, 5e-10), # Higgs Boson + ] + ) + def test_pdf_against_simple_implementation(self, rho, Gamma, rtol): + # reference implementation straight from formulas on Wikipedia [1] + def pdf(E, M, Gamma): + gamma = np.sqrt(M**2 * (M**2 + Gamma**2)) + k = (2 * np.sqrt(2) * M * Gamma * gamma + / (np.pi * np.sqrt(M**2 + gamma))) + return k / ((E**2 - M**2)**2 + M**2*Gamma**2) + # get reasonable values at which to evaluate the CDF + p = np.linspace(0.05, 0.95, 10) + x = stats.rel_breitwigner.ppf(p, rho, scale=Gamma) + res = stats.rel_breitwigner.pdf(x, rho, scale=Gamma) + ref = pdf(x, rho*Gamma, Gamma) + assert_allclose(res, ref, rtol=rtol) + + @pytest.mark.parametrize( + "rho,gamma", [ + pytest.param( + 36.545206797050334, 2.4952, marks=pytest.mark.slow + ), # Z0 Boson + pytest.param( + 38.55107913669065, 2.085, marks=pytest.mark.xslow + ), # W Boson + pytest.param( + 96292.3076923077, 0.0013, marks=pytest.mark.xslow + ), # Higgs Boson + ] + ) + def test_fit_floc(self, rho, gamma): + """Tests fit for cases where floc is set. + + `rel_breitwigner` has special handling for these cases. 
+ """ + seed = 6936804688480013683 + rng = np.random.default_rng(seed) + data = stats.rel_breitwigner.rvs( + rho, scale=gamma, size=1000, random_state=rng + ) + fit = stats.rel_breitwigner.fit(data, floc=0) + assert_allclose((fit[0], fit[2]), (rho, gamma), rtol=2e-1) + assert fit[1] == 0 + # Check again with fscale set. + fit = stats.rel_breitwigner.fit(data, floc=0, fscale=gamma) + assert_allclose(fit[0], rho, rtol=1e-2) + assert (fit[1], fit[2]) == (0, gamma) + + +class TestJohnsonSU: + @pytest.mark.parametrize("case", [ # a, b, loc, scale, m1, m2, g1, g2 + (-0.01, 1.1, 0.02, 0.0001, 0.02000137427557091, + 2.1112742956578063e-08, 0.05989781342460999, 20.36324408592951-3), + (2.554395574161155, 2.2482281679651965, 0, 1, -1.54215386737391, + 0.7629882028469993, -1.256656139406788, 6.303058419339775-3)]) + def test_moment_gh18071(self, case): + # gh-18071 reported an IntegrationWarning emitted by johnsonsu.stats + # Check that the warning is no longer emitted and that the values + # are accurate compared against results from Mathematica. + # Reference values from Mathematica, e.g. 
+ # Mean[JohnsonDistribution["SU",-0.01, 1.1, 0.02, 0.0001]] + res = stats.johnsonsu.stats(*case[:4], moments='mvsk') + assert_allclose(res, case[4:], rtol=1e-14) + + +class TestTruncPareto: + def test_pdf(self): + # PDF is that of the truncated pareto distribution + b, c = 1.8, 5.3 + x = np.linspace(1.8, 5.3) + res = stats.truncpareto(b, c).pdf(x) + ref = stats.pareto(b).pdf(x) / stats.pareto(b).cdf(c) + assert_allclose(res, ref) + + @pytest.mark.parametrize('fix_loc', [True, False]) + @pytest.mark.parametrize('fix_scale', [True, False]) + @pytest.mark.parametrize('fix_b', [True, False]) + @pytest.mark.parametrize('fix_c', [True, False]) + def test_fit(self, fix_loc, fix_scale, fix_b, fix_c): + + rng = np.random.default_rng(6747363148258237171) + b, c, loc, scale = 1.8, 5.3, 1, 2.5 + dist = stats.truncpareto(b, c, loc=loc, scale=scale) + data = dist.rvs(size=500, random_state=rng) + + kwds = {} + if fix_loc: + kwds['floc'] = loc + if fix_scale: + kwds['fscale'] = scale + if fix_b: + kwds['f0'] = b + if fix_c: + kwds['f1'] = c + + if fix_loc and fix_scale and fix_b and fix_c: + message = "All parameters fixed. There is nothing to optimize." + with pytest.raises(RuntimeError, match=message): + stats.truncpareto.fit(data, **kwds) + else: + _assert_less_or_close_loglike(stats.truncpareto, data, **kwds) + + +class TestKappa3: + def test_sf(self): + # During development of gh-18822, we found that the override of + # kappa3.sf could experience overflow where the version in main did + # not. Check that this does not happen in final implementation. + sf0 = 1 - stats.kappa3.cdf(0.5, 1e5) + sf1 = stats.kappa3.sf(0.5, 1e5) + assert_allclose(sf1, sf0) + + +# Cases are (distribution name, log10 of smallest probability mass to test, +# log10 of the complement of the largest probability mass to test, atol, +# rtol). None uses default values. 
+@pytest.mark.parametrize("case", [("kappa3", None, None, None, None), + ("loglaplace", None, None, None, None), + ("lognorm", None, None, None, None), + ("lomax", None, None, None, None), + ("pareto", None, None, None, None),]) +def test_sf_isf_overrides(case): + # Test that SF is the inverse of ISF. Supplements + # `test_continuous_basic.check_sf_isf` for distributions with overridden + # `sf` and `isf` methods. + distname, lp1, lp2, atol, rtol = case + + lpm = np.log10(0.5) # log10 of the probability mass at the median + lp1 = lp1 or -290 + lp2 = lp2 or -14 + atol = atol or 0 + rtol = rtol or 1e-12 + dist = getattr(stats, distname) + params = dict(distcont)[distname] + dist_frozen = dist(*params) + + # Test (very deep) right tail to median. We can benchmark with random + # (loguniform) points, but strictly logspaced points are fine for tests. + ref = np.logspace(lp1, lpm) + res = dist_frozen.sf(dist_frozen.isf(ref)) + assert_allclose(res, ref, atol=atol, rtol=rtol) + + # test median to left tail + ref = 1 - np.logspace(lp2, lpm, 20) + res = dist_frozen.sf(dist_frozen.isf(ref)) + assert_allclose(res, ref, atol=atol, rtol=rtol) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_entropy.py b/.venv/Lib/site-packages/scipy/stats/tests/test_entropy.py new file mode 100644 index 0000000000000000000000000000000000000000..901d5d1ee7c4d07ac11f579d9223407a698dcf38 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_entropy.py @@ -0,0 +1,286 @@ +import numpy as np +from numpy.testing import assert_equal, assert_allclose +# avoid new uses of the following; prefer assert/np.testing.assert_allclose +from numpy.testing import (assert_, assert_almost_equal, + assert_array_almost_equal) + +import pytest +from pytest import raises as assert_raises +import scipy.stats as stats + + +class TestEntropy: + def test_entropy_positive(self): + # See ticket #497 + pk = [0.5, 0.2, 0.3] + qk = [0.1, 0.25, 0.65] + eself = stats.entropy(pk, pk) + edouble = 
stats.entropy(pk, qk) + assert_(0.0 == eself) + assert_(edouble >= 0.0) + + def test_entropy_base(self): + pk = np.ones(16, float) + S = stats.entropy(pk, base=2.) + assert_(abs(S - 4.) < 1.e-5) + + qk = np.ones(16, float) + qk[:8] = 2. + S = stats.entropy(pk, qk) + S2 = stats.entropy(pk, qk, base=2.) + assert_(abs(S/S2 - np.log(2.)) < 1.e-5) + + def test_entropy_zero(self): + # Test for PR-479 + assert_almost_equal(stats.entropy([0, 1, 2]), 0.63651416829481278, + decimal=12) + + def test_entropy_2d(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]] + assert_array_almost_equal(stats.entropy(pk, qk), + [0.1933259, 0.18609809]) + + def test_entropy_2d_zero(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + qk = [[0.0, 0.1], [0.3, 0.6], [0.5, 0.3]] + assert_array_almost_equal(stats.entropy(pk, qk), + [np.inf, 0.18609809]) + + pk[0][0] = 0.0 + assert_array_almost_equal(stats.entropy(pk, qk), + [0.17403988, 0.18609809]) + + def test_entropy_base_2d_nondefault_axis(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + assert_array_almost_equal(stats.entropy(pk, axis=1), + [0.63651417, 0.63651417, 0.66156324]) + + def test_entropy_2d_nondefault_axis(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]] + assert_array_almost_equal(stats.entropy(pk, qk, axis=1), + [0.231049, 0.231049, 0.127706]) + + def test_entropy_raises_value_error(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + qk = [[0.1, 0.2], [0.6, 0.3]] + assert_raises(ValueError, stats.entropy, pk, qk) + + def test_base_entropy_with_axis_0_is_equal_to_default(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + assert_array_almost_equal(stats.entropy(pk, axis=0), + stats.entropy(pk)) + + def test_entropy_with_axis_0_is_equal_to_default(self): + pk = [[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]] + qk = [[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]] + assert_array_almost_equal(stats.entropy(pk, qk, axis=0), + stats.entropy(pk, qk)) + + def 
test_base_entropy_transposed(self): + pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]) + assert_array_almost_equal(stats.entropy(pk.T).T, + stats.entropy(pk, axis=1)) + + def test_entropy_transposed(self): + pk = np.array([[0.1, 0.2], [0.6, 0.3], [0.3, 0.5]]) + qk = np.array([[0.2, 0.1], [0.3, 0.6], [0.5, 0.3]]) + assert_array_almost_equal(stats.entropy(pk.T, qk.T).T, + stats.entropy(pk, qk, axis=1)) + + def test_entropy_broadcasting(self): + np.random.rand(0) + x = np.random.rand(3) + y = np.random.rand(2, 1) + res = stats.entropy(x, y, axis=-1) + assert_equal(res[0], stats.entropy(x, y[0])) + assert_equal(res[1], stats.entropy(x, y[1])) + + def test_entropy_shape_mismatch(self): + x = np.random.rand(10, 1, 12) + y = np.random.rand(11, 2) + message = "Array shapes are incompatible for broadcasting." + with pytest.raises(ValueError, match=message): + stats.entropy(x, y) + + def test_input_validation(self): + x = np.random.rand(10) + message = "`base` must be a positive number." + with pytest.raises(ValueError, match=message): + stats.entropy(x, base=-2) + + +class TestDifferentialEntropy: + """ + Vasicek results are compared with the R package vsgoftest. 
+ + # library(vsgoftest) + # + # samp <- c() + # entropy.estimate(x = samp, window = ) + + """ + + def test_differential_entropy_vasicek(self): + + random_state = np.random.RandomState(0) + values = random_state.standard_normal(100) + + entropy = stats.differential_entropy(values, method='vasicek') + assert_allclose(entropy, 1.342551, rtol=1e-6) + + entropy = stats.differential_entropy(values, window_length=1, + method='vasicek') + assert_allclose(entropy, 1.122044, rtol=1e-6) + + entropy = stats.differential_entropy(values, window_length=8, + method='vasicek') + assert_allclose(entropy, 1.349401, rtol=1e-6) + + def test_differential_entropy_vasicek_2d_nondefault_axis(self): + random_state = np.random.RandomState(0) + values = random_state.standard_normal((3, 100)) + + entropy = stats.differential_entropy(values, axis=1, method='vasicek') + assert_allclose( + entropy, + [1.342551, 1.341826, 1.293775], + rtol=1e-6, + ) + + entropy = stats.differential_entropy(values, axis=1, window_length=1, + method='vasicek') + assert_allclose( + entropy, + [1.122044, 1.102944, 1.129616], + rtol=1e-6, + ) + + entropy = stats.differential_entropy(values, axis=1, window_length=8, + method='vasicek') + assert_allclose( + entropy, + [1.349401, 1.338514, 1.292332], + rtol=1e-6, + ) + + def test_differential_entropy_raises_value_error(self): + random_state = np.random.RandomState(0) + values = random_state.standard_normal((3, 100)) + + error_str = ( + r"Window length \({window_length}\) must be positive and less " + r"than half the sample size \({sample_size}\)." 
+ ) + + sample_size = values.shape[1] + + for window_length in {-1, 0, sample_size//2, sample_size}: + + formatted_error_str = error_str.format( + window_length=window_length, + sample_size=sample_size, + ) + + with assert_raises(ValueError, match=formatted_error_str): + stats.differential_entropy( + values, + window_length=window_length, + axis=1, + ) + + def test_base_differential_entropy_with_axis_0_is_equal_to_default(self): + random_state = np.random.RandomState(0) + values = random_state.standard_normal((100, 3)) + + entropy = stats.differential_entropy(values, axis=0) + default_entropy = stats.differential_entropy(values) + assert_allclose(entropy, default_entropy) + + def test_base_differential_entropy_transposed(self): + random_state = np.random.RandomState(0) + values = random_state.standard_normal((3, 100)) + + assert_allclose( + stats.differential_entropy(values.T).T, + stats.differential_entropy(values, axis=1), + ) + + def test_input_validation(self): + x = np.random.rand(10) + + message = "`base` must be a positive number or `None`." + with pytest.raises(ValueError, match=message): + stats.differential_entropy(x, base=-2) + + message = "`method` must be one of..." 
+ with pytest.raises(ValueError, match=message): + stats.differential_entropy(x, method='ekki-ekki') + + @pytest.mark.parametrize('method', ['vasicek', 'van es', + 'ebrahimi', 'correa']) + def test_consistency(self, method): + # test that method is a consistent estimator + n = 10000 if method == 'correa' else 1000000 + rvs = stats.norm.rvs(size=n, random_state=0) + expected = stats.norm.entropy() + res = stats.differential_entropy(rvs, method=method) + assert_allclose(res, expected, rtol=0.005) + + # values from differential_entropy reference [6], table 1, n=50, m=7 + norm_rmse_std_cases = { # method: (RMSE, STD) + 'vasicek': (0.198, 0.109), + 'van es': (0.212, 0.110), + 'correa': (0.135, 0.112), + 'ebrahimi': (0.128, 0.109) + } + + @pytest.mark.parametrize('method, expected', + list(norm_rmse_std_cases.items())) + def test_norm_rmse_std(self, method, expected): + # test that RMSE and standard deviation of estimators matches values + # given in differential_entropy reference [6]. Incidentally, also + # tests vectorization. + reps, n, m = 10000, 50, 7 + rmse_expected, std_expected = expected + rvs = stats.norm.rvs(size=(reps, n), random_state=0) + true_entropy = stats.norm.entropy() + res = stats.differential_entropy(rvs, window_length=m, + method=method, axis=-1) + assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)), + rmse_expected, atol=0.005) + assert_allclose(np.std(res), std_expected, atol=0.002) + + # values from differential_entropy reference [6], table 2, n=50, m=7 + expon_rmse_std_cases = { # method: (RMSE, STD) + 'vasicek': (0.194, 0.148), + 'van es': (0.179, 0.149), + 'correa': (0.155, 0.152), + 'ebrahimi': (0.151, 0.148) + } + + @pytest.mark.parametrize('method, expected', + list(expon_rmse_std_cases.items())) + def test_expon_rmse_std(self, method, expected): + # test that RMSE and standard deviation of estimators matches values + # given in differential_entropy reference [6]. Incidentally, also + # tests vectorization. 
+ reps, n, m = 10000, 50, 7 + rmse_expected, std_expected = expected + rvs = stats.expon.rvs(size=(reps, n), random_state=0) + true_entropy = stats.expon.entropy() + res = stats.differential_entropy(rvs, window_length=m, + method=method, axis=-1) + assert_allclose(np.sqrt(np.mean((res - true_entropy)**2)), + rmse_expected, atol=0.005) + assert_allclose(np.std(res), std_expected, atol=0.002) + + @pytest.mark.parametrize('n, method', [(8, 'van es'), + (12, 'ebrahimi'), + (1001, 'vasicek')]) + def test_method_auto(self, n, method): + rvs = stats.norm.rvs(size=(n,), random_state=0) + res1 = stats.differential_entropy(rvs) + res2 = stats.differential_entropy(rvs, method=method) + assert res1 == res2 diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_fast_gen_inversion.py b/.venv/Lib/site-packages/scipy/stats/tests/test_fast_gen_inversion.py new file mode 100644 index 0000000000000000000000000000000000000000..ba4d881361ff834695d2057ceb4635b3acf9dc31 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_fast_gen_inversion.py @@ -0,0 +1,430 @@ +import pytest +import warnings +import numpy as np +from numpy.testing import (assert_array_equal, assert_allclose, + suppress_warnings) +from copy import deepcopy +from scipy.stats.sampling import FastGeneratorInversion +from scipy import stats + + +def test_bad_args(): + # loc and scale must be scalar + with pytest.raises(ValueError, match="loc must be scalar"): + FastGeneratorInversion(stats.norm(loc=(1.2, 1.3))) + with pytest.raises(ValueError, match="scale must be scalar"): + FastGeneratorInversion(stats.norm(scale=[1.5, 5.7])) + + with pytest.raises(ValueError, match="'test' cannot be used to seed"): + FastGeneratorInversion(stats.norm(), random_state="test") + + msg = "Each of the 1 shape parameters must be a scalar" + with pytest.raises(ValueError, match=msg): + FastGeneratorInversion(stats.gamma([1.3, 2.5])) + + with pytest.raises(ValueError, match="`dist` must be a frozen"): + 
FastGeneratorInversion("xy") + + with pytest.raises(ValueError, match="Distribution 'truncnorm' is not"): + FastGeneratorInversion(stats.truncnorm(1.3, 4.5)) + + +def test_random_state(): + # fixed seed + gen = FastGeneratorInversion(stats.norm(), random_state=68734509) + x1 = gen.rvs(size=10) + gen.random_state = 68734509 + x2 = gen.rvs(size=10) + assert_array_equal(x1, x2) + + # Generator + urng = np.random.default_rng(20375857) + gen = FastGeneratorInversion(stats.norm(), random_state=urng) + x1 = gen.rvs(size=10) + gen.random_state = np.random.default_rng(20375857) + x2 = gen.rvs(size=10) + assert_array_equal(x1, x2) + + # RandomState + urng = np.random.RandomState(2364) + gen = FastGeneratorInversion(stats.norm(), random_state=urng) + x1 = gen.rvs(size=10) + gen.random_state = np.random.RandomState(2364) + x2 = gen.rvs(size=10) + assert_array_equal(x1, x2) + + # if evaluate_error is called, it must not interfere with the random_state + # used by rvs + gen = FastGeneratorInversion(stats.norm(), random_state=68734509) + x1 = gen.rvs(size=10) + _ = gen.evaluate_error(size=5) # this will generate 5 uniform rvs + x2 = gen.rvs(size=10) + gen.random_state = 68734509 + x3 = gen.rvs(size=20) + assert_array_equal(x2, x3[10:]) + + +dists_with_params = [ + ("alpha", (3.5,)), + ("anglit", ()), + ("argus", (3.5,)), + ("argus", (5.1,)), + ("beta", (1.5, 0.9)), + ("cosine", ()), + ("betaprime", (2.5, 3.3)), + ("bradford", (1.2,)), + ("burr", (1.3, 2.4)), + ("burr12", (0.7, 1.2)), + ("cauchy", ()), + ("chi2", (3.5,)), + ("chi", (4.5,)), + ("crystalball", (0.7, 1.2)), + ("expon", ()), + ("gamma", (1.5,)), + ("gennorm", (2.7,)), + ("gumbel_l", ()), + ("gumbel_r", ()), + ("hypsecant", ()), + ("invgauss", (3.1,)), + ("invweibull", (1.5,)), + ("laplace", ()), + ("logistic", ()), + ("maxwell", ()), + ("moyal", ()), + ("norm", ()), + ("pareto", (1.3,)), + ("powerlaw", (7.6,)), + ("rayleigh", ()), + ("semicircular", ()), + ("t", (5.7,)), + ("wald", ()), + ("weibull_max", (2.4,)), + 
("weibull_min", (1.2,)), +] + + +@pytest.mark.parametrize(("distname, args"), dists_with_params) +def test_rvs_and_ppf(distname, args): + # check sample against rvs generated by rv_continuous + urng = np.random.default_rng(9807324628097097) + rng1 = getattr(stats, distname)(*args) + rvs1 = rng1.rvs(size=500, random_state=urng) + rng2 = FastGeneratorInversion(rng1, random_state=urng) + rvs2 = rng2.rvs(size=500) + assert stats.cramervonmises_2samp(rvs1, rvs2).pvalue > 0.01 + + # check ppf + q = [0.001, 0.1, 0.5, 0.9, 0.999] + assert_allclose(rng1.ppf(q), rng2.ppf(q), atol=1e-10) + + +@pytest.mark.parametrize(("distname, args"), dists_with_params) +def test_u_error(distname, args): + # check sample against rvs generated by rv_continuous + dist = getattr(stats, distname)(*args) + with suppress_warnings() as sup: + # filter the warnings thrown by UNU.RAN + sup.filter(RuntimeWarning) + rng = FastGeneratorInversion(dist) + u_error, x_error = rng.evaluate_error( + size=10_000, random_state=9807324628097097, x_error=False + ) + assert u_error <= 1e-10 + + +@pytest.mark.xfail(reason="geninvgauss CDF is not accurate") +def test_geninvgauss_uerror(): + dist = stats.geninvgauss(3.2, 1.5) + rng = FastGeneratorInversion(dist) + err = rng.evaluate_error(size=10_000, random_state=67982) + assert err[0] < 1e-10 + +# TODO: add more distributions +@pytest.mark.parametrize(("distname, args"), [("beta", (0.11, 0.11))]) +def test_error_extreme_params(distname, args): + # take extreme parameters where u-error might not be below the tolerance + # due to limitations of floating point arithmetic + with suppress_warnings() as sup: + # filter the warnings thrown by UNU.RAN for such extreme parameters + sup.filter(RuntimeWarning) + dist = getattr(stats, distname)(*args) + rng = FastGeneratorInversion(dist) + u_error, x_error = rng.evaluate_error( + size=10_000, random_state=980732462809709732623, x_error=True + ) + if u_error >= 2.5 * 1e-10: + assert x_error < 1e-9 + + +def 
test_evaluate_error_inputs(): + gen = FastGeneratorInversion(stats.norm()) + with pytest.raises(ValueError, match="size must be an integer"): + gen.evaluate_error(size=3.5) + with pytest.raises(ValueError, match="size must be an integer"): + gen.evaluate_error(size=(3, 3)) + + +def test_rvs_ppf_loc_scale(): + loc, scale = 3.5, 2.3 + dist = stats.norm(loc=loc, scale=scale) + rng = FastGeneratorInversion(dist, random_state=1234) + r = rng.rvs(size=1000) + r_rescaled = (r - loc) / scale + assert stats.cramervonmises(r_rescaled, "norm").pvalue > 0.01 + q = [0.001, 0.1, 0.5, 0.9, 0.999] + assert_allclose(rng._ppf(q), rng.ppf(q), atol=1e-10) + + +def test_domain(): + # only a basic check that the domain argument is passed to the + # UNU.RAN generators + rng = FastGeneratorInversion(stats.norm(), domain=(-1, 1)) + r = rng.rvs(size=100) + assert -1 <= r.min() < r.max() <= 1 + + # if loc and scale are used, new domain is loc + scale*domain + loc, scale = 3.5, 1.3 + dist = stats.norm(loc=loc, scale=scale) + rng = FastGeneratorInversion(dist, domain=(-1.5, 2)) + r = rng.rvs(size=100) + lb, ub = loc - scale * 1.5, loc + scale * 2 + assert lb <= r.min() < r.max() <= ub + + +@pytest.mark.parametrize(("distname, args, expected"), + [("beta", (3.5, 2.5), (0, 1)), + ("norm", (), (-np.inf, np.inf))]) +def test_support(distname, args, expected): + # test that the support is updated if truncation and loc/scale are applied + # use beta distribution since it is a transformed betaprime distribution, + # so it is important that the correct support is considered + # (i.e., the support of beta is (0,1), while betaprime is (0, inf)) + dist = getattr(stats, distname)(*args) + rng = FastGeneratorInversion(dist) + assert_array_equal(rng.support(), expected) + rng.loc = 1 + rng.scale = 2 + assert_array_equal(rng.support(), 1 + 2*np.array(expected)) + + +@pytest.mark.parametrize(("distname, args"), + [("beta", (3.5, 2.5)), ("norm", ())]) +def test_support_truncation(distname, args): + # similar 
test for truncation + dist = getattr(stats, distname)(*args) + rng = FastGeneratorInversion(dist, domain=(0.5, 0.7)) + assert_array_equal(rng.support(), (0.5, 0.7)) + rng.loc = 1 + rng.scale = 2 + assert_array_equal(rng.support(), (1 + 2 * 0.5, 1 + 2 * 0.7)) + + +def test_domain_shift_truncation(): + # center of norm is zero, it should be shifted to the left endpoint of + # domain. if this was not the case, PINV in UNURAN would raise a warning + # as the center is not inside the domain + with warnings.catch_warnings(): + warnings.simplefilter("error") + rng = FastGeneratorInversion(stats.norm(), domain=(1, 2)) + r = rng.rvs(size=100) + assert 1 <= r.min() < r.max() <= 2 + + +def test_non_rvs_methods_with_domain(): + # as a first step, compare truncated normal against stats.truncnorm + rng = FastGeneratorInversion(stats.norm(), domain=(2.3, 3.2)) + trunc_norm = stats.truncnorm(2.3, 3.2) + # take values that are inside and outside the domain + x = (2.0, 2.4, 3.0, 3.4) + p = (0.01, 0.5, 0.99) + assert_allclose(rng._cdf(x), trunc_norm.cdf(x)) + assert_allclose(rng._ppf(p), trunc_norm.ppf(p)) + loc, scale = 2, 3 + rng.loc = 2 + rng.scale = 3 + trunc_norm = stats.truncnorm(2.3, 3.2, loc=loc, scale=scale) + x = np.array(x) * scale + loc + assert_allclose(rng._cdf(x), trunc_norm.cdf(x)) + assert_allclose(rng._ppf(p), trunc_norm.ppf(p)) + + # do another sanity check with beta distribution + # in that case, it is important to use the correct domain since beta + # is a transformation of betaprime which has a different support + rng = FastGeneratorInversion(stats.beta(2.5, 3.5), domain=(0.3, 0.7)) + rng.loc = 2 + rng.scale = 2.5 + # the support is 2.75, , 3.75 (2 + 2.5 * 0.3, 2 + 2.5 * 0.7) + assert_array_equal(rng.support(), (2.75, 3.75)) + x = np.array([2.74, 2.76, 3.74, 3.76]) + # the cdf needs to be zero outside of the domain + y_cdf = rng._cdf(x) + assert_array_equal((y_cdf[0], y_cdf[3]), (0, 1)) + assert np.min(y_cdf[1:3]) > 0 + # ppf needs to map 0 and 1 to the 
boundaries + assert_allclose(rng._ppf(y_cdf), (2.75, 2.76, 3.74, 3.75)) + + +def test_non_rvs_methods_without_domain(): + norm_dist = stats.norm() + rng = FastGeneratorInversion(norm_dist) + x = np.linspace(-3, 3, num=10) + p = (0.01, 0.5, 0.99) + assert_allclose(rng._cdf(x), norm_dist.cdf(x)) + assert_allclose(rng._ppf(p), norm_dist.ppf(p)) + loc, scale = 0.5, 1.3 + rng.loc = loc + rng.scale = scale + norm_dist = stats.norm(loc=loc, scale=scale) + assert_allclose(rng._cdf(x), norm_dist.cdf(x)) + assert_allclose(rng._ppf(p), norm_dist.ppf(p)) + +@pytest.mark.parametrize(("domain, x"), + [(None, 0.5), + ((0, 1), 0.5), + ((0, 1), 1.5)]) +def test_scalar_inputs(domain, x): + """ pdf, cdf etc should map scalar values to scalars. check with and + w/o domain since domain impacts pdf, cdf etc + Take x inside and outside of domain """ + rng = FastGeneratorInversion(stats.norm(), domain=domain) + assert np.isscalar(rng._cdf(x)) + assert np.isscalar(rng._ppf(0.5)) + + +def test_domain_argus_large_chi(): + # for large chi, the Gamma distribution is used and the domain has to be + # transformed. 
this is a test to ensure that the transformation works + chi, lb, ub = 5.5, 0.25, 0.75 + rng = FastGeneratorInversion(stats.argus(chi), domain=(lb, ub)) + rng.random_state = 4574 + r = rng.rvs(size=500) + assert lb <= r.min() < r.max() <= ub + # perform goodness of fit test with conditional cdf + cdf = stats.argus(chi).cdf + prob = cdf(ub) - cdf(lb) + assert stats.cramervonmises(r, lambda x: cdf(x) / prob).pvalue > 0.05 + + +def test_setting_loc_scale(): + rng = FastGeneratorInversion(stats.norm(), random_state=765765864) + r1 = rng.rvs(size=1000) + rng.loc = 3.0 + rng.scale = 2.5 + r2 = rng.rvs(1000) + # rescaled r2 should be again standard normal + assert stats.cramervonmises_2samp(r1, (r2 - 3) / 2.5).pvalue > 0.05 + # reset values to default loc=0, scale=1 + rng.loc = 0 + rng.scale = 1 + r2 = rng.rvs(1000) + assert stats.cramervonmises_2samp(r1, r2).pvalue > 0.05 + + +def test_ignore_shape_range(): + msg = "No generator is defined for the shape parameters" + with pytest.raises(ValueError, match=msg): + rng = FastGeneratorInversion(stats.t(0.03)) + rng = FastGeneratorInversion(stats.t(0.03), ignore_shape_range=True) + # we can ignore the recommended range of shape parameters + # but u-error can be expected to be too large in that case + u_err, _ = rng.evaluate_error(size=1000, random_state=234) + assert u_err >= 1e-6 + +@pytest.mark.xfail_on_32bit( + "NumericalInversePolynomial.qrvs fails for Win 32-bit" +) +class TestQRVS: + def test_input_validation(self): + gen = FastGeneratorInversion(stats.norm()) + + match = "`qmc_engine` must be an instance of..." + with pytest.raises(ValueError, match=match): + gen.qrvs(qmc_engine=0) + + match = "`d` must be consistent with dimension of `qmc_engine`." 
+ with pytest.raises(ValueError, match=match): + gen.qrvs(d=3, qmc_engine=stats.qmc.Halton(2)) + + qrngs = [None, stats.qmc.Sobol(1, seed=0), stats.qmc.Halton(3, seed=0)] + # `size=None` should not add anything to the shape, `size=1` should + sizes = [ + (None, tuple()), + (1, (1,)), + (4, (4,)), + ((4,), (4,)), + ((2, 4), (2, 4)), + ] + # Neither `d=None` nor `d=1` should add anything to the shape + ds = [(None, tuple()), (1, tuple()), (3, (3,))] + + @pytest.mark.parametrize("qrng", qrngs) + @pytest.mark.parametrize("size_in, size_out", sizes) + @pytest.mark.parametrize("d_in, d_out", ds) + def test_QRVS_shape_consistency(self, qrng, size_in, size_out, + d_in, d_out): + gen = FastGeneratorInversion(stats.norm()) + + # If d and qrng.d are inconsistent, an error is raised + if d_in is not None and qrng is not None and qrng.d != d_in: + match = "`d` must be consistent with dimension of `qmc_engine`." + with pytest.raises(ValueError, match=match): + gen.qrvs(size_in, d=d_in, qmc_engine=qrng) + return + + # Sometimes d is really determined by qrng + if d_in is None and qrng is not None and qrng.d != 1: + d_out = (qrng.d,) + + shape_expected = size_out + d_out + + qrng2 = deepcopy(qrng) + qrvs = gen.qrvs(size=size_in, d=d_in, qmc_engine=qrng) + if size_in is not None: + assert qrvs.shape == shape_expected + + if qrng2 is not None: + uniform = qrng2.random(np.prod(size_in) or 1) + qrvs2 = stats.norm.ppf(uniform).reshape(shape_expected) + assert_allclose(qrvs, qrvs2, atol=1e-12) + + def test_QRVS_size_tuple(self): + # QMCEngine samples are always of shape (n, d). When `size` is a tuple, + # we set `n = prod(size)` in the call to qmc_engine.random, transform + # the sample, and reshape it to the final dimensions. When we reshape, + # we need to be careful, because the _columns_ of the sample returned + # by a QMCEngine are "independent"-ish, but the elements within the + # columns are not. 
We need to make sure that this doesn't get mixed up + # by reshaping: qrvs[..., i] should remain "independent"-ish of + # qrvs[..., i+1], but the elements within qrvs[..., i] should be + # transformed from the same low-discrepancy sequence. + + gen = FastGeneratorInversion(stats.norm()) + + size = (3, 4) + d = 5 + qrng = stats.qmc.Halton(d, seed=0) + qrng2 = stats.qmc.Halton(d, seed=0) + + uniform = qrng2.random(np.prod(size)) + + qrvs = gen.qrvs(size=size, d=d, qmc_engine=qrng) + qrvs2 = stats.norm.ppf(uniform) + + for i in range(d): + sample = qrvs[..., i] + sample2 = qrvs2[:, i].reshape(size) + assert_allclose(sample, sample2, atol=1e-12) + + +def test_burr_overflow(): + # this case leads to an overflow error if math.exp is used + # in the definition of the burr pdf instead of np.exp + # a direct implementation of the PDF as x**(-c-1) / (1+x**(-c))**(d+1) + # also leads to an overflow error in the setup + args = (1.89128135, 0.30195177) + with suppress_warnings() as sup: + # filter potential overflow warning + sup.filter(RuntimeWarning) + gen = FastGeneratorInversion(stats.burr(*args)) + u_error, _ = gen.evaluate_error(random_state=4326) + assert u_error <= 1e-10 diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_fit.py b/.venv/Lib/site-packages/scipy/stats/tests/test_fit.py new file mode 100644 index 0000000000000000000000000000000000000000..9a1692201781b07187d1c683772d7e661bd6a89e --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_fit.py @@ -0,0 +1,1027 @@ +import os +import numpy as np +import numpy.testing as npt +from numpy.testing import assert_allclose, assert_equal +import pytest +from scipy import stats +from scipy.optimize import differential_evolution + +from .test_continuous_basic import distcont +from scipy.stats._distn_infrastructure import FitError +from scipy.stats._distr_params import distdiscrete +from scipy.stats import goodness_of_fit + + +# this is not a proper statistical test for convergence, but only +# verifies 
that the estimate and true values don't differ by too much + +fit_sizes = [1000, 5000, 10000] # sample sizes to try + +thresh_percent = 0.25 # percent of true parameters for fail cut-off +thresh_min = 0.75 # minimum difference estimate - true to fail test + +mle_failing_fits = [ + 'gausshyper', + 'genexpon', + 'gengamma', + 'kappa4', + 'ksone', + 'kstwo', + 'ncf', + 'ncx2', + 'truncexpon', + 'tukeylambda', + 'vonmises', + 'levy_stable', + 'trapezoid', + 'truncweibull_min', + 'studentized_range', +] + +# The MLE fit method of these distributions doesn't perform well when all +# parameters are fit, so test them with the location fixed at 0. +mle_use_floc0 = [ + 'burr', + 'chi', + 'chi2', + 'mielke', + 'pearson3', + 'genhalflogistic', + 'rdist', + 'pareto', + 'powerlaw', # distfn.nnlf(est2, rvs) > distfn.nnlf(est1, rvs) otherwise + 'powerlognorm', + 'wrapcauchy', + 'rel_breitwigner', +] + +mm_failing_fits = ['alpha', 'betaprime', 'burr', 'burr12', 'cauchy', 'chi', + 'chi2', 'crystalball', 'dgamma', 'dweibull', 'f', + 'fatiguelife', 'fisk', 'foldcauchy', 'genextreme', + 'gengamma', 'genhyperbolic', 'gennorm', 'genpareto', + 'halfcauchy', 'invgamma', 'invweibull', 'jf_skew_t', + 'johnsonsu', 'kappa3', 'ksone', 'kstwo', 'levy', 'levy_l', + 'levy_stable', 'loglaplace', 'lomax', 'mielke', 'nakagami', + 'ncf', 'nct', 'ncx2', 'pareto', 'powerlognorm', 'powernorm', + 'rel_breitwigner', 'skewcauchy', 't', 'trapezoid', 'triang', + 'truncpareto', 'truncweibull_min', 'tukeylambda', + 'studentized_range'] + +# not sure if these fail, but they caused my patience to fail +mm_slow_fits = ['argus', 'exponpow', 'exponweib', 'gausshyper', 'genexpon', + 'genhalflogistic', 'halfgennorm', 'gompertz', 'johnsonsb', + 'kappa4', 'kstwobign', 'recipinvgauss', + 'truncexpon', 'vonmises', 'vonmises_line'] + +failing_fits = {"MM": mm_failing_fits + mm_slow_fits, "MLE": mle_failing_fits} +fail_interval_censored = {"truncpareto"} + +# Don't run the fit test on these: +skip_fit = [ + 'erlang', # 
Subclass of gamma, generates a warning. + 'genhyperbolic', # too slow +] + + +def cases_test_cont_fit(): + # this tests the closeness of the estimated parameters to the true + # parameters with fit method of continuous distributions + # Note: is slow, some distributions don't converge with sample + # size <= 10000 + for distname, arg in distcont: + if distname not in skip_fit: + yield distname, arg + + +@pytest.mark.slow +@pytest.mark.parametrize('distname,arg', cases_test_cont_fit()) +@pytest.mark.parametrize('method', ["MLE", "MM"]) +def test_cont_fit(distname, arg, method): + if distname in failing_fits[method]: + # Skip failing fits unless overridden + try: + xfail = not int(os.environ['SCIPY_XFAIL']) + except Exception: + xfail = True + if xfail: + msg = "Fitting %s doesn't work reliably yet" % distname + msg += (" [Set environment variable SCIPY_XFAIL=1 to run this" + " test nevertheless.]") + pytest.xfail(msg) + + distfn = getattr(stats, distname) + + truearg = np.hstack([arg, [0.0, 1.0]]) + diffthreshold = np.max(np.vstack([truearg*thresh_percent, + np.full(distfn.numargs+2, thresh_min)]), + 0) + + for fit_size in fit_sizes: + # Note that if a fit succeeds, the other fit_sizes are skipped + np.random.seed(1234) + + with np.errstate(all='ignore'): + rvs = distfn.rvs(size=fit_size, *arg) + if method == 'MLE' and distfn.name in mle_use_floc0: + kwds = {'floc': 0} + else: + kwds = {} + # start with default values + est = distfn.fit(rvs, method=method, **kwds) + if method == 'MLE': + # Trivial test of the use of CensoredData. The fit() method + # will check that data contains no actual censored data, and + # do a regular uncensored fit. 
+ data1 = stats.CensoredData(rvs) + est1 = distfn.fit(data1, **kwds) + msg = ('Different results fitting uncensored data wrapped as' + f' CensoredData: {distfn.name}: est={est} est1={est1}') + assert_allclose(est1, est, rtol=1e-10, err_msg=msg) + if method == 'MLE' and distname not in fail_interval_censored: + # Convert the first `nic` values in rvs to interval-censored + # values. The interval is small, so est2 should be close to + # est. + nic = 15 + interval = np.column_stack((rvs, rvs)) + interval[:nic, 0] *= 0.99 + interval[:nic, 1] *= 1.01 + interval.sort(axis=1) + data2 = stats.CensoredData(interval=interval) + est2 = distfn.fit(data2, **kwds) + msg = ('Different results fitting interval-censored' + f' data: {distfn.name}: est={est} est2={est2}') + assert_allclose(est2, est, rtol=0.05, err_msg=msg) + + diff = est - truearg + + # threshold for location + diffthreshold[-2] = np.max([np.abs(rvs.mean())*thresh_percent, + thresh_min]) + + if np.any(np.isnan(est)): + raise AssertionError('nan returned in fit') + else: + if np.all(np.abs(diff) <= diffthreshold): + break + else: + txt = 'parameter: %s\n' % str(truearg) + txt += 'estimated: %s\n' % str(est) + txt += 'diff : %s\n' % str(diff) + raise AssertionError('fit not very good in %s\n' % distfn.name + txt) + + +def _check_loc_scale_mle_fit(name, data, desired, atol=None): + d = getattr(stats, name) + actual = d.fit(data)[-2:] + assert_allclose(actual, desired, atol=atol, + err_msg='poor mle fit of (loc, scale) in %s' % name) + + +def test_non_default_loc_scale_mle_fit(): + data = np.array([1.01, 1.78, 1.78, 1.78, 1.88, 1.88, 1.88, 2.00]) + _check_loc_scale_mle_fit('uniform', data, [1.01, 0.99], 1e-3) + _check_loc_scale_mle_fit('expon', data, [1.01, 0.73875], 1e-3) + + +def test_expon_fit(): + """gh-6167""" + data = [0, 0, 0, 0, 2, 2, 2, 2] + phat = stats.expon.fit(data, floc=0) + assert_allclose(phat, [0, 1.0], atol=1e-3) + + +def test_fit_error(): + data = np.concatenate([np.zeros(29), np.ones(21)]) + message 
= "Optimization converged to parameters that are..." + with pytest.raises(FitError, match=message), \ + pytest.warns(RuntimeWarning): + stats.beta.fit(data) + + +@pytest.mark.parametrize("dist, params", + [(stats.norm, (0.5, 2.5)), # type: ignore[attr-defined] + (stats.binom, (10, 0.3, 2))]) # type: ignore[attr-defined] +def test_nnlf_and_related_methods(dist, params): + rng = np.random.default_rng(983459824) + + if hasattr(dist, 'pdf'): + logpxf = dist.logpdf + else: + logpxf = dist.logpmf + + x = dist.rvs(*params, size=100, random_state=rng) + ref = -logpxf(x, *params).sum() + res1 = dist.nnlf(params, x) + res2 = dist._penalized_nnlf(params, x) + assert_allclose(res1, ref) + assert_allclose(res2, ref) + + +def cases_test_fit_mle(): + # These fail default test or hang + skip_basic_fit = {'argus', 'foldnorm', 'truncpareto', 'truncweibull_min', + 'ksone', 'levy_stable', 'studentized_range', 'kstwo', + 'arcsine'} + + # Please keep this list in alphabetical order... + slow_basic_fit = {'alpha', + 'betaprime', 'binom', 'bradford', 'burr12', + 'chi', 'crystalball', 'dweibull', 'exponpow', + 'f', 'fatiguelife', 'fisk', 'foldcauchy', + 'genexpon', 'genextreme', 'gennorm', 'genpareto', + 'gompertz', 'halfgennorm', 'invgauss', 'invweibull', + 'jf_skew_t', 'johnsonsb', 'johnsonsu', 'kappa3', + 'kstwobign', 'loglaplace', 'lognorm', 'lomax', 'mielke', + 'nakagami', 'nbinom', 'norminvgauss', + 'pareto', 'pearson3', 'powerlaw', 'powernorm', + 'randint', 'rdist', 'recipinvgauss', 'rice', + 't', 'uniform', 'weibull_max', 'wrapcauchy'} + + # Please keep this list in alphabetical order... 
+ xslow_basic_fit = {'beta', 'betabinom', 'burr', 'exponweib', + 'gausshyper', 'gengamma', 'genhalflogistic', + 'genhyperbolic', 'geninvgauss', + 'hypergeom', 'kappa4', 'loguniform', + 'ncf', 'nchypergeom_fisher', 'nchypergeom_wallenius', + 'nct', 'ncx2', 'nhypergeom', + 'powerlognorm', 'reciprocal', 'rel_breitwigner', + 'skellam', 'trapezoid', 'triang', 'truncnorm', + 'tukeylambda', 'zipfian'} + + for dist in dict(distdiscrete + distcont): + if dist in skip_basic_fit or not isinstance(dist, str): + reason = "tested separately" + yield pytest.param(dist, marks=pytest.mark.skip(reason=reason)) + elif dist in slow_basic_fit: + reason = "too slow (>= 0.25s)" + yield pytest.param(dist, marks=pytest.mark.slow(reason=reason)) + elif dist in xslow_basic_fit: + reason = "too slow (>= 1.0s)" + yield pytest.param(dist, marks=pytest.mark.xslow(reason=reason)) + else: + yield dist + + +def cases_test_fit_mse(): + # the first four are so slow that I'm not sure whether they would pass + skip_basic_fit = {'levy_stable', 'studentized_range', 'ksone', 'skewnorm', + 'norminvgauss', # super slow (~1 hr) but passes + 'kstwo', # very slow (~25 min) but passes + 'geninvgauss', # quite slow (~4 minutes) but passes + 'gausshyper', 'genhyperbolic', # integration warnings + 'tukeylambda', # close, but doesn't meet tolerance + 'vonmises'} # can have negative CDF; doesn't play nice + + # Please keep this list in alphabetical order... 
+ slow_basic_fit = {'alpha', 'anglit', 'arcsine', 'betabinom', 'bradford', + 'chi', 'chi2', 'crystalball', 'dgamma', 'dweibull', + 'erlang', 'exponnorm', 'exponpow', 'exponweib', + 'fatiguelife', 'fisk', 'foldcauchy', 'foldnorm', + 'gamma', 'genexpon', 'genextreme', 'genhalflogistic', + 'genlogistic', 'genpareto', 'gompertz', + 'hypergeom', 'invweibull', 'jf_skew_t', 'johnsonsb', + 'johnsonsu', 'kappa3', 'kstwobign', + 'laplace_asymmetric', 'loggamma', 'loglaplace', + 'lognorm', 'lomax', + 'maxwell', 'mielke', 'nakagami', 'nhypergeom', + 'pareto', 'powernorm', 'randint', 'recipinvgauss', + 'semicircular', + 't', 'triang', 'truncexpon', 'truncpareto', + 'truncweibull_min', + 'uniform', 'vonmises_line', + 'wald', 'weibull_max', 'weibull_min', 'wrapcauchy'} + + # Please keep this list in alphabetical order... + xslow_basic_fit = {'beta', 'betaprime', 'burr', 'burr12', + 'f', 'gengamma', 'gennorm', + 'halfgennorm', 'invgamma', 'invgauss', + 'kappa4', 'loguniform', + 'ncf', 'nchypergeom_fisher', 'nchypergeom_wallenius', + 'nct', 'ncx2', + 'pearson3', 'powerlaw', 'powerlognorm', + 'rdist', 'reciprocal', 'rel_breitwigner', 'rice', + 'trapezoid', 'truncnorm', + 'zipfian'} + + warns_basic_fit = {'skellam'} # can remove mark after gh-14901 is resolved + + for dist in dict(distdiscrete + distcont): + if dist in skip_basic_fit or not isinstance(dist, str): + reason = "Fails. Oh well." 
+ yield pytest.param(dist, marks=pytest.mark.skip(reason=reason)) + elif dist in slow_basic_fit: + reason = "too slow (>= 0.25s)" + yield pytest.param(dist, marks=pytest.mark.slow(reason=reason)) + elif dist in xslow_basic_fit: + reason = "too slow (>= 1.0s)" + yield pytest.param(dist, marks=pytest.mark.xslow(reason=reason)) + elif dist in warns_basic_fit: + mark = pytest.mark.filterwarnings('ignore::RuntimeWarning') + yield pytest.param(dist, marks=mark) + else: + yield dist + + +def cases_test_fitstart(): + for distname, shapes in dict(distcont).items(): + if (not isinstance(distname, str) or + distname in {'studentized_range', 'recipinvgauss'}): # slow + continue + yield distname, shapes + + +@pytest.mark.parametrize('distname, shapes', cases_test_fitstart()) +def test_fitstart(distname, shapes): + dist = getattr(stats, distname) + rng = np.random.default_rng(216342614) + data = rng.random(10) + + with np.errstate(invalid='ignore', divide='ignore'): # irrelevant to test + guess = dist._fitstart(data) + + assert dist._argcheck(*guess[:-2]) + + +def assert_nlff_less_or_close(dist, data, params1, params0, rtol=1e-7, atol=0, + nlff_name='nnlf'): + nlff = getattr(dist, nlff_name) + nlff1 = nlff(params1, data) + nlff0 = nlff(params0, data) + if not (nlff1 < nlff0): + np.testing.assert_allclose(nlff1, nlff0, rtol=rtol, atol=atol) + + +class TestFit: + dist = stats.binom # type: ignore[attr-defined] + seed = 654634816187 + rng = np.random.default_rng(seed) + data = stats.binom.rvs(5, 0.5, size=100, random_state=rng) # type: ignore[attr-defined] # noqa: E501 + shape_bounds_a = [(1, 10), (0, 1)] + shape_bounds_d = {'n': (1, 10), 'p': (0, 1)} + atol = 5e-2 + rtol = 1e-2 + tols = {'atol': atol, 'rtol': rtol} + + def opt(self, *args, **kwds): + return differential_evolution(*args, seed=0, **kwds) + + def test_dist_iv(self): + message = "`dist` must be an instance of..." 
+ with pytest.raises(ValueError, match=message): + stats.fit(10, self.data, self.shape_bounds_a) + + def test_data_iv(self): + message = "`data` must be exactly one-dimensional." + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, [[1, 2, 3]], self.shape_bounds_a) + + message = "All elements of `data` must be finite numbers." + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, [1, 2, 3, np.nan], self.shape_bounds_a) + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, [1, 2, 3, np.inf], self.shape_bounds_a) + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, ['1', '2', '3'], self.shape_bounds_a) + + def test_bounds_iv(self): + message = "Bounds provided for the following unrecognized..." + shape_bounds = {'n': (1, 10), 'p': (0, 1), '1': (0, 10)} + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "Each element of a `bounds` sequence must be a tuple..." + shape_bounds = [(1, 10, 3), (0, 1)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "Each element of `bounds` must be a tuple specifying..." + shape_bounds = [(1, 10, 3), (0, 1, 0.5)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + shape_bounds = [1, 0] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "A `bounds` sequence must contain at least 2 elements..." + shape_bounds = [(1, 10)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "A `bounds` sequence may not contain more than 3 elements..." + bounds = [(1, 10), (1, 10), (1, 10), (1, 10)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, bounds) + + message = "There are no values for `p` on the interval..." 
+ shape_bounds = {'n': (1, 10), 'p': (1, 0)} + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "There are no values for `n` on the interval..." + shape_bounds = [(10, 1), (0, 1)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "There are no integer values for `n` on the interval..." + shape_bounds = [(1.4, 1.6), (0, 1)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + message = "The intersection of user-provided bounds for `n`" + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data) + shape_bounds = [(-np.inf, np.inf), (0, 1)] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, shape_bounds) + + def test_guess_iv(self): + message = "Guesses provided for the following unrecognized..." + guess = {'n': 1, 'p': 0.5, '1': 255} + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "Each element of `guess` must be a scalar..." + guess = {'n': 1, 'p': 'hi'} + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + guess = [1, 'f'] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + guess = [[1, 2]] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "A `guess` sequence must contain at least 2..." + guess = [1] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "A `guess` sequence may not contain more than 3..." 
+ guess = [1, 2, 3, 4] + with pytest.raises(ValueError, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "Guess for parameter `n` rounded.*|Guess for parameter `p` clipped.*" + guess = {'n': 4.5, 'p': -0.5} + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "Guess for parameter `loc` rounded..." + guess = [5, 0.5, 0.5] + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "Guess for parameter `p` clipped..." + guess = {'n': 5, 'p': -0.5} + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + message = "Guess for parameter `loc` clipped..." + guess = [5, 0.5, 1] + with pytest.warns(RuntimeWarning, match=message): + stats.fit(self.dist, self.data, self.shape_bounds_d, guess=guess) + + def basic_fit_test(self, dist_name, method): + + N = 5000 + dist_data = dict(distcont + distdiscrete) + rng = np.random.default_rng(self.seed) + dist = getattr(stats, dist_name) + shapes = np.array(dist_data[dist_name]) + bounds = np.empty((len(shapes) + 2, 2), dtype=np.float64) + bounds[:-2, 0] = shapes/10.**np.sign(shapes) + bounds[:-2, 1] = shapes*10.**np.sign(shapes) + bounds[-2] = (0, 10) + bounds[-1] = (1e-16, 10) + loc = rng.uniform(*bounds[-2]) + scale = rng.uniform(*bounds[-1]) + ref = list(dist_data[dist_name]) + [loc, scale] + + if getattr(dist, 'pmf', False): + ref = ref[:-1] + ref[-1] = np.floor(loc) + data = dist.rvs(*ref, size=N, random_state=rng) + bounds = bounds[:-1] + if getattr(dist, 'pdf', False): + data = dist.rvs(*ref, size=N, random_state=rng) + + with npt.suppress_warnings() as sup: + sup.filter(RuntimeWarning, "overflow encountered") + res = stats.fit(dist, data, bounds, method=method, + optimizer=self.opt) + + nlff_names = {'mle': 'nnlf', 'mse': '_penalized_nlpsf'} + nlff_name = 
nlff_names[method] + assert_nlff_less_or_close(dist, data, res.params, ref, **self.tols, + nlff_name=nlff_name) + + @pytest.mark.parametrize("dist_name", cases_test_fit_mle()) + def test_basic_fit_mle(self, dist_name): + self.basic_fit_test(dist_name, "mle") + + @pytest.mark.parametrize("dist_name", cases_test_fit_mse()) + def test_basic_fit_mse(self, dist_name): + self.basic_fit_test(dist_name, "mse") + + def test_arcsine(self): + # Can't guarantee that all distributions will fit all data with + # arbitrary bounds. This distribution just happens to fail above. + # Try something slightly different. + N = 1000 + rng = np.random.default_rng(self.seed) + dist = stats.arcsine + shapes = (1., 2.) + data = dist.rvs(*shapes, size=N, random_state=rng) + shape_bounds = {'loc': (0.1, 10), 'scale': (0.1, 10)} + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols) + + def test_argus(self): + # Can't guarantee that all distributions will fit all data with + # arbitrary bounds. This distribution just happens to fail above. + # Try something slightly different. + N = 1000 + rng = np.random.default_rng(self.seed) + dist = stats.argus + shapes = (1., 2., 3.) + data = dist.rvs(*shapes, size=N, random_state=rng) + shape_bounds = {'chi': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)} + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + + assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols) + + def test_foldnorm(self): + # Can't guarantee that all distributions will fit all data with + # arbitrary bounds. This distribution just happens to fail above. + # Try something slightly different. + N = 1000 + rng = np.random.default_rng(self.seed) + dist = stats.foldnorm + shapes = (1.952125337355587, 2., 3.) 
+ data = dist.rvs(*shapes, size=N, random_state=rng) + shape_bounds = {'c': (0.1, 10), 'loc': (0.1, 10), 'scale': (0.1, 10)} + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + + assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols) + + def test_truncpareto(self): + # Can't guarantee that all distributions will fit all data with + # arbitrary bounds. This distribution just happens to fail above. + # Try something slightly different. + N = 1000 + rng = np.random.default_rng(self.seed) + dist = stats.truncpareto + shapes = (1.8, 5.3, 2.3, 4.1) + data = dist.rvs(*shapes, size=N, random_state=rng) + shape_bounds = [(0.1, 10)]*4 + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + + assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols) + + def test_truncweibull_min(self): + # Can't guarantee that all distributions will fit all data with + # arbitrary bounds. This distribution just happens to fail above. + # Try something slightly different. + N = 1000 + rng = np.random.default_rng(self.seed) + dist = stats.truncweibull_min + shapes = (2.5, 0.25, 1.75, 2., 3.) + data = dist.rvs(*shapes, size=N, random_state=rng) + shape_bounds = [(0.1, 10)]*5 + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + + assert_nlff_less_or_close(dist, data, res.params, shapes, **self.tols) + + def test_missing_shape_bounds(self): + # some distributions have a small domain w.r.t. a parameter, e.g. 
+ # $p \in [0, 1]$ for binomial distribution + # User does not need to provide these because the intersection of the + # user's bounds (none) and the distribution's domain is finite + N = 1000 + rng = np.random.default_rng(self.seed) + + dist = stats.binom + n, p, loc = 10, 0.65, 0 + data = dist.rvs(n, p, loc=loc, size=N, random_state=rng) + shape_bounds = {'n': np.array([0, 20])} # check arrays are OK, too + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + assert_allclose(res.params, (n, p, loc), **self.tols) + + dist = stats.bernoulli + p, loc = 0.314159, 0 + data = dist.rvs(p, loc=loc, size=N, random_state=rng) + res = stats.fit(dist, data, optimizer=self.opt) + assert_allclose(res.params, (p, loc), **self.tols) + + def test_fit_only_loc_scale(self): + # fit only loc + N = 5000 + rng = np.random.default_rng(self.seed) + + dist = stats.norm + loc, scale = 1.5, 1 + data = dist.rvs(loc=loc, size=N, random_state=rng) + loc_bounds = (0, 5) + bounds = {'loc': loc_bounds} + res = stats.fit(dist, data, bounds, optimizer=self.opt) + assert_allclose(res.params, (loc, scale), **self.tols) + + # fit only scale + loc, scale = 0, 2.5 + data = dist.rvs(scale=scale, size=N, random_state=rng) + scale_bounds = (0.01, 5) + bounds = {'scale': scale_bounds} + res = stats.fit(dist, data, bounds, optimizer=self.opt) + assert_allclose(res.params, (loc, scale), **self.tols) + + # fit only loc and scale + dist = stats.norm + loc, scale = 1.5, 2.5 + data = dist.rvs(loc=loc, scale=scale, size=N, random_state=rng) + bounds = {'loc': loc_bounds, 'scale': scale_bounds} + res = stats.fit(dist, data, bounds, optimizer=self.opt) + assert_allclose(res.params, (loc, scale), **self.tols) + + def test_everything_fixed(self): + N = 5000 + rng = np.random.default_rng(self.seed) + + dist = stats.norm + loc, scale = 1.5, 2.5 + data = dist.rvs(loc=loc, scale=scale, size=N, random_state=rng) + + # loc, scale fixed to 0, 1 by default + res = stats.fit(dist, data) + 
assert_allclose(res.params, (0, 1), **self.tols) + + # loc, scale explicitly fixed + bounds = {'loc': (loc, loc), 'scale': (scale, scale)} + res = stats.fit(dist, data, bounds) + assert_allclose(res.params, (loc, scale), **self.tols) + + # `n` gets fixed during polishing + dist = stats.binom + n, p, loc = 10, 0.65, 0 + data = dist.rvs(n, p, loc=loc, size=N, random_state=rng) + shape_bounds = {'n': (0, 20), 'p': (0.65, 0.65)} + res = stats.fit(dist, data, shape_bounds, optimizer=self.opt) + assert_allclose(res.params, (n, p, loc), **self.tols) + + def test_failure(self): + N = 5000 + rng = np.random.default_rng(self.seed) + + dist = stats.nbinom + shapes = (5, 0.5) + data = dist.rvs(*shapes, size=N, random_state=rng) + + assert data.min() == 0 + # With lower bounds on location at 0.5, likelihood is zero + bounds = [(0, 30), (0, 1), (0.5, 10)] + res = stats.fit(dist, data, bounds) + message = "Optimization converged to parameter values that are" + assert res.message.startswith(message) + assert res.success is False + + @pytest.mark.xslow + def test_guess(self): + # Test that guess helps DE find the desired solution + N = 2000 + # With some seeds, `fit` doesn't need a guess + rng = np.random.default_rng(1963904448561) + dist = stats.nhypergeom + params = (20, 7, 12, 0) + bounds = [(2, 200), (0.7, 70), (1.2, 120), (0, 10)] + + data = dist.rvs(*params, size=N, random_state=rng) + + res = stats.fit(dist, data, bounds, optimizer=self.opt) + assert not np.allclose(res.params, params, **self.tols) + + res = stats.fit(dist, data, bounds, guess=params, optimizer=self.opt) + assert_allclose(res.params, params, **self.tols) + + def test_mse_accuracy_1(self): + # Test maximum spacing estimation against example from Wikipedia + # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples + data = [2, 4] + dist = stats.expon + bounds = {'loc': (0, 0), 'scale': (1e-8, 10)} + res_mle = stats.fit(dist, data, bounds=bounds, method='mle') + assert_allclose(res_mle.params.scale, 
3, atol=1e-3) + res_mse = stats.fit(dist, data, bounds=bounds, method='mse') + assert_allclose(res_mse.params.scale, 3.915, atol=1e-3) + + def test_mse_accuracy_2(self): + # Test maximum spacing estimation against example from Wikipedia + # https://en.wikipedia.org/wiki/Maximum_spacing_estimation#Examples + rng = np.random.default_rng(9843212616816518964) + + dist = stats.uniform + n = 10 + data = dist(3, 6).rvs(size=n, random_state=rng) + bounds = {'loc': (0, 10), 'scale': (1e-8, 10)} + res = stats.fit(dist, data, bounds=bounds, method='mse') + # (loc=3.608118420015416, scale=5.509323262055043) + + x = np.sort(data) + a = (n*x[0] - x[-1])/(n - 1) + b = (n*x[-1] - x[0])/(n - 1) + ref = a, b-a # (3.6081133632151503, 5.509328130317254) + assert_allclose(res.params, ref, rtol=1e-4) + + +# Data from Matlab: https://www.mathworks.com/help/stats/lillietest.html +examgrades = [65, 61, 81, 88, 69, 89, 55, 84, 86, 84, 71, 81, 84, 81, 78, 67, + 96, 66, 73, 75, 59, 71, 69, 63, 79, 76, 63, 85, 87, 88, 80, 71, + 65, 84, 71, 75, 81, 79, 64, 65, 84, 77, 70, 75, 84, 75, 73, 92, + 90, 79, 80, 71, 73, 71, 58, 79, 73, 64, 77, 82, 81, 59, 54, 82, + 57, 79, 79, 73, 74, 82, 63, 64, 73, 69, 87, 68, 81, 73, 83, 73, + 80, 73, 73, 71, 66, 78, 64, 74, 68, 67, 75, 75, 80, 85, 74, 76, + 80, 77, 93, 70, 86, 80, 81, 83, 68, 60, 85, 64, 74, 82, 81, 77, + 66, 85, 75, 81, 69, 60, 83, 72] + + +class TestGoodnessOfFit: + + def test_gof_iv(self): + dist = stats.norm + x = [1, 2, 3] + + message = r"`dist` must be a \(non-frozen\) instance of..." + with pytest.raises(TypeError, match=message): + goodness_of_fit(stats.norm(), x) + + message = "`data` must be a one-dimensional array of numbers." + with pytest.raises(ValueError, match=message): + goodness_of_fit(dist, [[1, 2, 3]]) + + message = "`statistic` must be one of..." + with pytest.raises(ValueError, match=message): + goodness_of_fit(dist, x, statistic='mm') + + message = "`n_mc_samples` must be an integer." 
+ with pytest.raises(TypeError, match=message): + goodness_of_fit(dist, x, n_mc_samples=1000.5) + + message = "'herring' cannot be used to seed a" + with pytest.raises(ValueError, match=message): + goodness_of_fit(dist, x, random_state='herring') + + def test_against_ks(self): + rng = np.random.default_rng(8517426291317196949) + x = examgrades + known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)} + res = goodness_of_fit(stats.norm, x, known_params=known_params, + statistic='ks', random_state=rng) + ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact') + assert_allclose(res.statistic, ref.statistic) # ~0.0848 + assert_allclose(res.pvalue, ref.pvalue, atol=5e-3) # ~0.335 + + def test_against_lilliefors(self): + rng = np.random.default_rng(2291803665717442724) + x = examgrades + res = goodness_of_fit(stats.norm, x, statistic='ks', random_state=rng) + known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)} + ref = stats.kstest(x, stats.norm(**known_params).cdf, method='exact') + assert_allclose(res.statistic, ref.statistic) # ~0.0848 + assert_allclose(res.pvalue, 0.0348, atol=5e-3) + + def test_against_cvm(self): + rng = np.random.default_rng(8674330857509546614) + x = examgrades + known_params = {'loc': np.mean(x), 'scale': np.std(x, ddof=1)} + res = goodness_of_fit(stats.norm, x, known_params=known_params, + statistic='cvm', random_state=rng) + ref = stats.cramervonmises(x, stats.norm(**known_params).cdf) + assert_allclose(res.statistic, ref.statistic) # ~0.090 + assert_allclose(res.pvalue, ref.pvalue, atol=5e-3) # ~0.636 + + def test_against_anderson_case_0(self): + # "Case 0" is where loc and scale are known [1] + rng = np.random.default_rng(7384539336846690410) + x = np.arange(1, 101) + # loc that produced critical value of statistic found w/ root_scalar + known_params = {'loc': 45.01575354024957, 'scale': 30} + res = goodness_of_fit(stats.norm, x, known_params=known_params, + statistic='ad', random_state=rng) + 
assert_allclose(res.statistic, 2.492) # See [1] Table 1A 1.0 + assert_allclose(res.pvalue, 0.05, atol=5e-3) + + def test_against_anderson_case_1(self): + # "Case 1" is where scale is known and loc is fit [1] + rng = np.random.default_rng(5040212485680146248) + x = np.arange(1, 101) + # scale that produced critical value of statistic found w/ root_scalar + known_params = {'scale': 29.957112639101933} + res = goodness_of_fit(stats.norm, x, known_params=known_params, + statistic='ad', random_state=rng) + assert_allclose(res.statistic, 0.908) # See [1] Table 1B 1.1 + assert_allclose(res.pvalue, 0.1, atol=5e-3) + + def test_against_anderson_case_2(self): + # "Case 2" is where loc is known and scale is fit [1] + rng = np.random.default_rng(726693985720914083) + x = np.arange(1, 101) + # loc that produced critical value of statistic found w/ root_scalar + known_params = {'loc': 44.5680212261933} + res = goodness_of_fit(stats.norm, x, known_params=known_params, + statistic='ad', random_state=rng) + assert_allclose(res.statistic, 2.904) # See [1] Table 1B 1.2 + assert_allclose(res.pvalue, 0.025, atol=5e-3) + + def test_against_anderson_case_3(self): + # "Case 3" is where both loc and scale are fit [1] + rng = np.random.default_rng(6763691329830218206) + # c that produced critical value of statistic found w/ root_scalar + x = stats.skewnorm.rvs(1.4477847789132101, loc=1, scale=2, size=100, + random_state=rng) + res = goodness_of_fit(stats.norm, x, statistic='ad', random_state=rng) + assert_allclose(res.statistic, 0.559) # See [1] Table 1B 1.2 + assert_allclose(res.pvalue, 0.15, atol=5e-3) + + @pytest.mark.slow + def test_against_anderson_gumbel_r(self): + rng = np.random.default_rng(7302761058217743) + # c that produced critical value of statistic found w/ root_scalar + x = stats.genextreme(0.051896837188595134, loc=0.5, + scale=1.5).rvs(size=1000, random_state=rng) + res = goodness_of_fit(stats.gumbel_r, x, statistic='ad', + random_state=rng) + ref = stats.anderson(x, 
dist='gumbel_r') + assert_allclose(res.statistic, ref.critical_values[0]) + assert_allclose(res.pvalue, ref.significance_level[0]/100, atol=5e-3) + + def test_against_filliben_norm(self): + # Test against `stats.fit` ref. [7] Section 8 "Example" + rng = np.random.default_rng(8024266430745011915) + y = [6, 1, -4, 8, -2, 5, 0] + known_params = {'loc': 0, 'scale': 1} + res = stats.goodness_of_fit(stats.norm, y, known_params=known_params, + statistic="filliben", random_state=rng) + # Slight discrepancy presumably due to roundoff in Filliben's + # calculation. Using exact order statistic medians instead of + # Filliben's approximation doesn't account for it. + assert_allclose(res.statistic, 0.98538, atol=1e-4) + assert 0.75 < res.pvalue < 0.9 + + # Using R's ppcc library: + # library(ppcc) + # options(digits=16) + # x < - c(6, 1, -4, 8, -2, 5, 0) + # set.seed(100) + # ppccTest(x, "qnorm", ppos="Filliben") + # Discrepancy with + assert_allclose(res.statistic, 0.98540957187084, rtol=2e-5) + assert_allclose(res.pvalue, 0.8875, rtol=2e-3) + + def test_filliben_property(self): + # Filliben's statistic should be independent of data location and scale + rng = np.random.default_rng(8535677809395478813) + x = rng.normal(loc=10, scale=0.5, size=100) + res = stats.goodness_of_fit(stats.norm, x, + statistic="filliben", random_state=rng) + known_params = {'loc': 0, 'scale': 1} + ref = stats.goodness_of_fit(stats.norm, x, known_params=known_params, + statistic="filliben", random_state=rng) + assert_allclose(res.statistic, ref.statistic, rtol=1e-15) + + @pytest.mark.parametrize('case', [(25, [.928, .937, .950, .958, .966]), + (50, [.959, .965, .972, .977, .981]), + (95, [.977, .979, .983, .986, .989])]) + def test_against_filliben_norm_table(self, case): + # Test against `stats.fit` ref. 
[7] Table 1 + rng = np.random.default_rng(504569995557928957) + n, ref = case + x = rng.random(n) + known_params = {'loc': 0, 'scale': 1} + res = stats.goodness_of_fit(stats.norm, x, known_params=known_params, + statistic="filliben", random_state=rng) + percentiles = np.array([0.005, 0.01, 0.025, 0.05, 0.1]) + res = stats.scoreatpercentile(res.null_distribution, percentiles*100) + assert_allclose(res, ref, atol=2e-3) + + @pytest.mark.slow + @pytest.mark.parametrize('case', [(5, 0.95772790260469, 0.4755), + (6, 0.95398832257958, 0.3848), + (7, 0.9432692889277, 0.2328)]) + def test_against_ppcc(self, case): + # Test against R ppcc, e.g. + # library(ppcc) + # options(digits=16) + # x < - c(0.52325412, 1.06907699, -0.36084066, 0.15305959, 0.99093194) + # set.seed(100) + # ppccTest(x, "qrayleigh", ppos="Filliben") + n, ref_statistic, ref_pvalue = case + rng = np.random.default_rng(7777775561439803116) + x = rng.normal(size=n) + res = stats.goodness_of_fit(stats.rayleigh, x, statistic="filliben", + random_state=rng) + assert_allclose(res.statistic, ref_statistic, rtol=1e-4) + assert_allclose(res.pvalue, ref_pvalue, atol=1.5e-2) + + def test_params_effects(self): + # Ensure that `guessed_params`, `fit_params`, and `known_params` have + # the intended effects. 
+ rng = np.random.default_rng(9121950977643805391) + x = stats.skewnorm.rvs(-5.044559778383153, loc=1, scale=2, size=50, + random_state=rng) + + # Show that `guessed_params` don't fit to the guess, + # but `fit_params` and `known_params` respect the provided fit + guessed_params = {'c': 13.4} + fit_params = {'scale': 13.73} + known_params = {'loc': -13.85} + rng = np.random.default_rng(9121950977643805391) + res1 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2, + guessed_params=guessed_params, + fit_params=fit_params, + known_params=known_params, random_state=rng) + assert not np.allclose(res1.fit_result.params.c, 13.4) + assert_equal(res1.fit_result.params.scale, 13.73) + assert_equal(res1.fit_result.params.loc, -13.85) + + # Show that changing the guess changes the parameter that gets fit, + # and it changes the null distribution + guessed_params = {'c': 2} + rng = np.random.default_rng(9121950977643805391) + res2 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2, + guessed_params=guessed_params, + fit_params=fit_params, + known_params=known_params, random_state=rng) + assert not np.allclose(res2.fit_result.params.c, + res1.fit_result.params.c, rtol=1e-8) + assert not np.allclose(res2.null_distribution, + res1.null_distribution, rtol=1e-8) + assert_equal(res2.fit_result.params.scale, 13.73) + assert_equal(res2.fit_result.params.loc, -13.85) + + # If we set all parameters as fit_params and known_params, + # they're all fixed to those values, but the null distribution + # varies. 
+ fit_params = {'c': 13.4, 'scale': 13.73} + rng = np.random.default_rng(9121950977643805391) + res3 = goodness_of_fit(stats.weibull_min, x, n_mc_samples=2, + guessed_params=guessed_params, + fit_params=fit_params, + known_params=known_params, random_state=rng) + assert_equal(res3.fit_result.params.c, 13.4) + assert_equal(res3.fit_result.params.scale, 13.73) + assert_equal(res3.fit_result.params.loc, -13.85) + assert not np.allclose(res3.null_distribution, res1.null_distribution) + + def test_custom_statistic(self): + # Test support for custom statistic function. + + # References: + # [1] Pyke, R. (1965). "Spacings". Journal of the Royal Statistical + # Society: Series B (Methodological), 27(3): 395-436. + # [2] Burrows, P. M. (1979). "Selected Percentage Points of + # Greenwood's Statistics". Journal of the Royal Statistical + # Society. Series A (General), 142(2): 256-258. + + # Use the Greenwood statistic for illustration; see [1, p.402]. + def greenwood(dist, data, *, axis): + x = np.sort(data, axis=axis) + y = dist.cdf(x) + d = np.diff(y, axis=axis, prepend=0, append=1) + return np.sum(d ** 2, axis=axis) + + # Run the Monte Carlo test with sample size = 5 on a fully specified + # null distribution, and compare the simulated quantiles to the exact + # ones given in [2, Table 1, column (n = 5)]. 
+ rng = np.random.default_rng(9121950977643805391) + data = stats.expon.rvs(size=5, random_state=rng) + result = goodness_of_fit(stats.expon, data, + known_params={'loc': 0, 'scale': 1}, + statistic=greenwood, random_state=rng) + p = [.01, .05, .1, .2, .3, .4, .5, .6, .7, .8, .9, .95, .99] + exact_quantiles = [ + .183863, .199403, .210088, .226040, .239947, .253677, .268422, + .285293, .306002, .334447, .382972, .432049, .547468] + simulated_quantiles = np.quantile(result.null_distribution, p) + assert_allclose(simulated_quantiles, exact_quantiles, atol=0.005) + +class TestFitResult: + def test_plot_iv(self): + rng = np.random.default_rng(1769658657308472721) + data = stats.norm.rvs(0, 1, size=100, random_state=rng) + + def optimizer(*args, **kwargs): + return differential_evolution(*args, **kwargs, seed=rng) + + bounds = [(0, 30), (0, 1)] + res = stats.fit(stats.norm, data, bounds, optimizer=optimizer) + try: + import matplotlib # noqa: F401 + message = r"`plot_type` must be one of \{'..." + with pytest.raises(ValueError, match=message): + res.plot(plot_type='llama') + except (ModuleNotFoundError, ImportError): + # Avoid trying to call MPL with numpy 2.0-dev, because that fails + # too often due to ABI mismatches and is hard to avoid. This test + # will work fine again once MPL has done a 2.0-compatible release. + if not np.__version__.startswith('2.0.0.dev0'): + message = r"matplotlib must be installed to use method `plot`." 
+ with pytest.raises(ModuleNotFoundError, match=message): + res.plot(plot_type='llama') diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_hypotests.py b/.venv/Lib/site-packages/scipy/stats/tests/test_hypotests.py new file mode 100644 index 0000000000000000000000000000000000000000..07b609d4ca9a10ac0400759d24963475865b7e5b --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_hypotests.py @@ -0,0 +1,1879 @@ +from itertools import product + +import numpy as np +import random +import functools +import pytest +from numpy.testing import (assert_, assert_equal, assert_allclose, + assert_almost_equal) # avoid new uses +from pytest import raises as assert_raises + +import scipy.stats as stats +from scipy.stats import distributions +from scipy.stats._hypotests import (epps_singleton_2samp, cramervonmises, + _cdf_cvm, cramervonmises_2samp, + _pval_cvm_2samp_exact, barnard_exact, + boschloo_exact) +from scipy.stats._mannwhitneyu import mannwhitneyu, _mwu_state +from .common_tests import check_named_results +from scipy._lib._testutils import _TestPythranFunc + + +class TestEppsSingleton: + def test_statistic_1(self): + # first example in Goerg & Kaiser, also in original paper of + # Epps & Singleton. 
Note: values do not match exactly, the + # value of the interquartile range varies depending on how + # quantiles are computed + x = np.array([-0.35, 2.55, 1.73, 0.73, 0.35, + 2.69, 0.46, -0.94, -0.37, 12.07]) + y = np.array([-1.15, -0.15, 2.48, 3.25, 3.71, + 4.29, 5.00, 7.74, 8.38, 8.60]) + w, p = epps_singleton_2samp(x, y) + assert_almost_equal(w, 15.14, decimal=1) + assert_almost_equal(p, 0.00442, decimal=3) + + def test_statistic_2(self): + # second example in Goerg & Kaiser, again not a perfect match + x = np.array((0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 5, 5, 5, 5, 6, 10, + 10, 10, 10)) + y = np.array((10, 4, 0, 5, 10, 10, 0, 5, 6, 7, 10, 3, 1, 7, 0, 8, 1, + 5, 8, 10)) + w, p = epps_singleton_2samp(x, y) + assert_allclose(w, 8.900, atol=0.001) + assert_almost_equal(p, 0.06364, decimal=3) + + def test_epps_singleton_array_like(self): + np.random.seed(1234) + x, y = np.arange(30), np.arange(28) + + w1, p1 = epps_singleton_2samp(list(x), list(y)) + w2, p2 = epps_singleton_2samp(tuple(x), tuple(y)) + w3, p3 = epps_singleton_2samp(x, y) + + assert_(w1 == w2 == w3) + assert_(p1 == p2 == p3) + + def test_epps_singleton_size(self): + # raise error if less than 5 elements + x, y = (1, 2, 3, 4), np.arange(10) + assert_raises(ValueError, epps_singleton_2samp, x, y) + + def test_epps_singleton_nonfinite(self): + # raise error if there are non-finite values + x, y = (1, 2, 3, 4, 5, np.inf), np.arange(10) + assert_raises(ValueError, epps_singleton_2samp, x, y) + + def test_names(self): + x, y = np.arange(20), np.arange(30) + res = epps_singleton_2samp(x, y) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes) + + +class TestCvm: + # the expected values of the cdfs are taken from Table 1 in + # Csorgo / Faraway: The Exact and Asymptotic Distribution of + # Cramér-von Mises Statistics, 1996. 
+ def test_cdf_4(self): + assert_allclose( + _cdf_cvm([0.02983, 0.04111, 0.12331, 0.94251], 4), + [0.01, 0.05, 0.5, 0.999], + atol=1e-4) + + def test_cdf_10(self): + assert_allclose( + _cdf_cvm([0.02657, 0.03830, 0.12068, 0.56643], 10), + [0.01, 0.05, 0.5, 0.975], + atol=1e-4) + + def test_cdf_1000(self): + assert_allclose( + _cdf_cvm([0.02481, 0.03658, 0.11889, 1.16120], 1000), + [0.01, 0.05, 0.5, 0.999], + atol=1e-4) + + def test_cdf_inf(self): + assert_allclose( + _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204]), + [0.01, 0.05, 0.5, 0.999], + atol=1e-4) + + def test_cdf_support(self): + # cdf has support on [1/(12*n), n/3] + assert_equal(_cdf_cvm([1/(12*533), 533/3], 533), [0, 1]) + assert_equal(_cdf_cvm([1/(12*(27 + 1)), (27 + 1)/3], 27), [0, 1]) + + def test_cdf_large_n(self): + # test that asymptotic cdf and cdf for large samples are close + assert_allclose( + _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100], 10000), + _cdf_cvm([0.02480, 0.03656, 0.11888, 1.16204, 100]), + atol=1e-4) + + def test_large_x(self): + # for large values of x and n, the series used to compute the cdf + # converges slowly. + # this leads to bug in R package goftest and MAPLE code that is + # the basis of the implementation in scipy + # note: cdf = 1 for x >= 1000/3 and n = 1000 + assert_(0.99999 < _cdf_cvm(333.3, 1000) < 1.0) + assert_(0.99999 < _cdf_cvm(333.3) < 1.0) + + def test_low_p(self): + # _cdf_cvm can return values larger than 1. In that case, we just + # return a p-value of zero. 
+ n = 12 + res = cramervonmises(np.ones(n)*0.8, 'norm') + assert_(_cdf_cvm(res.statistic, n) > 1.0) + assert_equal(res.pvalue, 0) + + def test_invalid_input(self): + assert_raises(ValueError, cramervonmises, [1.5], "norm") + assert_raises(ValueError, cramervonmises, (), "norm") + + def test_values_R(self): + # compared against R package goftest, version 1.1.1 + # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), "pnorm") + res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm") + assert_allclose(res.statistic, 0.288156, atol=1e-6) + assert_allclose(res.pvalue, 0.1453465, atol=1e-6) + + # goftest::cvm.test(c(-1.7, 2, 0, 1.3, 4, 0.1, 0.6), + # "pnorm", mean = 3, sd = 1.5) + res = cramervonmises([-1.7, 2, 0, 1.3, 4, 0.1, 0.6], "norm", (3, 1.5)) + assert_allclose(res.statistic, 0.9426685, atol=1e-6) + assert_allclose(res.pvalue, 0.002026417, atol=1e-6) + + # goftest::cvm.test(c(1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5), "pexp") + res = cramervonmises([1, 2, 5, 1.4, 0.14, 11, 13, 0.9, 7.5], "expon") + assert_allclose(res.statistic, 0.8421854, atol=1e-6) + assert_allclose(res.pvalue, 0.004433406, atol=1e-6) + + def test_callable_cdf(self): + x, args = np.arange(5), (1.4, 0.7) + r1 = cramervonmises(x, distributions.expon.cdf) + r2 = cramervonmises(x, "expon") + assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue)) + + r1 = cramervonmises(x, distributions.beta.cdf, args) + r2 = cramervonmises(x, "beta", args) + assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue)) + + +class TestMannWhitneyU: + def setup_method(self): + _mwu_state._recursive = True + + # All magic numbers are from R wilcox.test unless otherwise specified + # https://rdrr.io/r/stats/wilcox.test.html + + # --- Test Input Validation --- + + def test_input_validation(self): + x = np.array([1, 2]) # generic, valid inputs + y = np.array([3, 4]) + with assert_raises(ValueError, match="`x` and `y` must be of nonzero"): + mannwhitneyu([], y) + with assert_raises(ValueError, match="`x` 
and `y` must be of nonzero"): + mannwhitneyu(x, []) + with assert_raises(ValueError, match="`use_continuity` must be one"): + mannwhitneyu(x, y, use_continuity='ekki') + with assert_raises(ValueError, match="`alternative` must be one of"): + mannwhitneyu(x, y, alternative='ekki') + with assert_raises(ValueError, match="`axis` must be an integer"): + mannwhitneyu(x, y, axis=1.5) + with assert_raises(ValueError, match="`method` must be one of"): + mannwhitneyu(x, y, method='ekki') + + def test_auto(self): + # Test that default method ('auto') chooses intended method + + np.random.seed(1) + n = 8 # threshold to switch from exact to asymptotic + + # both inputs are smaller than threshold; should use exact + x = np.random.rand(n-1) + y = np.random.rand(n-1) + auto = mannwhitneyu(x, y) + asymptotic = mannwhitneyu(x, y, method='asymptotic') + exact = mannwhitneyu(x, y, method='exact') + assert auto.pvalue == exact.pvalue + assert auto.pvalue != asymptotic.pvalue + + # one input is smaller than threshold; should use exact + x = np.random.rand(n-1) + y = np.random.rand(n+1) + auto = mannwhitneyu(x, y) + asymptotic = mannwhitneyu(x, y, method='asymptotic') + exact = mannwhitneyu(x, y, method='exact') + assert auto.pvalue == exact.pvalue + assert auto.pvalue != asymptotic.pvalue + + # other input is smaller than threshold; should use exact + auto = mannwhitneyu(y, x) + asymptotic = mannwhitneyu(x, y, method='asymptotic') + exact = mannwhitneyu(x, y, method='exact') + assert auto.pvalue == exact.pvalue + assert auto.pvalue != asymptotic.pvalue + + # both inputs are larger than threshold; should use asymptotic + x = np.random.rand(n+1) + y = np.random.rand(n+1) + auto = mannwhitneyu(x, y) + asymptotic = mannwhitneyu(x, y, method='asymptotic') + exact = mannwhitneyu(x, y, method='exact') + assert auto.pvalue != exact.pvalue + assert auto.pvalue == asymptotic.pvalue + + # both inputs are smaller than threshold, but there is a tie + # should use asymptotic + x = 
np.random.rand(n-1) + y = np.random.rand(n-1) + y[3] = x[3] + auto = mannwhitneyu(x, y) + asymptotic = mannwhitneyu(x, y, method='asymptotic') + exact = mannwhitneyu(x, y, method='exact') + assert auto.pvalue != exact.pvalue + assert auto.pvalue == asymptotic.pvalue + + # --- Test Basic Functionality --- + + x = [210.052110, 110.190630, 307.918612] + y = [436.08811482466416, 416.37397329768191, 179.96975939463582, + 197.8118754228619, 34.038757281225756, 138.54220550921517, + 128.7769351470246, 265.92721427951852, 275.6617533155341, + 592.34083395416258, 448.73177590617018, 300.61495185038905, + 187.97508449019588] + + # This test was written for mann_whitney_u in gh-4933. + # Originally, the p-values for alternatives were swapped; + # this has been corrected and the tests have been refactored for + # compactness, but otherwise the tests are unchanged. + # R code for comparison, e.g.: + # options(digits = 16) + # x = c(210.052110, 110.190630, 307.918612) + # y = c(436.08811482466416, 416.37397329768191, 179.96975939463582, + # 197.8118754228619, 34.038757281225756, 138.54220550921517, + # 128.7769351470246, 265.92721427951852, 275.6617533155341, + # 592.34083395416258, 448.73177590617018, 300.61495185038905, + # 187.97508449019588) + # wilcox.test(x, y, alternative="g", exact=TRUE) + cases_basic = [[{"alternative": 'two-sided', "method": "asymptotic"}, + (16, 0.6865041817876)], + [{"alternative": 'less', "method": "asymptotic"}, + (16, 0.3432520908938)], + [{"alternative": 'greater', "method": "asymptotic"}, + (16, 0.7047591913255)], + [{"alternative": 'two-sided', "method": "exact"}, + (16, 0.7035714285714)], + [{"alternative": 'less', "method": "exact"}, + (16, 0.3517857142857)], + [{"alternative": 'greater', "method": "exact"}, + (16, 0.6946428571429)]] + + @pytest.mark.parametrize(("kwds", "expected"), cases_basic) + def test_basic(self, kwds, expected): + res = mannwhitneyu(self.x, self.y, **kwds) + assert_allclose(res, expected) + + cases_continuity = 
[[{"alternative": 'two-sided', "use_continuity": True}, + (23, 0.6865041817876)], + [{"alternative": 'less', "use_continuity": True}, + (23, 0.7047591913255)], + [{"alternative": 'greater', "use_continuity": True}, + (23, 0.3432520908938)], + [{"alternative": 'two-sided', "use_continuity": False}, + (23, 0.6377328900502)], + [{"alternative": 'less', "use_continuity": False}, + (23, 0.6811335549749)], + [{"alternative": 'greater', "use_continuity": False}, + (23, 0.3188664450251)]] + + @pytest.mark.parametrize(("kwds", "expected"), cases_continuity) + def test_continuity(self, kwds, expected): + # When x and y are interchanged, less and greater p-values should + # swap (compare to above). This wouldn't happen if the continuity + # correction were applied in the wrong direction. Note that less and + # greater p-values do not sum to 1 when continuity correction is on, + # which is what we'd expect. Also check that results match R when + # continuity correction is turned off. + # Note that method='asymptotic' -> exact=FALSE + # and use_continuity=False -> correct=FALSE, e.g.: + # wilcox.test(x, y, alternative="t", exact=FALSE, correct=FALSE) + res = mannwhitneyu(self.y, self.x, method='asymptotic', **kwds) + assert_allclose(res, expected) + + def test_tie_correct(self): + # Test tie correction against R's wilcox.test + # options(digits = 16) + # x = c(1, 2, 3, 4) + # y = c(1, 2, 3, 4, 5) + # wilcox.test(x, y, exact=FALSE) + x = [1, 2, 3, 4] + y0 = np.array([1, 2, 3, 4, 5]) + dy = np.array([0, 1, 0, 1, 0])*0.01 + dy2 = np.array([0, 0, 1, 0, 0])*0.01 + y = [y0-0.01, y0-dy, y0-dy2, y0, y0+dy2, y0+dy, y0+0.01] + res = mannwhitneyu(x, y, axis=-1, method="asymptotic") + U_expected = [10, 9, 8.5, 8, 7.5, 7, 6] + p_expected = [1, 0.9017048037317, 0.804080657472, 0.7086240584439, + 0.6197963884941, 0.5368784563079, 0.3912672792826] + assert_equal(res.statistic, U_expected) + assert_allclose(res.pvalue, p_expected) + + # --- Test Exact Distribution of U --- + + # These are 
tabulated values of the CDF of the exact distribution of + # the test statistic from pg 52 of reference [1] (Mann-Whitney Original) + pn3 = {1: [0.25, 0.5, 0.75], 2: [0.1, 0.2, 0.4, 0.6], + 3: [0.05, .1, 0.2, 0.35, 0.5, 0.65]} + pn4 = {1: [0.2, 0.4, 0.6], 2: [0.067, 0.133, 0.267, 0.4, 0.6], + 3: [0.028, 0.057, 0.114, 0.2, .314, 0.429, 0.571], + 4: [0.014, 0.029, 0.057, 0.1, 0.171, 0.243, 0.343, 0.443, 0.557]} + pm5 = {1: [0.167, 0.333, 0.5, 0.667], + 2: [0.047, 0.095, 0.19, 0.286, 0.429, 0.571], + 3: [0.018, 0.036, 0.071, 0.125, 0.196, 0.286, 0.393, 0.5, 0.607], + 4: [0.008, 0.016, 0.032, 0.056, 0.095, 0.143, + 0.206, 0.278, 0.365, 0.452, 0.548], + 5: [0.004, 0.008, 0.016, 0.028, 0.048, 0.075, 0.111, + 0.155, 0.21, 0.274, 0.345, .421, 0.5, 0.579]} + pm6 = {1: [0.143, 0.286, 0.428, 0.571], + 2: [0.036, 0.071, 0.143, 0.214, 0.321, 0.429, 0.571], + 3: [0.012, 0.024, 0.048, 0.083, 0.131, + 0.19, 0.274, 0.357, 0.452, 0.548], + 4: [0.005, 0.01, 0.019, 0.033, 0.057, 0.086, 0.129, + 0.176, 0.238, 0.305, 0.381, 0.457, 0.543], # the last element + # of the previous list, 0.543, has been modified from 0.545; + # I assume it was a typo + 5: [0.002, 0.004, 0.009, 0.015, 0.026, 0.041, 0.063, 0.089, + 0.123, 0.165, 0.214, 0.268, 0.331, 0.396, 0.465, 0.535], + 6: [0.001, 0.002, 0.004, 0.008, 0.013, 0.021, 0.032, 0.047, + 0.066, 0.09, 0.12, 0.155, 0.197, 0.242, 0.294, 0.350, + 0.409, 0.469, 0.531]} + + def test_exact_distribution(self): + # I considered parametrize. I decided against it. 
+ p_tables = {3: self.pn3, 4: self.pn4, 5: self.pm5, 6: self.pm6} + for n, table in p_tables.items(): + for m, p in table.items(): + # check p-value against table + u = np.arange(0, len(p)) + assert_allclose(_mwu_state.cdf(k=u, m=m, n=n), p, atol=1e-3) + + # check identity CDF + SF - PMF = 1 + # ( In this implementation, SF(U) includes PMF(U) ) + u2 = np.arange(0, m*n+1) + assert_allclose(_mwu_state.cdf(k=u2, m=m, n=n) + + _mwu_state.sf(k=u2, m=m, n=n) + - _mwu_state.pmf(k=u2, m=m, n=n), 1) + + # check symmetry about mean of U, i.e. pmf(U) = pmf(m*n-U) + pmf = _mwu_state.pmf(k=u2, m=m, n=n) + assert_allclose(pmf, pmf[::-1]) + + # check symmetry w.r.t. interchange of m, n + pmf2 = _mwu_state.pmf(k=u2, m=n, n=m) + assert_allclose(pmf, pmf2) + + def test_asymptotic_behavior(self): + np.random.seed(0) + + # for small samples, the asymptotic test is not very accurate + x = np.random.rand(5) + y = np.random.rand(5) + res1 = mannwhitneyu(x, y, method="exact") + res2 = mannwhitneyu(x, y, method="asymptotic") + assert res1.statistic == res2.statistic + assert np.abs(res1.pvalue - res2.pvalue) > 1e-2 + + # for large samples, they agree reasonably well + x = np.random.rand(40) + y = np.random.rand(40) + res1 = mannwhitneyu(x, y, method="exact") + res2 = mannwhitneyu(x, y, method="asymptotic") + assert res1.statistic == res2.statistic + assert np.abs(res1.pvalue - res2.pvalue) < 1e-3 + + # --- Test Corner Cases --- + + def test_exact_U_equals_mean(self): + # Test U == m*n/2 with exact method + # Without special treatment, two-sided p-value > 1 because both + # one-sided p-values are > 0.5 + res_l = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="less", + method="exact") + res_g = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="greater", + method="exact") + assert_equal(res_l.pvalue, res_g.pvalue) + assert res_l.pvalue > 0.5 + + res = mannwhitneyu([1, 2, 3], [1.5, 2.5], alternative="two-sided", + method="exact") + assert_equal(res, (3, 1)) + # U == m*n/2 for asymptotic case 
tested in test_gh_2118 + # The reason it's tricky for the asymptotic test has to do with + # continuity correction. + + cases_scalar = [[{"alternative": 'two-sided', "method": "asymptotic"}, + (0, 1)], + [{"alternative": 'less', "method": "asymptotic"}, + (0, 0.5)], + [{"alternative": 'greater', "method": "asymptotic"}, + (0, 0.977249868052)], + [{"alternative": 'two-sided', "method": "exact"}, (0, 1)], + [{"alternative": 'less', "method": "exact"}, (0, 0.5)], + [{"alternative": 'greater', "method": "exact"}, (0, 1)]] + + @pytest.mark.parametrize(("kwds", "result"), cases_scalar) + def test_scalar_data(self, kwds, result): + # just making sure scalars work + assert_allclose(mannwhitneyu(1, 2, **kwds), result) + + def test_equal_scalar_data(self): + # when two scalars are equal, there is an -0.5/0 in the asymptotic + # approximation. R gives pvalue=1.0 for alternatives 'less' and + # 'greater' but NA for 'two-sided'. I don't see why, so I don't + # see a need for a special case to match that behavior. + assert_equal(mannwhitneyu(1, 1, method="exact"), (0.5, 1)) + assert_equal(mannwhitneyu(1, 1, method="asymptotic"), (0.5, 1)) + + # without continuity correction, this becomes 0/0, which really + # is undefined + assert_equal(mannwhitneyu(1, 1, method="asymptotic", + use_continuity=False), (0.5, np.nan)) + + # --- Test Enhancements / Bug Reports --- + + @pytest.mark.parametrize("method", ["asymptotic", "exact"]) + def test_gh_12837_11113(self, method): + # Test that behavior for broadcastable nd arrays is appropriate: + # output shape is correct and all values are equal to when the test + # is performed on one pair of samples at a time. 
+ # Tests that gh-12837 and gh-11113 (requests for n-d input) + # are resolved + np.random.seed(0) + + # arrays are broadcastable except for axis = -3 + axis = -3 + m, n = 7, 10 # sample sizes + x = np.random.rand(m, 3, 8) + y = np.random.rand(6, n, 1, 8) + 0.1 + res = mannwhitneyu(x, y, method=method, axis=axis) + + shape = (6, 3, 8) # appropriate shape of outputs, given inputs + assert res.pvalue.shape == shape + assert res.statistic.shape == shape + + # move axis of test to end for simplicity + x, y = np.moveaxis(x, axis, -1), np.moveaxis(y, axis, -1) + + x = x[None, ...] # give x a zeroth dimension + assert x.ndim == y.ndim + + x = np.broadcast_to(x, shape + (m,)) + y = np.broadcast_to(y, shape + (n,)) + assert x.shape[:-1] == shape + assert y.shape[:-1] == shape + + # loop over pairs of samples + statistics = np.zeros(shape) + pvalues = np.zeros(shape) + for indices in product(*[range(i) for i in shape]): + xi = x[indices] + yi = y[indices] + temp = mannwhitneyu(xi, yi, method=method) + statistics[indices] = temp.statistic + pvalues[indices] = temp.pvalue + + np.testing.assert_equal(res.pvalue, pvalues) + np.testing.assert_equal(res.statistic, statistics) + + def test_gh_11355(self): + # Test for correct behavior with NaN/Inf in input + x = [1, 2, 3, 4] + y = [3, 6, 7, 8, 9, 3, 2, 1, 4, 4, 5] + res1 = mannwhitneyu(x, y) + + # Inf is not a problem. This is a rank test, and it's the largest value + y[4] = np.inf + res2 = mannwhitneyu(x, y) + + assert_equal(res1.statistic, res2.statistic) + assert_equal(res1.pvalue, res2.pvalue) + + # NaNs should propagate by default. 
+ y[4] = np.nan + res3 = mannwhitneyu(x, y) + assert_equal(res3.statistic, np.nan) + assert_equal(res3.pvalue, np.nan) + + cases_11355 = [([1, 2, 3, 4], + [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5], + 10, 0.1297704873477), + ([1, 2, 3, 4], + [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5], + 8.5, 0.08735617507695), + ([1, 2, np.inf, 4], + [3, 6, 7, 8, np.inf, 3, 2, 1, 4, 4, 5], + 17.5, 0.5988856695752), + ([1, 2, np.inf, 4], + [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5], + 16, 0.4687165824462), + ([1, np.inf, np.inf, 4], + [3, 6, 7, 8, np.inf, np.inf, 2, 1, 4, 4, 5], + 24.5, 0.7912517950119)] + + @pytest.mark.parametrize(("x", "y", "statistic", "pvalue"), cases_11355) + def test_gh_11355b(self, x, y, statistic, pvalue): + # Test for correct behavior with NaN/Inf in input + res = mannwhitneyu(x, y, method='asymptotic') + assert_allclose(res.statistic, statistic, atol=1e-12) + assert_allclose(res.pvalue, pvalue, atol=1e-12) + + cases_9184 = [[True, "less", "asymptotic", 0.900775348204], + [True, "greater", "asymptotic", 0.1223118025635], + [True, "two-sided", "asymptotic", 0.244623605127], + [False, "less", "asymptotic", 0.8896643190401], + [False, "greater", "asymptotic", 0.1103356809599], + [False, "two-sided", "asymptotic", 0.2206713619198], + [True, "less", "exact", 0.8967698967699], + [True, "greater", "exact", 0.1272061272061], + [True, "two-sided", "exact", 0.2544122544123]] + + @pytest.mark.parametrize(("use_continuity", "alternative", + "method", "pvalue_exp"), cases_9184) + def test_gh_9184(self, use_continuity, alternative, method, pvalue_exp): + # gh-9184 might be considered a doc-only bug. Please see the + # documentation to confirm that mannwhitneyu correctly notes + # that the output statistic is that of the first sample (x). In any + # case, check the case provided there against output from R. 
+ # R code: + # options(digits=16) + # x <- c(0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46) + # y <- c(1.15, 0.88, 0.90, 0.74, 1.21) + # wilcox.test(x, y, alternative = "less", exact = FALSE) + # wilcox.test(x, y, alternative = "greater", exact = FALSE) + # wilcox.test(x, y, alternative = "two.sided", exact = FALSE) + # wilcox.test(x, y, alternative = "less", exact = FALSE, + # correct=FALSE) + # wilcox.test(x, y, alternative = "greater", exact = FALSE, + # correct=FALSE) + # wilcox.test(x, y, alternative = "two.sided", exact = FALSE, + # correct=FALSE) + # wilcox.test(x, y, alternative = "less", exact = TRUE) + # wilcox.test(x, y, alternative = "greater", exact = TRUE) + # wilcox.test(x, y, alternative = "two.sided", exact = TRUE) + statistic_exp = 35 + x = (0.80, 0.83, 1.89, 1.04, 1.45, 1.38, 1.91, 1.64, 0.73, 1.46) + y = (1.15, 0.88, 0.90, 0.74, 1.21) + res = mannwhitneyu(x, y, use_continuity=use_continuity, + alternative=alternative, method=method) + assert_equal(res.statistic, statistic_exp) + assert_allclose(res.pvalue, pvalue_exp) + + def test_gh_6897(self): + # Test for correct behavior with empty input + with assert_raises(ValueError, match="`x` and `y` must be of nonzero"): + mannwhitneyu([], []) + + def test_gh_4067(self): + # Test for correct behavior with all NaN input - default is propagate + a = np.array([np.nan, np.nan, np.nan, np.nan, np.nan]) + b = np.array([np.nan, np.nan, np.nan, np.nan, np.nan]) + res = mannwhitneyu(a, b) + assert_equal(res.statistic, np.nan) + assert_equal(res.pvalue, np.nan) + + # All cases checked against R wilcox.test, e.g. 
+ # options(digits=16) + # x = c(1, 2, 3) + # y = c(1.5, 2.5) + # wilcox.test(x, y, exact=FALSE, alternative='less') + + cases_2118 = [[[1, 2, 3], [1.5, 2.5], "greater", (3, 0.6135850036578)], + [[1, 2, 3], [1.5, 2.5], "less", (3, 0.6135850036578)], + [[1, 2, 3], [1.5, 2.5], "two-sided", (3, 1.0)], + [[1, 2, 3], [2], "greater", (1.5, 0.681324055883)], + [[1, 2, 3], [2], "less", (1.5, 0.681324055883)], + [[1, 2, 3], [2], "two-sided", (1.5, 1)], + [[1, 2], [1, 2], "greater", (2, 0.667497228949)], + [[1, 2], [1, 2], "less", (2, 0.667497228949)], + [[1, 2], [1, 2], "two-sided", (2, 1)]] + + @pytest.mark.parametrize(["x", "y", "alternative", "expected"], cases_2118) + def test_gh_2118(self, x, y, alternative, expected): + # test cases in which U == m*n/2 when method is asymptotic + # applying continuity correction could result in p-value > 1 + res = mannwhitneyu(x, y, use_continuity=True, alternative=alternative, + method="asymptotic") + assert_allclose(res, expected, rtol=1e-12) + + def test_gh19692_smaller_table(self): + # In gh-19692, we noted that the shape of the cache used in calculating + # p-values was dependent on the order of the inputs because the sample + # sizes n1 and n2 changed. This was indicative of unnecessary cache + # growth and redundant calculation. Check that this is resolved. + rng = np.random.default_rng(7600451795963068007) + x = rng.random(size=5) + y = rng.random(size=11) + _mwu_state._fmnks = -np.ones((1, 1, 1)) # reset cache + stats.mannwhitneyu(x, y, method='exact') + shape = _mwu_state._fmnks.shape + assert shape[0] <= 6 and shape[1] <= 12 # one more than sizes + stats.mannwhitneyu(y, x, method='exact') + assert shape == _mwu_state._fmnks.shape # unchanged when sizes are reversed + + # Also, we weren't exploiting the symmmetry of the null distribution + # to its full potential. Ensure that the null distribution is not + # evaluated explicitly for `k > m*n/2`. 
+ _mwu_state._fmnks = -np.ones((1, 1, 1)) # reset cache + stats.mannwhitneyu(x, 0*y, method='exact', alternative='greater') + shape = _mwu_state._fmnks.shape + assert shape[-1] == 1 # k is smallest possible + stats.mannwhitneyu(0*x, y, method='exact', alternative='greater') + assert shape == _mwu_state._fmnks.shape + + @pytest.mark.parametrize('alternative', ['less', 'greater', 'two-sided']) + def test_permutation_method(self, alternative): + rng = np.random.default_rng(7600451795963068007) + x = rng.random(size=(2, 5)) + y = rng.random(size=(2, 6)) + res = stats.mannwhitneyu(x, y, method=stats.PermutationMethod(), + alternative=alternative, axis=1) + res2 = stats.mannwhitneyu(x, y, method='exact', + alternative=alternative, axis=1) + assert_allclose(res.statistic, res2.statistic, rtol=1e-15) + assert_allclose(res.pvalue, res2.pvalue, rtol=1e-15) + + def teardown_method(self): + _mwu_state._recursive = None + + +class TestMannWhitneyU_iterative(TestMannWhitneyU): + def setup_method(self): + _mwu_state._recursive = False + + def teardown_method(self): + _mwu_state._recursive = None + + +@pytest.mark.xslow +def test_mann_whitney_u_switch(): + # Check that mannwhiteneyu switches between recursive and iterative + # implementations at n = 500 + + # ensure that recursion is not enforced + _mwu_state._recursive = None + _mwu_state._fmnks = -np.ones((1, 1, 1)) + + rng = np.random.default_rng(9546146887652) + x = rng.random(5) + + # use iterative algorithm because n > 500 + y = rng.random(501) + stats.mannwhitneyu(x, y, method='exact') + # iterative algorithm doesn't modify _mwu_state._fmnks + assert np.all(_mwu_state._fmnks == -1) + + # use recursive algorithm because n <= 500 + y = rng.random(500) + stats.mannwhitneyu(x, y, method='exact') + + # recursive algorithm has modified _mwu_state._fmnks + assert not np.all(_mwu_state._fmnks == -1) + + +class TestSomersD(_TestPythranFunc): + def setup_method(self): + self.dtypes = self.ALL_INTEGER + self.ALL_FLOAT + self.arguments 
= {0: (np.arange(10), + self.ALL_INTEGER + self.ALL_FLOAT), + 1: (np.arange(10), + self.ALL_INTEGER + self.ALL_FLOAT)} + input_array = [self.arguments[idx][0] for idx in self.arguments] + # In this case, self.partialfunc can simply be stats.somersd, + # since `alternative` is an optional argument. If it is required, + # we can use functools.partial to freeze the value, because + # we only mainly test various array inputs, not str, etc. + self.partialfunc = functools.partial(stats.somersd, + alternative='two-sided') + self.expected = self.partialfunc(*input_array) + + def pythranfunc(self, *args): + res = self.partialfunc(*args) + assert_allclose(res.statistic, self.expected.statistic, atol=1e-15) + assert_allclose(res.pvalue, self.expected.pvalue, atol=1e-15) + + def test_pythranfunc_keywords(self): + # Not specifying the optional keyword args + table = [[27, 25, 14, 7, 0], [7, 14, 18, 35, 12], [1, 3, 2, 7, 17]] + res1 = stats.somersd(table) + # Specifying the optional keyword args with default value + optional_args = self.get_optional_args(stats.somersd) + res2 = stats.somersd(table, **optional_args) + # Check if the results are the same in two cases + assert_allclose(res1.statistic, res2.statistic, atol=1e-15) + assert_allclose(res1.pvalue, res2.pvalue, atol=1e-15) + + def test_like_kendalltau(self): + # All tests correspond with one in test_stats.py `test_kendalltau` + + # case without ties, con-dis equal zero + x = [5, 2, 1, 3, 6, 4, 7, 8] + y = [5, 2, 6, 3, 1, 8, 7, 4] + # Cross-check with result from SAS FREQ: + expected = (0.000000000000000, 1.000000000000000) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # case without ties, con-dis equal zero + x = [0, 5, 2, 1, 3, 6, 4, 7, 8] + y = [5, 2, 0, 6, 3, 1, 8, 7, 4] + # Cross-check with result from SAS FREQ: + expected = (0.000000000000000, 1.000000000000000) + res = stats.somersd(x, y) + 
assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # case without ties, con-dis close to zero + x = [5, 2, 1, 3, 6, 4, 7] + y = [5, 2, 6, 3, 1, 7, 4] + # Cross-check with result from SAS FREQ: + expected = (-0.142857142857140, 0.630326953157670) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # simple case without ties + x = np.arange(10) + y = np.arange(10) + # Cross-check with result from SAS FREQ: + # SAS p value is not provided. + expected = (1.000000000000000, 0) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # swap a couple values and a couple more + x = np.arange(10) + y = np.array([0, 2, 1, 3, 4, 6, 5, 7, 8, 9]) + # Cross-check with result from SAS FREQ: + expected = (0.911111111111110, 0.000000000000000) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # same in opposite direction + x = np.arange(10) + y = np.arange(10)[::-1] + # Cross-check with result from SAS FREQ: + # SAS p value is not provided. 
+ expected = (-1.000000000000000, 0) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # swap a couple values and a couple more + x = np.arange(10) + y = np.array([9, 7, 8, 6, 5, 3, 4, 2, 1, 0]) + # Cross-check with result from SAS FREQ: + expected = (-0.9111111111111111, 0.000000000000000) + res = stats.somersd(x, y) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # with some ties + x1 = [12, 2, 1, 12, 2] + x2 = [1, 4, 7, 1, 0] + # Cross-check with result from SAS FREQ: + expected = (-0.500000000000000, 0.304901788178780) + res = stats.somersd(x1, x2) + assert_allclose(res.statistic, expected[0], atol=1e-15) + assert_allclose(res.pvalue, expected[1], atol=1e-15) + + # with only ties in one or both inputs + # SAS will not produce an output for these: + # NOTE: No statistics are computed for x * y because x has fewer + # than 2 nonmissing levels. + # WARNING: No OUTPUT data set is produced for this table because a + # row or column variable has fewer than 2 nonmissing levels and no + # statistics are computed. + + res = stats.somersd([2, 2, 2], [2, 2, 2]) + assert_allclose(res.statistic, np.nan) + assert_allclose(res.pvalue, np.nan) + + res = stats.somersd([2, 0, 2], [2, 2, 2]) + assert_allclose(res.statistic, np.nan) + assert_allclose(res.pvalue, np.nan) + + res = stats.somersd([2, 2, 2], [2, 0, 2]) + assert_allclose(res.statistic, np.nan) + assert_allclose(res.pvalue, np.nan) + + res = stats.somersd([0], [0]) + assert_allclose(res.statistic, np.nan) + assert_allclose(res.pvalue, np.nan) + + # empty arrays provided as input + res = stats.somersd([], []) + assert_allclose(res.statistic, np.nan) + assert_allclose(res.pvalue, np.nan) + + # test unequal length inputs + x = np.arange(10.) + y = np.arange(20.) 
+ assert_raises(ValueError, stats.somersd, x, y) + + def test_asymmetry(self): + # test that somersd is asymmetric w.r.t. input order and that + # convention is as described: first input is row variable & independent + # data is from Wikipedia: + # https://en.wikipedia.org/wiki/Somers%27_D + # but currently that example contradicts itself - it says X is + # independent yet take D_XY + + x = [1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 1, 2, + 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3] + y = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2] + # Cross-check with result from SAS FREQ: + d_cr = 0.272727272727270 + d_rc = 0.342857142857140 + p = 0.092891940883700 # same p-value for either direction + res = stats.somersd(x, y) + assert_allclose(res.statistic, d_cr, atol=1e-15) + assert_allclose(res.pvalue, p, atol=1e-4) + assert_equal(res.table.shape, (3, 2)) + res = stats.somersd(y, x) + assert_allclose(res.statistic, d_rc, atol=1e-15) + assert_allclose(res.pvalue, p, atol=1e-15) + assert_equal(res.table.shape, (2, 3)) + + def test_somers_original(self): + # test against Somers' original paper [1] + + # Table 5A + # Somers' convention was column IV + table = np.array([[8, 2], [6, 5], [3, 4], [1, 3], [2, 3]]) + # Our convention (and that of SAS FREQ) is row IV + table = table.T + dyx = 129/340 + assert_allclose(stats.somersd(table).statistic, dyx) + + # table 7A - d_yx = 1 + table = np.array([[25, 0], [85, 0], [0, 30]]) + dxy, dyx = 3300/5425, 3300/3300 + assert_allclose(stats.somersd(table).statistic, dxy) + assert_allclose(stats.somersd(table.T).statistic, dyx) + + # table 7B - d_yx < 0 + table = np.array([[25, 0], [0, 30], [85, 0]]) + dyx = -1800/3300 + assert_allclose(stats.somersd(table.T).statistic, dyx) + + def test_contingency_table_with_zero_rows_cols(self): + # test that zero rows/cols in contingency table don't affect result + + N = 100 + shape = 4, 6 + size = np.prod(shape) + + np.random.seed(0) + s = stats.multinomial.rvs(N, 
p=np.ones(size)/size).reshape(shape) + res = stats.somersd(s) + + s2 = np.insert(s, 2, np.zeros(shape[1]), axis=0) + res2 = stats.somersd(s2) + + s3 = np.insert(s, 2, np.zeros(shape[0]), axis=1) + res3 = stats.somersd(s3) + + s4 = np.insert(s2, 2, np.zeros(shape[0]+1), axis=1) + res4 = stats.somersd(s4) + + # Cross-check with result from SAS FREQ: + assert_allclose(res.statistic, -0.116981132075470, atol=1e-15) + assert_allclose(res.statistic, res2.statistic) + assert_allclose(res.statistic, res3.statistic) + assert_allclose(res.statistic, res4.statistic) + + assert_allclose(res.pvalue, 0.156376448188150, atol=1e-15) + assert_allclose(res.pvalue, res2.pvalue) + assert_allclose(res.pvalue, res3.pvalue) + assert_allclose(res.pvalue, res4.pvalue) + + def test_invalid_contingency_tables(self): + N = 100 + shape = 4, 6 + size = np.prod(shape) + + np.random.seed(0) + # start with a valid contingency table + s = stats.multinomial.rvs(N, p=np.ones(size)/size).reshape(shape) + + s5 = s - 2 + message = "All elements of the contingency table must be non-negative" + with assert_raises(ValueError, match=message): + stats.somersd(s5) + + s6 = s + 0.01 + message = "All elements of the contingency table must be integer" + with assert_raises(ValueError, match=message): + stats.somersd(s6) + + message = ("At least two elements of the contingency " + "table must be nonzero.") + with assert_raises(ValueError, match=message): + stats.somersd([[]]) + + with assert_raises(ValueError, match=message): + stats.somersd([[1]]) + + s7 = np.zeros((3, 3)) + with assert_raises(ValueError, match=message): + stats.somersd(s7) + + s7[0, 1] = 1 + with assert_raises(ValueError, match=message): + stats.somersd(s7) + + def test_only_ranks_matter(self): + # only ranks of input data should matter + x = [1, 2, 3] + x2 = [-1, 2.1, np.inf] + y = [3, 2, 1] + y2 = [0, -0.5, -np.inf] + res = stats.somersd(x, y) + res2 = stats.somersd(x2, y2) + assert_equal(res.statistic, res2.statistic) + 
assert_equal(res.pvalue, res2.pvalue) + + def test_contingency_table_return(self): + # check that contingency table is returned + x = np.arange(10) + y = np.arange(10) + res = stats.somersd(x, y) + assert_equal(res.table, np.eye(10)) + + def test_somersd_alternative(self): + # Test alternative parameter, asymptotic method (due to tie) + + # Based on scipy.stats.test_stats.TestCorrSpearman2::test_alternative + x1 = [1, 2, 3, 4, 5] + x2 = [5, 6, 7, 8, 7] + + # strong positive correlation + expected = stats.somersd(x1, x2, alternative="two-sided") + assert expected.statistic > 0 + + # rank correlation > 0 -> large "less" p-value + res = stats.somersd(x1, x2, alternative="less") + assert_equal(res.statistic, expected.statistic) + assert_allclose(res.pvalue, 1 - (expected.pvalue / 2)) + + # rank correlation > 0 -> small "greater" p-value + res = stats.somersd(x1, x2, alternative="greater") + assert_equal(res.statistic, expected.statistic) + assert_allclose(res.pvalue, expected.pvalue / 2) + + # reverse the direction of rank correlation + x2.reverse() + + # strong negative correlation + expected = stats.somersd(x1, x2, alternative="two-sided") + assert expected.statistic < 0 + + # rank correlation < 0 -> large "greater" p-value + res = stats.somersd(x1, x2, alternative="greater") + assert_equal(res.statistic, expected.statistic) + assert_allclose(res.pvalue, 1 - (expected.pvalue / 2)) + + # rank correlation < 0 -> small "less" p-value + res = stats.somersd(x1, x2, alternative="less") + assert_equal(res.statistic, expected.statistic) + assert_allclose(res.pvalue, expected.pvalue / 2) + + with pytest.raises(ValueError, match="`alternative` must be..."): + stats.somersd(x1, x2, alternative="ekki-ekki") + + @pytest.mark.parametrize("positive_correlation", (False, True)) + def test_somersd_perfect_correlation(self, positive_correlation): + # Before the addition of `alternative`, perfect correlation was + # treated as a special case. 
Now it is treated like any other case, but + # make sure there are no divide by zero warnings or associated errors + + x1 = np.arange(10) + x2 = x1 if positive_correlation else np.flip(x1) + expected_statistic = 1 if positive_correlation else -1 + + # perfect correlation -> small "two-sided" p-value (0) + res = stats.somersd(x1, x2, alternative="two-sided") + assert res.statistic == expected_statistic + assert res.pvalue == 0 + + # rank correlation > 0 -> large "less" p-value (1) + res = stats.somersd(x1, x2, alternative="less") + assert res.statistic == expected_statistic + assert res.pvalue == (1 if positive_correlation else 0) + + # rank correlation > 0 -> small "greater" p-value (0) + res = stats.somersd(x1, x2, alternative="greater") + assert res.statistic == expected_statistic + assert res.pvalue == (0 if positive_correlation else 1) + + def test_somersd_large_inputs_gh18132(self): + # Test that large inputs where potential overflows could occur give + # the expected output. This is tested in the case of binary inputs. + # See gh-18126. 
+ + # generate lists of random classes 1-2 (binary) + classes = [1, 2] + n_samples = 10 ** 6 + random.seed(6272161) + x = random.choices(classes, k=n_samples) + y = random.choices(classes, k=n_samples) + + # get value to compare with: sklearn output + # from sklearn import metrics + # val_auc_sklearn = metrics.roc_auc_score(x, y) + # # convert to the Gini coefficient (Gini = (AUC*2)-1) + # val_sklearn = 2 * val_auc_sklearn - 1 + val_sklearn = -0.001528138777036947 + + # calculate the Somers' D statistic, which should be equal to the + # result of val_sklearn until approximately machine precision + val_scipy = stats.somersd(x, y).statistic + assert_allclose(val_sklearn, val_scipy, atol=1e-15) + + +class TestBarnardExact: + """Some tests to show that barnard_exact() works correctly.""" + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[43, 40], [10, 39]], (3.555406779643, 0.000362832367)), + ([[100, 2], [1000, 5]], (-1.776382925679, 0.135126970878)), + ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)), + ([[5, 1], [10, 10]], (1.449486150679, 0.156277546306)), + ([[5, 15], [20, 20]], (-1.851640199545, 0.066363501421)), + ([[5, 16], [20, 25]], (-1.609639949352, 0.116984852192)), + ([[10, 5], [10, 1]], (-1.449486150679, 0.177536588915)), + ([[5, 0], [1, 4]], (2.581988897472, 0.013671875000)), + ([[0, 1], [3, 2]], (-1.095445115010, 0.509667991877)), + ([[0, 2], [6, 4]], (-1.549193338483, 0.197019618792)), + ([[2, 7], [8, 2]], (-2.518474945157, 0.019210815430)), + ], + ) + def test_precise(self, input_sample, expected): + """The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-6 : + ```R + library(Barnard) + options(digits=10) + barnard.test(43, 40, 10, 39, dp=1e-6, pooled=TRUE) + ``` + """ + res = barnard_exact(input_sample) + statistic, pvalue = res.statistic, res.pvalue + assert_allclose([statistic, pvalue], expected) + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[43, 40], [10, 39]], 
(3.920362887717, 0.000289470662)), + ([[100, 2], [1000, 5]], (-1.139432816087, 0.950272080594)), + ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)), + ([[5, 1], [10, 10]], (1.622375939458, 0.150599922226)), + ([[5, 15], [20, 20]], (-1.974771239528, 0.063038448651)), + ([[5, 16], [20, 25]], (-1.722122973346, 0.133329494287)), + ([[10, 5], [10, 1]], (-1.765469659009, 0.250566655215)), + ([[5, 0], [1, 4]], (5.477225575052, 0.007812500000)), + ([[0, 1], [3, 2]], (-1.224744871392, 0.509667991877)), + ([[0, 2], [6, 4]], (-1.732050807569, 0.197019618792)), + ([[2, 7], [8, 2]], (-3.079373904042, 0.020172119141)), + ], + ) + def test_pooled_param(self, input_sample, expected): + """The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-6 : + ```R + library(Barnard) + options(digits=10) + barnard.test(43, 40, 10, 39, dp=1e-6, pooled=FALSE) + ``` + """ + res = barnard_exact(input_sample, pooled=False) + statistic, pvalue = res.statistic, res.pvalue + assert_allclose([statistic, pvalue], expected) + + def test_raises(self): + # test we raise an error for wrong input number of nuisances. + error_msg = ( + "Number of points `n` must be strictly positive, found 0" + ) + with assert_raises(ValueError, match=error_msg): + barnard_exact([[1, 2], [3, 4]], n=0) + + # test we raise an error for wrong shape of input. + error_msg = "The input `table` must be of shape \\(2, 2\\)." + with assert_raises(ValueError, match=error_msg): + barnard_exact(np.arange(6).reshape(2, 3)) + + # Test all values must be positives + error_msg = "All values in `table` must be nonnegative." 
+ with assert_raises(ValueError, match=error_msg): + barnard_exact([[-1, 2], [3, 4]]) + + # Test value error on wrong alternative param + error_msg = ( + "`alternative` should be one of {'two-sided', 'less', 'greater'}," + " found .*" + ) + with assert_raises(ValueError, match=error_msg): + barnard_exact([[1, 2], [3, 4]], "not-correct") + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[0, 0], [4, 3]], (1.0, 0)), + ], + ) + def test_edge_cases(self, input_sample, expected): + res = barnard_exact(input_sample) + statistic, pvalue = res.statistic, res.pvalue + assert_equal(pvalue, expected[0]) + assert_equal(statistic, expected[1]) + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[0, 5], [0, 10]], (1.0, np.nan)), + ([[5, 0], [10, 0]], (1.0, np.nan)), + ], + ) + def test_row_or_col_zero(self, input_sample, expected): + res = barnard_exact(input_sample) + statistic, pvalue = res.statistic, res.pvalue + assert_equal(pvalue, expected[0]) + assert_equal(statistic, expected[1]) + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[2, 7], [8, 2]], (-2.518474945157, 0.009886140845)), + ([[7, 200], [300, 8]], (-21.320036698460, 0.0)), + ([[21, 28], [1957, 6]], (-30.489638143953, 0.0)), + ], + ) + @pytest.mark.parametrize("alternative", ["greater", "less"]) + def test_less_greater(self, input_sample, expected, alternative): + """ + "The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-6 : + ```R + library(Barnard) + options(digits=10) + a = barnard.test(2, 7, 8, 2, dp=1e-6, pooled=TRUE) + a$p.value[1] + ``` + In this test, we are using the "one-sided" return value `a$p.value[1]` + to test our pvalue. 
+ """ + expected_stat, less_pvalue_expect = expected + + if alternative == "greater": + input_sample = np.array(input_sample)[:, ::-1] + expected_stat = -expected_stat + + res = barnard_exact(input_sample, alternative=alternative) + statistic, pvalue = res.statistic, res.pvalue + assert_allclose( + [statistic, pvalue], [expected_stat, less_pvalue_expect], atol=1e-7 + ) + + +class TestBoschlooExact: + """Some tests to show that boschloo_exact() works correctly.""" + + ATOL = 1e-7 + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[2, 7], [8, 2]], (0.01852173, 0.009886142)), + ([[5, 1], [10, 10]], (0.9782609, 0.9450994)), + ([[5, 16], [20, 25]], (0.08913823, 0.05827348)), + ([[10, 5], [10, 1]], (0.1652174, 0.08565611)), + ([[5, 0], [1, 4]], (1, 1)), + ([[0, 1], [3, 2]], (0.5, 0.34375)), + ([[2, 7], [8, 2]], (0.01852173, 0.009886142)), + ([[7, 12], [8, 3]], (0.06406797, 0.03410916)), + ([[10, 24], [25, 37]], (0.2009359, 0.1512882)), + ], + ) + def test_less(self, input_sample, expected): + """The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-8 : + ```R + library(Exact) + options(digits=10) + data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE) + a = exact.test(data, method="Boschloo", alternative="less", + tsmethod="central", np.interval=TRUE, beta=1e-8) + ``` + """ + res = boschloo_exact(input_sample, alternative="less") + statistic, pvalue = res.statistic, res.pvalue + assert_allclose([statistic, pvalue], expected, atol=self.ATOL) + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[43, 40], [10, 39]], (0.0002875544, 0.0001615562)), + ([[2, 7], [8, 2]], (0.9990149, 0.9918327)), + ([[5, 1], [10, 10]], (0.1652174, 0.09008534)), + ([[5, 15], [20, 20]], (0.9849087, 0.9706997)), + ([[5, 16], [20, 25]], (0.972349, 0.9524124)), + ([[5, 0], [1, 4]], (0.02380952, 0.006865367)), + ([[0, 1], [3, 2]], (1, 1)), + ([[0, 2], [6, 4]], (1, 1)), + ([[2, 7], [8, 2]], (0.9990149, 0.9918327)), + ([[7, 12], [8, 
3]], (0.9895302, 0.9771215)), + ([[10, 24], [25, 37]], (0.9012936, 0.8633275)), + ], + ) + def test_greater(self, input_sample, expected): + """The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-8 : + ```R + library(Exact) + options(digits=10) + data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE) + a = exact.test(data, method="Boschloo", alternative="greater", + tsmethod="central", np.interval=TRUE, beta=1e-8) + ``` + """ + res = boschloo_exact(input_sample, alternative="greater") + statistic, pvalue = res.statistic, res.pvalue + assert_allclose([statistic, pvalue], expected, atol=self.ATOL) + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[43, 40], [10, 39]], (0.0002875544, 0.0003231115)), + ([[2, 7], [8, 2]], (0.01852173, 0.01977228)), + ([[5, 1], [10, 10]], (0.1652174, 0.1801707)), + ([[5, 16], [20, 25]], (0.08913823, 0.116547)), + ([[5, 0], [1, 4]], (0.02380952, 0.01373073)), + ([[0, 1], [3, 2]], (0.5, 0.6875)), + ([[2, 7], [8, 2]], (0.01852173, 0.01977228)), + ([[7, 12], [8, 3]], (0.06406797, 0.06821831)), + ], + ) + def test_two_sided(self, input_sample, expected): + """The expected values have been generated by R, using a resolution + for the nuisance parameter of 1e-8 : + ```R + library(Exact) + options(digits=10) + data <- matrix(c(43, 10, 40, 39), 2, 2, byrow=TRUE) + a = exact.test(data, method="Boschloo", alternative="two.sided", + tsmethod="central", np.interval=TRUE, beta=1e-8) + ``` + """ + res = boschloo_exact(input_sample, alternative="two-sided", n=64) + # Need n = 64 for python 32-bit + statistic, pvalue = res.statistic, res.pvalue + assert_allclose([statistic, pvalue], expected, atol=self.ATOL) + + def test_raises(self): + # test we raise an error for wrong input number of nuisances. 
+ error_msg = ( + "Number of points `n` must be strictly positive, found 0" + ) + with assert_raises(ValueError, match=error_msg): + boschloo_exact([[1, 2], [3, 4]], n=0) + + # test we raise an error for wrong shape of input. + error_msg = "The input `table` must be of shape \\(2, 2\\)." + with assert_raises(ValueError, match=error_msg): + boschloo_exact(np.arange(6).reshape(2, 3)) + + # Test all values must be positives + error_msg = "All values in `table` must be nonnegative." + with assert_raises(ValueError, match=error_msg): + boschloo_exact([[-1, 2], [3, 4]]) + + # Test value error on wrong alternative param + error_msg = ( + r"`alternative` should be one of \('two-sided', 'less', " + r"'greater'\), found .*" + ) + with assert_raises(ValueError, match=error_msg): + boschloo_exact([[1, 2], [3, 4]], "not-correct") + + @pytest.mark.parametrize( + "input_sample,expected", + [ + ([[0, 5], [0, 10]], (np.nan, np.nan)), + ([[5, 0], [10, 0]], (np.nan, np.nan)), + ], + ) + def test_row_or_col_zero(self, input_sample, expected): + res = boschloo_exact(input_sample) + statistic, pvalue = res.statistic, res.pvalue + assert_equal(pvalue, expected[0]) + assert_equal(statistic, expected[1]) + + def test_two_sided_gt_1(self): + # Check that returned p-value does not exceed 1 even when twice + # the minimum of the one-sided p-values does. See gh-15345. + tbl = [[1, 1], [13, 12]] + pl = boschloo_exact(tbl, alternative='less').pvalue + pg = boschloo_exact(tbl, alternative='greater').pvalue + assert 2*min(pl, pg) > 1 + pt = boschloo_exact(tbl, alternative='two-sided').pvalue + assert pt == 1.0 + + @pytest.mark.parametrize("alternative", ("less", "greater")) + def test_against_fisher_exact(self, alternative): + # Check that the statistic of `boschloo_exact` is the same as the + # p-value of `fisher_exact` (for one-sided tests). See gh-15345. 
+ tbl = [[2, 7], [8, 2]] + boschloo_stat = boschloo_exact(tbl, alternative=alternative).statistic + fisher_p = stats.fisher_exact(tbl, alternative=alternative)[1] + assert_allclose(boschloo_stat, fisher_p) + + +class TestCvm_2samp: + def test_invalid_input(self): + y = np.arange(5) + msg = 'x and y must contain at least two observations.' + with pytest.raises(ValueError, match=msg): + cramervonmises_2samp([], y) + with pytest.raises(ValueError, match=msg): + cramervonmises_2samp(y, [1]) + msg = 'method must be either auto, exact or asymptotic' + with pytest.raises(ValueError, match=msg): + cramervonmises_2samp(y, y, 'xyz') + + def test_list_input(self): + x = [2, 3, 4, 7, 6] + y = [0.2, 0.7, 12, 18] + r1 = cramervonmises_2samp(x, y) + r2 = cramervonmises_2samp(np.array(x), np.array(y)) + assert_equal((r1.statistic, r1.pvalue), (r2.statistic, r2.pvalue)) + + def test_example_conover(self): + # Example 2 in Section 6.2 of W.J. Conover: Practical Nonparametric + # Statistics, 1971. + x = [7.6, 8.4, 8.6, 8.7, 9.3, 9.9, 10.1, 10.6, 11.2] + y = [5.2, 5.7, 5.9, 6.5, 6.8, 8.2, 9.1, 9.8, 10.8, 11.3, 11.5, 12.3, + 12.5, 13.4, 14.6] + r = cramervonmises_2samp(x, y) + assert_allclose(r.statistic, 0.262, atol=1e-3) + assert_allclose(r.pvalue, 0.18, atol=1e-2) + + @pytest.mark.parametrize('statistic, m, n, pval', + [(710, 5, 6, 48./462), + (1897, 7, 7, 117./1716), + (576, 4, 6, 2./210), + (1764, 6, 7, 2./1716)]) + def test_exact_pvalue(self, statistic, m, n, pval): + # the exact values are taken from Anderson: On the distribution of the + # two-sample Cramer-von-Mises criterion, 1962. 
+ # The values are taken from Table 2, 3, 4 and 5 + assert_equal(_pval_cvm_2samp_exact(statistic, m, n), pval) + + def test_large_sample(self): + # for large samples, the statistic U gets very large + # do a sanity check that p-value is not 0, 1 or nan + np.random.seed(4367) + x = distributions.norm.rvs(size=1000000) + y = distributions.norm.rvs(size=900000) + r = cramervonmises_2samp(x, y) + assert_(0 < r.pvalue < 1) + r = cramervonmises_2samp(x, y+0.1) + assert_(0 < r.pvalue < 1) + + def test_exact_vs_asymptotic(self): + np.random.seed(0) + x = np.random.rand(7) + y = np.random.rand(8) + r1 = cramervonmises_2samp(x, y, method='exact') + r2 = cramervonmises_2samp(x, y, method='asymptotic') + assert_equal(r1.statistic, r2.statistic) + assert_allclose(r1.pvalue, r2.pvalue, atol=1e-2) + + def test_method_auto(self): + x = np.arange(20) + y = [0.5, 4.7, 13.1] + r1 = cramervonmises_2samp(x, y, method='exact') + r2 = cramervonmises_2samp(x, y, method='auto') + assert_equal(r1.pvalue, r2.pvalue) + # switch to asymptotic if one sample has more than 20 observations + x = np.arange(21) + r1 = cramervonmises_2samp(x, y, method='asymptotic') + r2 = cramervonmises_2samp(x, y, method='auto') + assert_equal(r1.pvalue, r2.pvalue) + + def test_same_input(self): + # make sure trivial edge case can be handled + # note that _cdf_cvm_inf(0) = nan. 
implementation avoids nan by + # returning pvalue=1 for very small values of the statistic + x = np.arange(15) + res = cramervonmises_2samp(x, x) + assert_equal((res.statistic, res.pvalue), (0.0, 1.0)) + # check exact p-value + res = cramervonmises_2samp(x[:4], x[:4]) + assert_equal((res.statistic, res.pvalue), (0.0, 1.0)) + + +class TestTukeyHSD: + + data_same_size = ([24.5, 23.5, 26.4, 27.1, 29.9], + [28.4, 34.2, 29.5, 32.2, 30.1], + [26.1, 28.3, 24.3, 26.2, 27.8]) + data_diff_size = ([24.5, 23.5, 26.28, 26.4, 27.1, 29.9, 30.1, 30.1], + [28.4, 34.2, 29.5, 32.2, 30.1], + [26.1, 28.3, 24.3, 26.2, 27.8]) + extreme_size = ([24.5, 23.5, 26.4], + [28.4, 34.2, 29.5, 32.2, 30.1, 28.4, 34.2, 29.5, 32.2, + 30.1], + [26.1, 28.3, 24.3, 26.2, 27.8]) + + sas_same_size = """ + Comparison LowerCL Difference UpperCL Significance + 2 - 3 0.6908830568 4.34 7.989116943 1 + 2 - 1 0.9508830568 4.6 8.249116943 1 + 3 - 2 -7.989116943 -4.34 -0.6908830568 1 + 3 - 1 -3.389116943 0.26 3.909116943 0 + 1 - 2 -8.249116943 -4.6 -0.9508830568 1 + 1 - 3 -3.909116943 -0.26 3.389116943 0 + """ + + sas_diff_size = """ + Comparison LowerCL Difference UpperCL Significance + 2 - 1 0.2679292645 3.645 7.022070736 1 + 2 - 3 0.5934764007 4.34 8.086523599 1 + 1 - 2 -7.022070736 -3.645 -0.2679292645 1 + 1 - 3 -2.682070736 0.695 4.072070736 0 + 3 - 2 -8.086523599 -4.34 -0.5934764007 1 + 3 - 1 -4.072070736 -0.695 2.682070736 0 + """ + + sas_extreme = """ + Comparison LowerCL Difference UpperCL Significance + 2 - 3 1.561605075 4.34 7.118394925 1 + 2 - 1 2.740784879 6.08 9.419215121 1 + 3 - 2 -7.118394925 -4.34 -1.561605075 1 + 3 - 1 -1.964526566 1.74 5.444526566 0 + 1 - 2 -9.419215121 -6.08 -2.740784879 1 + 1 - 3 -5.444526566 -1.74 1.964526566 0 + """ + + @pytest.mark.parametrize("data,res_expect_str,atol", + ((data_same_size, sas_same_size, 1e-4), + (data_diff_size, sas_diff_size, 1e-4), + (extreme_size, sas_extreme, 1e-10), + ), + ids=["equal size sample", + "unequal sample size", + "extreme sample size 
differences"]) + def test_compare_sas(self, data, res_expect_str, atol): + ''' + SAS code used to generate results for each sample: + DATA ACHE; + INPUT BRAND RELIEF; + CARDS; + 1 24.5 + ... + 3 27.8 + ; + ods graphics on; ODS RTF;ODS LISTING CLOSE; + PROC ANOVA DATA=ACHE; + CLASS BRAND; + MODEL RELIEF=BRAND; + MEANS BRAND/TUKEY CLDIFF; + TITLE 'COMPARE RELIEF ACROSS MEDICINES - ANOVA EXAMPLE'; + ods output CLDiffs =tc; + proc print data=tc; + format LowerCL 17.16 UpperCL 17.16 Difference 17.16; + title "Output with many digits"; + RUN; + QUIT; + ODS RTF close; + ODS LISTING; + ''' + res_expect = np.asarray(res_expect_str.replace(" - ", " ").split()[5:], + dtype=float).reshape((6, 6)) + res_tukey = stats.tukey_hsd(*data) + conf = res_tukey.confidence_interval() + # loop over the comparisons + for i, j, l, s, h, sig in res_expect: + i, j = int(i) - 1, int(j) - 1 + assert_allclose(conf.low[i, j], l, atol=atol) + assert_allclose(res_tukey.statistic[i, j], s, atol=atol) + assert_allclose(conf.high[i, j], h, atol=atol) + assert_allclose((res_tukey.pvalue[i, j] <= .05), sig == 1) + + matlab_sm_siz = """ + 1 2 -8.2491590248597 -4.6 -0.9508409751403 0.0144483269098 + 1 3 -3.9091590248597 -0.26 3.3891590248597 0.9803107240900 + 2 3 0.6908409751403 4.34 7.9891590248597 0.0203311368795 + """ + + matlab_diff_sz = """ + 1 2 -7.02207069748501 -3.645 -0.26792930251500 0.03371498443080 + 1 3 -2.68207069748500 0.695 4.07207069748500 0.85572267328807 + 2 3 0.59347644287720 4.34 8.08652355712281 0.02259047020620 + """ + + @pytest.mark.parametrize("data,res_expect_str,atol", + ((data_same_size, matlab_sm_siz, 1e-12), + (data_diff_size, matlab_diff_sz, 1e-7)), + ids=["equal size sample", + "unequal size sample"]) + def test_compare_matlab(self, data, res_expect_str, atol): + """ + vals = [24.5, 23.5, 26.4, 27.1, 29.9, 28.4, 34.2, 29.5, 32.2, 30.1, + 26.1, 28.3, 24.3, 26.2, 27.8] + names = {'zero', 'zero', 'zero', 'zero', 'zero', 'one', 'one', 'one', + 'one', 'one', 'two', 'two', 'two', 
'two', 'two'} + [p,t,stats] = anova1(vals,names,"off"); + [c,m,h,nms] = multcompare(stats, "CType","hsd"); + """ + res_expect = np.asarray(res_expect_str.split(), + dtype=float).reshape((3, 6)) + res_tukey = stats.tukey_hsd(*data) + conf = res_tukey.confidence_interval() + # loop over the comparisons + for i, j, l, s, h, p in res_expect: + i, j = int(i) - 1, int(j) - 1 + assert_allclose(conf.low[i, j], l, atol=atol) + assert_allclose(res_tukey.statistic[i, j], s, atol=atol) + assert_allclose(conf.high[i, j], h, atol=atol) + assert_allclose(res_tukey.pvalue[i, j], p, atol=atol) + + def test_compare_r(self): + """ + Testing against results and p-values from R: + from: https://www.rdocumentation.org/packages/stats/versions/3.6.2/ + topics/TukeyHSD + > require(graphics) + > summary(fm1 <- aov(breaks ~ tension, data = warpbreaks)) + > TukeyHSD(fm1, "tension", ordered = TRUE) + > plot(TukeyHSD(fm1, "tension")) + Tukey multiple comparisons of means + 95% family-wise confidence level + factor levels have been ordered + Fit: aov(formula = breaks ~ tension, data = warpbreaks) + $tension + """ + str_res = """ + diff lwr upr p adj + 2 - 3 4.722222 -4.8376022 14.28205 0.4630831 + 1 - 3 14.722222 5.1623978 24.28205 0.0014315 + 1 - 2 10.000000 0.4401756 19.55982 0.0384598 + """ + res_expect = np.asarray(str_res.replace(" - ", " ").split()[5:], + dtype=float).reshape((3, 6)) + data = ([26, 30, 54, 25, 70, 52, 51, 26, 67, + 27, 14, 29, 19, 29, 31, 41, 20, 44], + [18, 21, 29, 17, 12, 18, 35, 30, 36, + 42, 26, 19, 16, 39, 28, 21, 39, 29], + [36, 21, 24, 18, 10, 43, 28, 15, 26, + 20, 21, 24, 17, 13, 15, 15, 16, 28]) + + res_tukey = stats.tukey_hsd(*data) + conf = res_tukey.confidence_interval() + # loop over the comparisons + for i, j, s, l, h, p in res_expect: + i, j = int(i) - 1, int(j) - 1 + # atols are set to the number of digits present in the r result. 
+ assert_allclose(conf.low[i, j], l, atol=1e-7) + assert_allclose(res_tukey.statistic[i, j], s, atol=1e-6) + assert_allclose(conf.high[i, j], h, atol=1e-5) + assert_allclose(res_tukey.pvalue[i, j], p, atol=1e-7) + + def test_engineering_stat_handbook(self): + ''' + Example sourced from: + https://www.itl.nist.gov/div898/handbook/prc/section4/prc471.htm + ''' + group1 = [6.9, 5.4, 5.8, 4.6, 4.0] + group2 = [8.3, 6.8, 7.8, 9.2, 6.5] + group3 = [8.0, 10.5, 8.1, 6.9, 9.3] + group4 = [5.8, 3.8, 6.1, 5.6, 6.2] + res = stats.tukey_hsd(group1, group2, group3, group4) + conf = res.confidence_interval() + lower = np.asarray([ + [0, 0, 0, -2.25], + [.29, 0, -2.93, .13], + [1.13, 0, 0, .97], + [0, 0, 0, 0]]) + upper = np.asarray([ + [0, 0, 0, 1.93], + [4.47, 0, 1.25, 4.31], + [5.31, 0, 0, 5.15], + [0, 0, 0, 0]]) + + for (i, j) in [(1, 0), (2, 0), (0, 3), (1, 2), (2, 3)]: + assert_allclose(conf.low[i, j], lower[i, j], atol=1e-2) + assert_allclose(conf.high[i, j], upper[i, j], atol=1e-2) + + def test_rand_symm(self): + # test some expected identities of the results + np.random.seed(1234) + data = np.random.rand(3, 100) + res = stats.tukey_hsd(*data) + conf = res.confidence_interval() + # the confidence intervals should be negated symmetric of each other + assert_equal(conf.low, -conf.high.T) + # the `high` and `low` center diagonals should be the same since the + # mean difference in a self comparison is 0. 
+ assert_equal(np.diagonal(conf.high), conf.high[0, 0]) + assert_equal(np.diagonal(conf.low), conf.low[0, 0]) + # statistic array should be antisymmetric with zeros on the diagonal + assert_equal(res.statistic, -res.statistic.T) + assert_equal(np.diagonal(res.statistic), 0) + # p-values should be symmetric and 1 when compared to itself + assert_equal(res.pvalue, res.pvalue.T) + assert_equal(np.diagonal(res.pvalue), 1) + + def test_no_inf(self): + with assert_raises(ValueError, match="...must be finite."): + stats.tukey_hsd([1, 2, 3], [2, np.inf], [6, 7, 3]) + + def test_is_1d(self): + with assert_raises(ValueError, match="...must be one-dimensional"): + stats.tukey_hsd([[1, 2], [2, 3]], [2, 5], [5, 23, 6]) + + def test_no_empty(self): + with assert_raises(ValueError, match="...must be greater than one"): + stats.tukey_hsd([], [2, 5], [4, 5, 6]) + + @pytest.mark.parametrize("nargs", (0, 1)) + def test_not_enough_treatments(self, nargs): + with assert_raises(ValueError, match="...more than 1 treatment."): + stats.tukey_hsd(*([[23, 7, 3]] * nargs)) + + @pytest.mark.parametrize("cl", [-.5, 0, 1, 2]) + def test_conf_level_invalid(self, cl): + with assert_raises(ValueError, match="must be between 0 and 1"): + r = stats.tukey_hsd([23, 7, 3], [3, 4], [9, 4]) + r.confidence_interval(cl) + + def test_2_args_ttest(self): + # that with 2 treatments the `pvalue` is equal to that of `ttest_ind` + res_tukey = stats.tukey_hsd(*self.data_diff_size[:2]) + res_ttest = stats.ttest_ind(*self.data_diff_size[:2]) + assert_allclose(res_ttest.pvalue, res_tukey.pvalue[0, 1]) + assert_allclose(res_ttest.pvalue, res_tukey.pvalue[1, 0]) + + +class TestPoissonMeansTest: + @pytest.mark.parametrize("c1, n1, c2, n2, p_expect", ( + # example from [1], 6. 
Illustrative examples: Example 1 + [0, 100, 3, 100, 0.0884], + [2, 100, 6, 100, 0.1749] + )) + def test_paper_examples(self, c1, n1, c2, n2, p_expect): + res = stats.poisson_means_test(c1, n1, c2, n2) + assert_allclose(res.pvalue, p_expect, atol=1e-4) + + @pytest.mark.parametrize("c1, n1, c2, n2, p_expect, alt, d", ( + # These test cases are produced by the wrapped fortran code from the + # original authors. Using a slightly modified version of this fortran, + # found here, https://github.com/nolanbconaway/poisson-etest, + # additional tests were created. + [20, 10, 20, 10, 0.9999997568929630, 'two-sided', 0], + [10, 10, 10, 10, 0.9999998403241203, 'two-sided', 0], + [50, 15, 1, 1, 0.09920321053409643, 'two-sided', .05], + [3, 100, 20, 300, 0.12202725450896404, 'two-sided', 0], + [3, 12, 4, 20, 0.40416087318539173, 'greater', 0], + [4, 20, 3, 100, 0.008053640402974236, 'greater', 0], + # publishing paper does not include a `less` alternative, + # so it was calculated with switched argument order and + # alternative="greater" + [4, 20, 3, 10, 0.3083216325432898, 'less', 0], + [1, 1, 50, 15, 0.09322998607245102, 'less', 0] + )) + def test_fortran_authors(self, c1, n1, c2, n2, p_expect, alt, d): + res = stats.poisson_means_test(c1, n1, c2, n2, alternative=alt, diff=d) + assert_allclose(res.pvalue, p_expect, atol=2e-6, rtol=1e-16) + + def test_different_results(self): + # The implementation in Fortran is known to break down at higher + # counts and observations, so we expect different results. By + # inspection we can infer the p-value to be near one. + count1, count2 = 10000, 10000 + nobs1, nobs2 = 10000, 10000 + res = stats.poisson_means_test(count1, nobs1, count2, nobs2) + assert_allclose(res.pvalue, 1) + + def test_less_than_zero_lambda_hat2(self): + # demonstrates behavior that fixes a known fault from original Fortran. + # p-value should clearly be near one. 
+ count1, count2 = 0, 0 + nobs1, nobs2 = 1, 1 + res = stats.poisson_means_test(count1, nobs1, count2, nobs2) + assert_allclose(res.pvalue, 1) + + def test_input_validation(self): + count1, count2 = 0, 0 + nobs1, nobs2 = 1, 1 + + # test non-integral events + message = '`k1` and `k2` must be integers.' + with assert_raises(TypeError, match=message): + stats.poisson_means_test(.7, nobs1, count2, nobs2) + with assert_raises(TypeError, match=message): + stats.poisson_means_test(count1, nobs1, .7, nobs2) + + # test negative events + message = '`k1` and `k2` must be greater than or equal to 0.' + with assert_raises(ValueError, match=message): + stats.poisson_means_test(-1, nobs1, count2, nobs2) + with assert_raises(ValueError, match=message): + stats.poisson_means_test(count1, nobs1, -1, nobs2) + + # test negative sample size + message = '`n1` and `n2` must be greater than 0.' + with assert_raises(ValueError, match=message): + stats.poisson_means_test(count1, -1, count2, nobs2) + with assert_raises(ValueError, match=message): + stats.poisson_means_test(count1, nobs1, count2, -1) + + # test negative difference + message = 'diff must be greater than or equal to 0.' + with assert_raises(ValueError, match=message): + stats.poisson_means_test(count1, nobs1, count2, nobs2, diff=-1) + + # test invalid alternatvie + message = 'Alternative must be one of ...' + with assert_raises(ValueError, match=message): + stats.poisson_means_test(1, 2, 1, 2, alternative='error') + + +class TestBWSTest: + + def test_bws_input_validation(self): + rng = np.random.default_rng(4571775098104213308) + + x, y = rng.random(size=(2, 7)) + + message = '`x` and `y` must be exactly one-dimensional.' + with pytest.raises(ValueError, match=message): + stats.bws_test([x, x], [y, y]) + + message = '`x` and `y` must not contain NaNs.' + with pytest.raises(ValueError, match=message): + stats.bws_test([np.nan], y) + + message = '`x` and `y` must be of nonzero size.' 
+ with pytest.raises(ValueError, match=message): + stats.bws_test(x, []) + + message = 'alternative` must be one of...' + with pytest.raises(ValueError, match=message): + stats.bws_test(x, y, alternative='ekki-ekki') + + message = 'method` must be an instance of...' + with pytest.raises(ValueError, match=message): + stats.bws_test(x, y, method=42) + + + def test_against_published_reference(self): + # Test against Example 2 in bws_test Reference [1], pg 9 + # https://link.springer.com/content/pdf/10.1007/BF02762032.pdf + x = [1, 2, 3, 4, 6, 7, 8] + y = [5, 9, 10, 11, 12, 13, 14] + res = stats.bws_test(x, y, alternative='two-sided') + assert_allclose(res.statistic, 5.132, atol=1e-3) + assert_equal(res.pvalue, 10/3432) + + + @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'), + [('two-sided', 1.7510204081633, 0.1264422777777), + ('less', -1.7510204081633, 0.05754662004662), + ('greater', -1.7510204081633, 0.9424533799534)]) + def test_against_R(self, alternative, statistic, pvalue): + # Test against R library BWStest function bws_test + # library(BWStest) + # options(digits=16) + # x = c(...) + # y = c(...) + # bws_test(x, y, alternative='two.sided') + rng = np.random.default_rng(4571775098104213308) + x, y = rng.random(size=(2, 7)) + res = stats.bws_test(x, y, alternative=alternative) + assert_allclose(res.statistic, statistic, rtol=1e-13) + assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1) + + @pytest.mark.parametrize(('alternative', 'statistic', 'pvalue'), + [('two-sided', 1.142629265891, 0.2903950180801), + ('less', 0.99629665877411, 0.8545660222131), + ('greater', 0.99629665877411, 0.1454339777869)]) + def test_against_R_imbalanced(self, alternative, statistic, pvalue): + # Test against R library BWStest function bws_test + # library(BWStest) + # options(digits=16) + # x = c(...) + # y = c(...) 
+ # bws_test(x, y, alternative='two.sided') + rng = np.random.default_rng(5429015622386364034) + x = rng.random(size=9) + y = rng.random(size=8) + res = stats.bws_test(x, y, alternative=alternative) + assert_allclose(res.statistic, statistic, rtol=1e-13) + assert_allclose(res.pvalue, pvalue, atol=1e-2, rtol=1e-1) + + def test_method(self): + # Test that `method` parameter has the desired effect + rng = np.random.default_rng(1520514347193347862) + x, y = rng.random(size=(2, 10)) + + rng = np.random.default_rng(1520514347193347862) + method = stats.PermutationMethod(n_resamples=10, random_state=rng) + res1 = stats.bws_test(x, y, method=method) + + assert len(res1.null_distribution) == 10 + + rng = np.random.default_rng(1520514347193347862) + method = stats.PermutationMethod(n_resamples=10, random_state=rng) + res2 = stats.bws_test(x, y, method=method) + + assert_allclose(res1.null_distribution, res2.null_distribution) + + rng = np.random.default_rng(5205143471933478621) + method = stats.PermutationMethod(n_resamples=10, random_state=rng) + res3 = stats.bws_test(x, y, method=method) + + assert not np.allclose(res3.null_distribution, res1.null_distribution) + + def test_directions(self): + # Sanity check of the sign of the one-sided statistic + rng = np.random.default_rng(1520514347193347862) + x = rng.random(size=5) + y = x - 1 + + res = stats.bws_test(x, y, alternative='greater') + assert res.statistic > 0 + assert_equal(res.pvalue, 1 / len(res.null_distribution)) + + res = stats.bws_test(x, y, alternative='less') + assert res.statistic > 0 + assert_equal(res.pvalue, 1) + + res = stats.bws_test(y, x, alternative='less') + assert res.statistic < 0 + assert_equal(res.pvalue, 1 / len(res.null_distribution)) + + res = stats.bws_test(y, x, alternative='greater') + assert res.statistic < 0 + assert_equal(res.pvalue, 1) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_kdeoth.py b/.venv/Lib/site-packages/scipy/stats/tests/test_kdeoth.py new file mode 100644 index 
0000000000000000000000000000000000000000..37fc2beb92dc8f2aa0deaa28361aab826b82b42f --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_kdeoth.py @@ -0,0 +1,608 @@ +from scipy import stats, linalg, integrate +import numpy as np +from numpy.testing import (assert_almost_equal, assert_, assert_equal, + assert_array_almost_equal, + assert_array_almost_equal_nulp, assert_allclose) +import pytest +from pytest import raises as assert_raises + + +def test_kde_1d(): + #some basic tests comparing to normal distribution + np.random.seed(8765678) + n_basesample = 500 + xn = np.random.randn(n_basesample) + xnmean = xn.mean() + xnstd = xn.std(ddof=1) + + # get kde for original sample + gkde = stats.gaussian_kde(xn) + + # evaluate the density function for the kde for some points + xs = np.linspace(-7,7,501) + kdepdf = gkde.evaluate(xs) + normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd) + intervall = xs[1] - xs[0] + + assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01) + prob1 = gkde.integrate_box_1d(xnmean, np.inf) + prob2 = gkde.integrate_box_1d(-np.inf, xnmean) + assert_almost_equal(prob1, 0.5, decimal=1) + assert_almost_equal(prob2, 0.5, decimal=1) + assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13) + assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13) + + assert_almost_equal(gkde.integrate_kde(gkde), + (kdepdf**2).sum()*intervall, decimal=2) + assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2), + (kdepdf*normpdf).sum()*intervall, decimal=2) + + +def test_kde_1d_weighted(): + #some basic tests comparing to normal distribution + np.random.seed(8765678) + n_basesample = 500 + xn = np.random.randn(n_basesample) + wn = np.random.rand(n_basesample) + xnmean = np.average(xn, weights=wn) + xnstd = np.sqrt(np.average((xn-xnmean)**2, weights=wn)) + + # get kde for original sample + gkde = stats.gaussian_kde(xn, weights=wn) + + # evaluate the density function for the kde for some points + xs = 
np.linspace(-7,7,501) + kdepdf = gkde.evaluate(xs) + normpdf = stats.norm.pdf(xs, loc=xnmean, scale=xnstd) + intervall = xs[1] - xs[0] + + assert_(np.sum((kdepdf - normpdf)**2)*intervall < 0.01) + prob1 = gkde.integrate_box_1d(xnmean, np.inf) + prob2 = gkde.integrate_box_1d(-np.inf, xnmean) + assert_almost_equal(prob1, 0.5, decimal=1) + assert_almost_equal(prob2, 0.5, decimal=1) + assert_almost_equal(gkde.integrate_box(xnmean, np.inf), prob1, decimal=13) + assert_almost_equal(gkde.integrate_box(-np.inf, xnmean), prob2, decimal=13) + + assert_almost_equal(gkde.integrate_kde(gkde), + (kdepdf**2).sum()*intervall, decimal=2) + assert_almost_equal(gkde.integrate_gaussian(xnmean, xnstd**2), + (kdepdf*normpdf).sum()*intervall, decimal=2) + + +@pytest.mark.slow +def test_kde_2d(): + #some basic tests comparing to normal distribution + np.random.seed(8765678) + n_basesample = 500 + + mean = np.array([1.0, 3.0]) + covariance = np.array([[1.0, 2.0], [2.0, 6.0]]) + + # Need transpose (shape (2, 500)) for kde + xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T + + # get kde for original sample + gkde = stats.gaussian_kde(xn) + + # evaluate the density function for the kde for some points + x, y = np.mgrid[-7:7:500j, -7:7:500j] + grid_coords = np.vstack([x.ravel(), y.ravel()]) + kdepdf = gkde.evaluate(grid_coords) + kdepdf = kdepdf.reshape(500, 500) + + normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), + mean=mean, cov=covariance) + intervall = y.ravel()[1] - y.ravel()[0] + + assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01) + + small = -1e100 + large = 1e100 + prob1 = gkde.integrate_box([small, mean[1]], [large, large]) + prob2 = gkde.integrate_box([small, small], [large, mean[1]]) + + assert_almost_equal(prob1, 0.5, decimal=1) + assert_almost_equal(prob2, 0.5, decimal=1) + assert_almost_equal(gkde.integrate_kde(gkde), + (kdepdf**2).sum()*(intervall**2), decimal=2) + assert_almost_equal(gkde.integrate_gaussian(mean, covariance), + 
(kdepdf*normpdf).sum()*(intervall**2), decimal=2) + + +@pytest.mark.slow +def test_kde_2d_weighted(): + #some basic tests comparing to normal distribution + np.random.seed(8765678) + n_basesample = 500 + + mean = np.array([1.0, 3.0]) + covariance = np.array([[1.0, 2.0], [2.0, 6.0]]) + + # Need transpose (shape (2, 500)) for kde + xn = np.random.multivariate_normal(mean, covariance, size=n_basesample).T + wn = np.random.rand(n_basesample) + + # get kde for original sample + gkde = stats.gaussian_kde(xn, weights=wn) + + # evaluate the density function for the kde for some points + x, y = np.mgrid[-7:7:500j, -7:7:500j] + grid_coords = np.vstack([x.ravel(), y.ravel()]) + kdepdf = gkde.evaluate(grid_coords) + kdepdf = kdepdf.reshape(500, 500) + + normpdf = stats.multivariate_normal.pdf(np.dstack([x, y]), + mean=mean, cov=covariance) + intervall = y.ravel()[1] - y.ravel()[0] + + assert_(np.sum((kdepdf - normpdf)**2) * (intervall**2) < 0.01) + + small = -1e100 + large = 1e100 + prob1 = gkde.integrate_box([small, mean[1]], [large, large]) + prob2 = gkde.integrate_box([small, small], [large, mean[1]]) + + assert_almost_equal(prob1, 0.5, decimal=1) + assert_almost_equal(prob2, 0.5, decimal=1) + assert_almost_equal(gkde.integrate_kde(gkde), + (kdepdf**2).sum()*(intervall**2), decimal=2) + assert_almost_equal(gkde.integrate_gaussian(mean, covariance), + (kdepdf*normpdf).sum()*(intervall**2), decimal=2) + + +def test_kde_bandwidth_method(): + def scotts_factor(kde_obj): + """Same as default, just check that it works.""" + return np.power(kde_obj.n, -1./(kde_obj.d+4)) + + np.random.seed(8765678) + n_basesample = 50 + xn = np.random.randn(n_basesample) + + # Default + gkde = stats.gaussian_kde(xn) + # Supply a callable + gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor) + # Supply a scalar + gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor) + + xs = np.linspace(-7,7,51) + kdepdf = gkde.evaluate(xs) + kdepdf2 = gkde2.evaluate(xs) + assert_almost_equal(kdepdf, kdepdf2) + 
kdepdf3 = gkde3.evaluate(xs) + assert_almost_equal(kdepdf, kdepdf3) + + assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring') + + +def test_kde_bandwidth_method_weighted(): + def scotts_factor(kde_obj): + """Same as default, just check that it works.""" + return np.power(kde_obj.neff, -1./(kde_obj.d+4)) + + np.random.seed(8765678) + n_basesample = 50 + xn = np.random.randn(n_basesample) + + # Default + gkde = stats.gaussian_kde(xn) + # Supply a callable + gkde2 = stats.gaussian_kde(xn, bw_method=scotts_factor) + # Supply a scalar + gkde3 = stats.gaussian_kde(xn, bw_method=gkde.factor) + + xs = np.linspace(-7,7,51) + kdepdf = gkde.evaluate(xs) + kdepdf2 = gkde2.evaluate(xs) + assert_almost_equal(kdepdf, kdepdf2) + kdepdf3 = gkde3.evaluate(xs) + assert_almost_equal(kdepdf, kdepdf3) + + assert_raises(ValueError, stats.gaussian_kde, xn, bw_method='wrongstring') + + +# Subclasses that should stay working (extracted from various sources). +# Unfortunately the earlier design of gaussian_kde made it necessary for users +# to create these kinds of subclasses, or call _compute_covariance() directly. 
+ +class _kde_subclass1(stats.gaussian_kde): + def __init__(self, dataset): + self.dataset = np.atleast_2d(dataset) + self.d, self.n = self.dataset.shape + self.covariance_factor = self.scotts_factor + self._compute_covariance() + + +class _kde_subclass2(stats.gaussian_kde): + def __init__(self, dataset): + self.covariance_factor = self.scotts_factor + super().__init__(dataset) + + +class _kde_subclass4(stats.gaussian_kde): + def covariance_factor(self): + return 0.5 * self.silverman_factor() + + +def test_gaussian_kde_subclassing(): + x1 = np.array([-7, -5, 1, 4, 5], dtype=float) + xs = np.linspace(-10, 10, num=50) + + # gaussian_kde itself + kde = stats.gaussian_kde(x1) + ys = kde(xs) + + # subclass 1 + kde1 = _kde_subclass1(x1) + y1 = kde1(xs) + assert_array_almost_equal_nulp(ys, y1, nulp=10) + + # subclass 2 + kde2 = _kde_subclass2(x1) + y2 = kde2(xs) + assert_array_almost_equal_nulp(ys, y2, nulp=10) + + # subclass 3 was removed because we have no obligation to maintain support + # for user invocation of private methods + + # subclass 4 + kde4 = _kde_subclass4(x1) + y4 = kde4(x1) + y_expected = [0.06292987, 0.06346938, 0.05860291, 0.08657652, 0.07904017] + + assert_array_almost_equal(y_expected, y4, decimal=6) + + # Not a subclass, but check for use of _compute_covariance() + kde5 = kde + kde5.covariance_factor = lambda: kde.factor + kde5._compute_covariance() + y5 = kde5(xs) + assert_array_almost_equal_nulp(ys, y5, nulp=10) + + +def test_gaussian_kde_covariance_caching(): + x1 = np.array([-7, -5, 1, 4, 5], dtype=float) + xs = np.linspace(-10, 10, num=5) + # These expected values are from scipy 0.10, before some changes to + # gaussian_kde. They were not compared with any external reference. + y_expected = [0.02463386, 0.04689208, 0.05395444, 0.05337754, 0.01664475] + + # Set the bandwidth, then reset it to the default. 
+ kde = stats.gaussian_kde(x1) + kde.set_bandwidth(bw_method=0.5) + kde.set_bandwidth(bw_method='scott') + y2 = kde(xs) + + assert_array_almost_equal(y_expected, y2, decimal=7) + + +def test_gaussian_kde_monkeypatch(): + """Ugly, but people may rely on this. See scipy pull request 123, + specifically the linked ML thread "Width of the Gaussian in stats.kde". + If it is necessary to break this later on, that is to be discussed on ML. + """ + x1 = np.array([-7, -5, 1, 4, 5], dtype=float) + xs = np.linspace(-10, 10, num=50) + + # The old monkeypatched version to get at Silverman's Rule. + kde = stats.gaussian_kde(x1) + kde.covariance_factor = kde.silverman_factor + kde._compute_covariance() + y1 = kde(xs) + + # The new saner version. + kde2 = stats.gaussian_kde(x1, bw_method='silverman') + y2 = kde2(xs) + + assert_array_almost_equal_nulp(y1, y2, nulp=10) + + +def test_kde_integer_input(): + """Regression test for #1181.""" + x1 = np.arange(5) + kde = stats.gaussian_kde(x1) + y_expected = [0.13480721, 0.18222869, 0.19514935, 0.18222869, 0.13480721] + assert_array_almost_equal(kde(x1), y_expected, decimal=6) + + +_ftypes = ['float32', 'float64', 'float96', 'float128', 'int32', 'int64'] + + +@pytest.mark.parametrize("bw_type", _ftypes + ["scott", "silverman"]) +@pytest.mark.parametrize("dtype", _ftypes) +def test_kde_output_dtype(dtype, bw_type): + # Check whether the datatypes are available + dtype = getattr(np, dtype, None) + + if bw_type in ["scott", "silverman"]: + bw = bw_type + else: + bw_type = getattr(np, bw_type, None) + bw = bw_type(3) if bw_type else None + + if any(dt is None for dt in [dtype, bw]): + pytest.skip() + + weights = np.arange(5, dtype=dtype) + dataset = np.arange(5, dtype=dtype) + k = stats.gaussian_kde(dataset, bw_method=bw, weights=weights) + points = np.arange(5, dtype=dtype) + result = k(points) + # weights are always cast to float64 + assert result.dtype == np.result_type(dataset, points, np.float64(weights), + k.factor) + + +def 
test_pdf_logpdf_validation(): + rng = np.random.default_rng(64202298293133848336925499069837723291) + xn = rng.standard_normal((2, 10)) + gkde = stats.gaussian_kde(xn) + xs = rng.standard_normal((3, 10)) + + msg = "points have dimension 3, dataset has dimension 2" + with pytest.raises(ValueError, match=msg): + gkde.logpdf(xs) + + +def test_pdf_logpdf(): + np.random.seed(1) + n_basesample = 50 + xn = np.random.randn(n_basesample) + + # Default + gkde = stats.gaussian_kde(xn) + + xs = np.linspace(-15, 12, 25) + pdf = gkde.evaluate(xs) + pdf2 = gkde.pdf(xs) + assert_almost_equal(pdf, pdf2, decimal=12) + + logpdf = np.log(pdf) + logpdf2 = gkde.logpdf(xs) + assert_almost_equal(logpdf, logpdf2, decimal=12) + + # There are more points than data + gkde = stats.gaussian_kde(xs) + pdf = np.log(gkde.evaluate(xn)) + pdf2 = gkde.logpdf(xn) + assert_almost_equal(pdf, pdf2, decimal=12) + + +def test_pdf_logpdf_weighted(): + np.random.seed(1) + n_basesample = 50 + xn = np.random.randn(n_basesample) + wn = np.random.rand(n_basesample) + + # Default + gkde = stats.gaussian_kde(xn, weights=wn) + + xs = np.linspace(-15, 12, 25) + pdf = gkde.evaluate(xs) + pdf2 = gkde.pdf(xs) + assert_almost_equal(pdf, pdf2, decimal=12) + + logpdf = np.log(pdf) + logpdf2 = gkde.logpdf(xs) + assert_almost_equal(logpdf, logpdf2, decimal=12) + + # There are more points than data + gkde = stats.gaussian_kde(xs, weights=np.random.rand(len(xs))) + pdf = np.log(gkde.evaluate(xn)) + pdf2 = gkde.logpdf(xn) + assert_almost_equal(pdf, pdf2, decimal=12) + + +def test_marginal_1_axis(): + rng = np.random.default_rng(6111799263660870475) + n_data = 50 + n_dim = 10 + dataset = rng.normal(size=(n_dim, n_data)) + points = rng.normal(size=(n_dim, 3)) + + dimensions = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]) # dimensions to keep + + kde = stats.gaussian_kde(dataset) + marginal = kde.marginal(dimensions) + pdf = marginal.pdf(points[dimensions]) + + def marginal_pdf_single(point): + def f(x): + x = np.concatenate(([x], 
point[dimensions])) + return kde.pdf(x)[0] + return integrate.quad(f, -np.inf, np.inf)[0] + + def marginal_pdf(points): + return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points) + + ref = marginal_pdf(points) + + assert_allclose(pdf, ref, rtol=1e-6) + + +@pytest.mark.xslow +def test_marginal_2_axis(): + rng = np.random.default_rng(6111799263660870475) + n_data = 30 + n_dim = 4 + dataset = rng.normal(size=(n_dim, n_data)) + points = rng.normal(size=(n_dim, 3)) + + dimensions = np.array([1, 3]) # dimensions to keep + + kde = stats.gaussian_kde(dataset) + marginal = kde.marginal(dimensions) + pdf = marginal.pdf(points[dimensions]) + + def marginal_pdf(points): + def marginal_pdf_single(point): + def f(y, x): + w, z = point[dimensions] + x = np.array([x, w, y, z]) + return kde.pdf(x)[0] + return integrate.dblquad(f, -np.inf, np.inf, -np.inf, np.inf)[0] + + return np.apply_along_axis(marginal_pdf_single, axis=0, arr=points) + + ref = marginal_pdf(points) + + assert_allclose(pdf, ref, rtol=1e-6) + + +def test_marginal_iv(): + # test input validation + rng = np.random.default_rng(6111799263660870475) + n_data = 30 + n_dim = 4 + dataset = rng.normal(size=(n_dim, n_data)) + points = rng.normal(size=(n_dim, 3)) + + kde = stats.gaussian_kde(dataset) + + # check that positive and negative indices are equivalent + dimensions1 = [-1, 1] + marginal1 = kde.marginal(dimensions1) + pdf1 = marginal1.pdf(points[dimensions1]) + + dimensions2 = [3, -3] + marginal2 = kde.marginal(dimensions2) + pdf2 = marginal2.pdf(points[dimensions2]) + + assert_equal(pdf1, pdf2) + + # IV for non-integer dimensions + message = "Elements of `dimensions` must be integers..." + with pytest.raises(ValueError, match=message): + kde.marginal([1, 2.5]) + + # IV for uniquenes + message = "All elements of `dimensions` must be unique." 
+ with pytest.raises(ValueError, match=message): + kde.marginal([1, 2, 2]) + + # IV for non-integer dimensions + message = (r"Dimensions \[-5 6\] are invalid for a distribution in 4...") + with pytest.raises(ValueError, match=message): + kde.marginal([1, -5, 6]) + + +@pytest.mark.xslow +def test_logpdf_overflow(): + # regression test for gh-12988; testing against linalg instability for + # very high dimensionality kde + np.random.seed(1) + n_dimensions = 2500 + n_samples = 5000 + xn = np.array([np.random.randn(n_samples) + (n) for n in range( + 0, n_dimensions)]) + + # Default + gkde = stats.gaussian_kde(xn) + + logpdf = gkde.logpdf(np.arange(0, n_dimensions)) + np.testing.assert_equal(np.isneginf(logpdf[0]), False) + np.testing.assert_equal(np.isnan(logpdf[0]), False) + + +def test_weights_intact(): + # regression test for gh-9709: weights are not modified + np.random.seed(12345) + vals = np.random.lognormal(size=100) + weights = np.random.choice([1.0, 10.0, 100], size=vals.size) + orig_weights = weights.copy() + + stats.gaussian_kde(np.log10(vals), weights=weights) + assert_allclose(weights, orig_weights, atol=1e-14, rtol=1e-14) + + +def test_weights_integer(): + # integer weights are OK, cf gh-9709 (comment) + np.random.seed(12345) + values = [0.2, 13.5, 21.0, 75.0, 99.0] + weights = [1, 2, 4, 8, 16] # a list of integers + pdf_i = stats.gaussian_kde(values, weights=weights) + pdf_f = stats.gaussian_kde(values, weights=np.float64(weights)) + + xn = [0.3, 11, 88] + assert_allclose(pdf_i.evaluate(xn), + pdf_f.evaluate(xn), atol=1e-14, rtol=1e-14) + + +def test_seed(): + # Test the seed option of the resample method + def test_seed_sub(gkde_trail): + n_sample = 200 + # The results should be different without using seed + samp1 = gkde_trail.resample(n_sample) + samp2 = gkde_trail.resample(n_sample) + assert_raises( + AssertionError, assert_allclose, samp1, samp2, atol=1e-13 + ) + # Use integer seed + seed = 831 + samp1 = gkde_trail.resample(n_sample, seed=seed) + 
samp2 = gkde_trail.resample(n_sample, seed=seed) + assert_allclose(samp1, samp2, atol=1e-13) + # Use RandomState + rstate1 = np.random.RandomState(seed=138) + samp1 = gkde_trail.resample(n_sample, seed=rstate1) + rstate2 = np.random.RandomState(seed=138) + samp2 = gkde_trail.resample(n_sample, seed=rstate2) + assert_allclose(samp1, samp2, atol=1e-13) + + # check that np.random.Generator can be used (numpy >= 1.17) + if hasattr(np.random, 'default_rng'): + # obtain a np.random.Generator object + rng = np.random.default_rng(1234) + gkde_trail.resample(n_sample, seed=rng) + + np.random.seed(8765678) + n_basesample = 500 + wn = np.random.rand(n_basesample) + # Test 1D case + xn_1d = np.random.randn(n_basesample) + + gkde_1d = stats.gaussian_kde(xn_1d) + test_seed_sub(gkde_1d) + gkde_1d_weighted = stats.gaussian_kde(xn_1d, weights=wn) + test_seed_sub(gkde_1d_weighted) + + # Test 2D case + mean = np.array([1.0, 3.0]) + covariance = np.array([[1.0, 2.0], [2.0, 6.0]]) + xn_2d = np.random.multivariate_normal(mean, covariance, size=n_basesample).T + + gkde_2d = stats.gaussian_kde(xn_2d) + test_seed_sub(gkde_2d) + gkde_2d_weighted = stats.gaussian_kde(xn_2d, weights=wn) + test_seed_sub(gkde_2d_weighted) + + +def test_singular_data_covariance_gh10205(): + # When the data lie in a lower-dimensional subspace and this causes + # and exception, check that the error message is informative. + rng = np.random.default_rng(2321583144339784787) + mu = np.array([1, 10, 20]) + sigma = np.array([[4, 10, 0], [10, 25, 0], [0, 0, 100]]) + data = rng.multivariate_normal(mu, sigma, 1000) + try: # doesn't raise any error on some platforms, and that's OK + stats.gaussian_kde(data.T) + except linalg.LinAlgError: + msg = "The data appears to lie in a lower-dimensional subspace..." 
+ with assert_raises(linalg.LinAlgError, match=msg): + stats.gaussian_kde(data.T) + + +def test_fewer_points_than_dimensions_gh17436(): + # When the number of points is fewer than the number of dimensions, the + # the covariance matrix would be singular, and the exception tested in + # test_singular_data_covariance_gh10205 would occur. However, sometimes + # this occurs when the user passes in the transpose of what `gaussian_kde` + # expects. This can result in a huge covariance matrix, so bail early. + rng = np.random.default_rng(2046127537594925772) + rvs = rng.multivariate_normal(np.zeros(3), np.eye(3), size=5) + message = "Number of dimensions is greater than number of samples..." + with pytest.raises(ValueError, match=message): + stats.gaussian_kde(rvs) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_morestats.py b/.venv/Lib/site-packages/scipy/stats/tests/test_morestats.py new file mode 100644 index 0000000000000000000000000000000000000000..3d36b5ce62149cdeac5f6263573af2dcbc2a16d6 --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_morestats.py @@ -0,0 +1,2986 @@ +# Author: Travis Oliphant, 2002 +# +# Further enhancements and tests added by numerous SciPy developers. 
+# +import warnings +import sys +from functools import partial + +import numpy as np +from numpy.random import RandomState +from numpy.testing import (assert_array_equal, assert_almost_equal, + assert_array_less, assert_array_almost_equal, + assert_, assert_allclose, assert_equal, + suppress_warnings) +import pytest +from pytest import raises as assert_raises +import re +from scipy import optimize, stats, special +from scipy.stats._morestats import _abw_state, _get_As_weibull, _Avals_weibull +from .common_tests import check_named_results +from .._hypotests import _get_wilcoxon_distr, _get_wilcoxon_distr2 +from scipy.stats._binomtest import _binary_search_for_binom_tst +from scipy.stats._distr_params import distcont + +distcont = dict(distcont) # type: ignore + +# Matplotlib is not a scipy dependency but is optionally used in probplot, so +# check if it's available +try: + import matplotlib + matplotlib.rcParams['backend'] = 'Agg' + import matplotlib.pyplot as plt + have_matplotlib = True +except Exception: + have_matplotlib = False + + +# test data gear.dat from NIST for Levene and Bartlett test +# https://www.itl.nist.gov/div898/handbook/eda/section3/eda3581.htm +g1 = [1.006, 0.996, 0.998, 1.000, 0.992, 0.993, 1.002, 0.999, 0.994, 1.000] +g2 = [0.998, 1.006, 1.000, 1.002, 0.997, 0.998, 0.996, 1.000, 1.006, 0.988] +g3 = [0.991, 0.987, 0.997, 0.999, 0.995, 0.994, 1.000, 0.999, 0.996, 0.996] +g4 = [1.005, 1.002, 0.994, 1.000, 0.995, 0.994, 0.998, 0.996, 1.002, 0.996] +g5 = [0.998, 0.998, 0.982, 0.990, 1.002, 0.984, 0.996, 0.993, 0.980, 0.996] +g6 = [1.009, 1.013, 1.009, 0.997, 0.988, 1.002, 0.995, 0.998, 0.981, 0.996] +g7 = [0.990, 1.004, 0.996, 1.001, 0.998, 1.000, 1.018, 1.010, 0.996, 1.002] +g8 = [0.998, 1.000, 1.006, 1.000, 1.002, 0.996, 0.998, 0.996, 1.002, 1.006] +g9 = [1.002, 0.998, 0.996, 0.995, 0.996, 1.004, 1.004, 0.998, 0.999, 0.991] +g10 = [0.991, 0.995, 0.984, 0.994, 0.997, 0.997, 0.991, 0.998, 1.004, 0.997] + + +# The loggamma RVS stream is changing due 
to gh-13349; this version +# preserves the old stream so that tests don't change. +def _old_loggamma_rvs(*args, **kwargs): + return np.log(stats.gamma.rvs(*args, **kwargs)) + + +class TestBayes_mvs: + def test_basic(self): + # Expected values in this test simply taken from the function. For + # some checks regarding correctness of implementation, see review in + # gh-674 + data = [6, 9, 12, 7, 8, 8, 13] + mean, var, std = stats.bayes_mvs(data) + assert_almost_equal(mean.statistic, 9.0) + assert_allclose(mean.minmax, (7.103650222492964, 10.896349777507034), + rtol=1e-6) + + assert_almost_equal(var.statistic, 10.0) + assert_allclose(var.minmax, (3.1767242068607087, 24.45910381334018), + rtol=1e-09) + + assert_almost_equal(std.statistic, 2.9724954732045084, decimal=14) + assert_allclose(std.minmax, (1.7823367265645145, 4.9456146050146312), + rtol=1e-14) + + def test_empty_input(self): + assert_raises(ValueError, stats.bayes_mvs, []) + + def test_result_attributes(self): + x = np.arange(15) + attributes = ('statistic', 'minmax') + res = stats.bayes_mvs(x) + + for i in res: + check_named_results(i, attributes) + + +class TestMvsdist: + def test_basic(self): + data = [6, 9, 12, 7, 8, 8, 13] + mean, var, std = stats.mvsdist(data) + assert_almost_equal(mean.mean(), 9.0) + assert_allclose(mean.interval(0.9), (7.103650222492964, + 10.896349777507034), rtol=1e-14) + + assert_almost_equal(var.mean(), 10.0) + assert_allclose(var.interval(0.9), (3.1767242068607087, + 24.45910381334018), rtol=1e-09) + + assert_almost_equal(std.mean(), 2.9724954732045084, decimal=14) + assert_allclose(std.interval(0.9), (1.7823367265645145, + 4.9456146050146312), rtol=1e-14) + + def test_empty_input(self): + assert_raises(ValueError, stats.mvsdist, []) + + def test_bad_arg(self): + # Raise ValueError if fewer than two data points are given. 
+ data = [1] + assert_raises(ValueError, stats.mvsdist, data) + + def test_warns(self): + # regression test for gh-5270 + # make sure there are no spurious divide-by-zero warnings + with warnings.catch_warnings(): + warnings.simplefilter('error', RuntimeWarning) + [x.mean() for x in stats.mvsdist([1, 2, 3])] + [x.mean() for x in stats.mvsdist([1, 2, 3, 4, 5])] + + +class TestShapiro: + def test_basic(self): + x1 = [0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46, + 4.43, 0.21, 4.75, 0.71, 1.52, 3.24, + 0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66] + w, pw = stats.shapiro(x1) + shapiro_test = stats.shapiro(x1) + assert_almost_equal(w, 0.90047299861907959, decimal=6) + assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6) + assert_almost_equal(pw, 0.042089745402336121, decimal=6) + assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6) + + x2 = [1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11, + 3.48, 1.10, 0.88, -0.51, 1.46, 0.52, 6.20, 1.69, + 0.08, 3.67, 2.81, 3.49] + w, pw = stats.shapiro(x2) + shapiro_test = stats.shapiro(x2) + assert_almost_equal(w, 0.9590270, decimal=6) + assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6) + assert_almost_equal(pw, 0.52460, decimal=3) + assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3) + + # Verified against R + x3 = stats.norm.rvs(loc=5, scale=3, size=100, random_state=12345678) + w, pw = stats.shapiro(x3) + shapiro_test = stats.shapiro(x3) + assert_almost_equal(w, 0.9772805571556091, decimal=6) + assert_almost_equal(shapiro_test.statistic, 0.9772805571556091, decimal=6) + assert_almost_equal(pw, 0.08144091814756393, decimal=3) + assert_almost_equal(shapiro_test.pvalue, 0.08144091814756393, decimal=3) + + # Extracted from original paper + x4 = [0.139, 0.157, 0.175, 0.256, 0.344, 0.413, 0.503, 0.577, 0.614, + 0.655, 0.954, 1.392, 1.557, 1.648, 1.690, 1.994, 2.174, 2.206, + 3.245, 3.510, 3.571, 4.354, 4.980, 6.084, 8.351] + W_expected = 0.83467 + p_expected = 0.000914 + w, 
pw = stats.shapiro(x4) + shapiro_test = stats.shapiro(x4) + assert_almost_equal(w, W_expected, decimal=4) + assert_almost_equal(shapiro_test.statistic, W_expected, decimal=4) + assert_almost_equal(pw, p_expected, decimal=5) + assert_almost_equal(shapiro_test.pvalue, p_expected, decimal=5) + + def test_2d(self): + x1 = [[0.11, 7.87, 4.61, 10.14, 7.95, 3.14, 0.46, + 4.43, 0.21, 4.75], [0.71, 1.52, 3.24, + 0.93, 0.42, 4.97, 9.53, 4.55, 0.47, 6.66]] + w, pw = stats.shapiro(x1) + shapiro_test = stats.shapiro(x1) + assert_almost_equal(w, 0.90047299861907959, decimal=6) + assert_almost_equal(shapiro_test.statistic, 0.90047299861907959, decimal=6) + assert_almost_equal(pw, 0.042089745402336121, decimal=6) + assert_almost_equal(shapiro_test.pvalue, 0.042089745402336121, decimal=6) + + x2 = [[1.36, 1.14, 2.92, 2.55, 1.46, 1.06, 5.27, -1.11, + 3.48, 1.10], [0.88, -0.51, 1.46, 0.52, 6.20, 1.69, + 0.08, 3.67, 2.81, 3.49]] + w, pw = stats.shapiro(x2) + shapiro_test = stats.shapiro(x2) + assert_almost_equal(w, 0.9590270, decimal=6) + assert_almost_equal(shapiro_test.statistic, 0.9590270, decimal=6) + assert_almost_equal(pw, 0.52460, decimal=3) + assert_almost_equal(shapiro_test.pvalue, 0.52460, decimal=3) + + def test_empty_input(self): + assert_raises(ValueError, stats.shapiro, []) + assert_raises(ValueError, stats.shapiro, [[], [], []]) + + def test_not_enough_values(self): + assert_raises(ValueError, stats.shapiro, [1, 2]) + assert_raises(ValueError, stats.shapiro, np.array([[], [2]], dtype=object)) + + def test_bad_arg(self): + # Length of x is less than 3. + x = [1] + assert_raises(ValueError, stats.shapiro, x) + + def test_nan_input(self): + x = np.arange(10.) + x[9] = np.nan + + w, pw = stats.shapiro(x) + shapiro_test = stats.shapiro(x) + assert_equal(w, np.nan) + assert_equal(shapiro_test.statistic, np.nan) + # Originally, shapiro returned a p-value of 1 in this case, + # but there is no way to produce a numerical p-value if the + # statistic is not a number. 
NaN is more appropriate. + assert_almost_equal(pw, np.nan) + assert_almost_equal(shapiro_test.pvalue, np.nan) + + def test_gh14462(self): + # shapiro is theoretically location-invariant, but when the magnitude + # of the values is much greater than the variance, there can be + # numerical issues. Fixed by subtracting median from the data. + # See gh-14462. + + trans_val, maxlog = stats.boxcox([122500, 474400, 110400]) + res = stats.shapiro(trans_val) + + # Reference from R: + # options(digits=16) + # x = c(0.00000000e+00, 3.39996924e-08, -6.35166875e-09) + # shapiro.test(x) + ref = (0.86468431705371, 0.2805581751566) + + assert_allclose(res, ref, rtol=1e-5) + + def test_length_3_gh18322(self): + # gh-18322 reported that the p-value could be negative for input of + # length 3. Check that this is resolved. + res = stats.shapiro([0.6931471805599453, 0.0, 0.0]) + assert res.pvalue >= 0 + + # R `shapiro.test` doesn't produce an accurate p-value in the case + # above. Check that the formula used in `stats.shapiro` is not wrong. + # options(digits=16) + # x = c(-0.7746653110021126, -0.4344432067942129, 1.8157053280290931) + # shapiro.test(x) + x = [-0.7746653110021126, -0.4344432067942129, 1.8157053280290931] + res = stats.shapiro(x) + assert_allclose(res.statistic, 0.84658770645509) + assert_allclose(res.pvalue, 0.2313666489882, rtol=1e-6) + + +class TestAnderson: + def test_normal(self): + rs = RandomState(1234567890) + x1 = rs.standard_exponential(size=50) + x2 = rs.standard_normal(size=50) + A, crit, sig = stats.anderson(x1) + assert_array_less(crit[:-1], A) + A, crit, sig = stats.anderson(x2) + assert_array_less(A, crit[-2:]) + + v = np.ones(10) + v[0] = 0 + A, crit, sig = stats.anderson(v) + # The expected statistic 3.208057 was computed independently of scipy. 
+ # For example, in R: + # > library(nortest) + # > v <- rep(1, 10) + # > v[1] <- 0 + # > result <- ad.test(v) + # > result$statistic + # A + # 3.208057 + assert_allclose(A, 3.208057) + + def test_expon(self): + rs = RandomState(1234567890) + x1 = rs.standard_exponential(size=50) + x2 = rs.standard_normal(size=50) + A, crit, sig = stats.anderson(x1, 'expon') + assert_array_less(A, crit[-2:]) + with np.errstate(all='ignore'): + A, crit, sig = stats.anderson(x2, 'expon') + assert_(A > crit[-1]) + + def test_gumbel(self): + # Regression test for gh-6306. Before that issue was fixed, + # this case would return a2=inf. + v = np.ones(100) + v[0] = 0.0 + a2, crit, sig = stats.anderson(v, 'gumbel') + # A brief reimplementation of the calculation of the statistic. + n = len(v) + xbar, s = stats.gumbel_l.fit(v) + logcdf = stats.gumbel_l.logcdf(v, xbar, s) + logsf = stats.gumbel_l.logsf(v, xbar, s) + i = np.arange(1, n+1) + expected_a2 = -n - np.mean((2*i - 1) * (logcdf + logsf[::-1])) + + assert_allclose(a2, expected_a2) + + def test_bad_arg(self): + assert_raises(ValueError, stats.anderson, [1], dist='plate_of_shrimp') + + def test_result_attributes(self): + rs = RandomState(1234567890) + x = rs.standard_exponential(size=50) + res = stats.anderson(x) + attributes = ('statistic', 'critical_values', 'significance_level') + check_named_results(res, attributes) + + def test_gumbel_l(self): + # gh-2592, gh-6337 + # Adds support to 'gumbel_r' and 'gumbel_l' as valid inputs for dist. + rs = RandomState(1234567890) + x = rs.gumbel(size=100) + A1, crit1, sig1 = stats.anderson(x, 'gumbel') + A2, crit2, sig2 = stats.anderson(x, 'gumbel_l') + + assert_allclose(A2, A1) + + def test_gumbel_r(self): + # gh-2592, gh-6337 + # Adds support to 'gumbel_r' and 'gumbel_l' as valid inputs for dist. + rs = RandomState(1234567890) + x1 = rs.gumbel(size=100) + x2 = np.ones(100) + # A constant array is a degenerate case and breaks gumbel_r.fit, so + # change one value in x2. 
+ x2[0] = 0.996 + A1, crit1, sig1 = stats.anderson(x1, 'gumbel_r') + A2, crit2, sig2 = stats.anderson(x2, 'gumbel_r') + + assert_array_less(A1, crit1[-2:]) + assert_(A2 > crit2[-1]) + + def test_weibull_min_case_A(self): + # data and reference values from `anderson` reference [7] + x = np.array([225, 171, 198, 189, 189, 135, 162, 135, 117, 162]) + res = stats.anderson(x, 'weibull_min') + m, loc, scale = res.fit_result.params + assert_allclose((m, loc, scale), (2.38, 99.02, 78.23), rtol=2e-3) + assert_allclose(res.statistic, 0.260, rtol=1e-3) + assert res.statistic < res.critical_values[0] + + c = 1 / m # ~0.42 + assert_allclose(c, 1/2.38, rtol=2e-3) + # interpolate between rows for c=0.4 and c=0.45, indices -3 and -2 + As40 = _Avals_weibull[-3] + As45 = _Avals_weibull[-2] + As_ref = As40 + (c - 0.4)/(0.45 - 0.4) * (As45 - As40) + # atol=1e-3 because results are rounded up to the next third decimal + assert np.all(res.critical_values > As_ref) + assert_allclose(res.critical_values, As_ref, atol=1e-3) + + def test_weibull_min_case_B(self): + # From `anderson` reference [7] + x = np.array([74, 57, 48, 29, 502, 12, 70, 21, + 29, 386, 59, 27, 153, 26, 326]) + message = "Maximum likelihood estimation has converged to " + with pytest.raises(ValueError, match=message): + stats.anderson(x, 'weibull_min') + + def test_weibull_warning_error(self): + # Check for warning message when there are too few observations + # This is also an example in which an error occurs during fitting + x = -np.array([225, 75, 57, 168, 107, 12, 61, 43, 29]) + wmessage = "Critical values of the test statistic are given for the..." + emessage = "An error occurred while fitting the Weibull distribution..." 
+ wcontext = pytest.warns(UserWarning, match=wmessage) + econtext = pytest.raises(ValueError, match=emessage) + with wcontext, econtext: + stats.anderson(x, 'weibull_min') + + @pytest.mark.parametrize('distname', + ['norm', 'expon', 'gumbel_l', 'extreme1', + 'gumbel', 'gumbel_r', 'logistic', 'weibull_min']) + def test_anderson_fit_params(self, distname): + # check that anderson now returns a FitResult + rng = np.random.default_rng(330691555377792039) + real_distname = ('gumbel_l' if distname in {'extreme1', 'gumbel'} + else distname) + dist = getattr(stats, real_distname) + params = distcont[real_distname] + x = dist.rvs(*params, size=1000, random_state=rng) + res = stats.anderson(x, distname) + assert res.fit_result.success + + def test_anderson_weibull_As(self): + m = 1 # "when mi < 2, so that c > 0.5, the last line...should be used" + assert_equal(_get_As_weibull(1/m), _Avals_weibull[-1]) + m = np.inf + assert_equal(_get_As_weibull(1/m), _Avals_weibull[0]) + + +class TestAndersonKSamp: + def test_example1a(self): + # Example data from Scholz & Stephens (1987), originally + # published in Lehmann (1995, Nonparametrics, Statistical + # Methods Based on Ranks, p. 309) + # Pass a mixture of lists and arrays + t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0] + t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]) + t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0]) + t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8]) + + Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=False) + + assert_almost_equal(Tk, 4.449, 3) + assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459], + tm[0:5], 4) + assert_allclose(p, 0.0021, atol=0.00025) + + def test_example1b(self): + # Example data from Scholz & Stephens (1987), originally + # published in Lehmann (1995, Nonparametrics, Statistical + # Methods Based on Ranks, p. 
309) + # Pass arrays + t1 = np.array([38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0]) + t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]) + t3 = np.array([34.0, 35.0, 39.0, 40.0, 43.0, 43.0, 44.0, 45.0]) + t4 = np.array([34.0, 34.8, 34.8, 35.4, 37.2, 37.8, 41.2, 42.8]) + Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4), midrank=True) + + assert_almost_equal(Tk, 4.480, 3) + assert_array_almost_equal([0.4985, 1.3237, 1.9158, 2.4930, 3.2459], + tm[0:5], 4) + assert_allclose(p, 0.0020, atol=0.00025) + + @pytest.mark.slow + def test_example2a(self): + # Example data taken from an earlier technical report of + # Scholz and Stephens + # Pass lists instead of arrays + t1 = [194, 15, 41, 29, 33, 181] + t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118] + t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34] + t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29, + 118, 25, 156, 310, 76, 26, 44, 23, 62] + t5 = [130, 208, 70, 101, 208] + t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27] + t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33] + t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5, + 12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95] + t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82, + 54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24] + t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36, + 22, 139, 210, 97, 30, 23, 13, 14] + t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438] + t12 = [50, 254, 5, 283, 35, 12] + t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130] + t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66, + 61, 34] + + samples = (t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14) + Tk, tm, p = stats.anderson_ksamp(samples, midrank=False) + assert_almost_equal(Tk, 3.288, 3) + assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009], + tm[0:5], 4) + assert_allclose(p, 0.0041, atol=0.00025) + + rng = 
np.random.default_rng(6989860141921615054) + method = stats.PermutationMethod(n_resamples=9999, random_state=rng) + res = stats.anderson_ksamp(samples, midrank=False, method=method) + assert_array_equal(res.statistic, Tk) + assert_array_equal(res.critical_values, tm) + assert_allclose(res.pvalue, p, atol=6e-4) + + def test_example2b(self): + # Example data taken from an earlier technical report of + # Scholz and Stephens + t1 = [194, 15, 41, 29, 33, 181] + t2 = [413, 14, 58, 37, 100, 65, 9, 169, 447, 184, 36, 201, 118] + t3 = [34, 31, 18, 18, 67, 57, 62, 7, 22, 34] + t4 = [90, 10, 60, 186, 61, 49, 14, 24, 56, 20, 79, 84, 44, 59, 29, + 118, 25, 156, 310, 76, 26, 44, 23, 62] + t5 = [130, 208, 70, 101, 208] + t6 = [74, 57, 48, 29, 502, 12, 70, 21, 29, 386, 59, 27] + t7 = [55, 320, 56, 104, 220, 239, 47, 246, 176, 182, 33] + t8 = [23, 261, 87, 7, 120, 14, 62, 47, 225, 71, 246, 21, 42, 20, 5, + 12, 120, 11, 3, 14, 71, 11, 14, 11, 16, 90, 1, 16, 52, 95] + t9 = [97, 51, 11, 4, 141, 18, 142, 68, 77, 80, 1, 16, 106, 206, 82, + 54, 31, 216, 46, 111, 39, 63, 18, 191, 18, 163, 24] + t10 = [50, 44, 102, 72, 22, 39, 3, 15, 197, 188, 79, 88, 46, 5, 5, 36, + 22, 139, 210, 97, 30, 23, 13, 14] + t11 = [359, 9, 12, 270, 603, 3, 104, 2, 438] + t12 = [50, 254, 5, 283, 35, 12] + t13 = [487, 18, 100, 7, 98, 5, 85, 91, 43, 230, 3, 130] + t14 = [102, 209, 14, 57, 54, 32, 67, 59, 134, 152, 27, 14, 230, 66, + 61, 34] + + Tk, tm, p = stats.anderson_ksamp((t1, t2, t3, t4, t5, t6, t7, t8, + t9, t10, t11, t12, t13, t14), + midrank=True) + + assert_almost_equal(Tk, 3.294, 3) + assert_array_almost_equal([0.5990, 1.3269, 1.8052, 2.2486, 2.8009], + tm[0:5], 4) + assert_allclose(p, 0.0041, atol=0.00025) + + def test_R_kSamples(self): + # test values generates with R package kSamples + # package version 1.2-6 (2017-06-14) + # r1 = 1:100 + # continuous case (no ties) --> version 1 + # res <- kSamples::ad.test(r1, r1 + 40.5) + # res$ad[1, "T.AD"] # 41.105 + # res$ad[1, " asympt. 
P-value"] # 5.8399e-18 + # + # discrete case (ties allowed) --> version 2 (here: midrank=True) + # res$ad[2, "T.AD"] # 41.235 + # + # res <- kSamples::ad.test(r1, r1 + .5) + # res$ad[1, "T.AD"] # -1.2824 + # res$ad[1, " asympt. P-value"] # 1 + # res$ad[2, "T.AD"] # -1.2944 + # + # res <- kSamples::ad.test(r1, r1 + 7.5) + # res$ad[1, "T.AD"] # 1.4923 + # res$ad[1, " asympt. P-value"] # 0.077501 + # + # res <- kSamples::ad.test(r1, r1 + 6) + # res$ad[2, "T.AD"] # 0.63892 + # res$ad[2, " asympt. P-value"] # 0.17981 + # + # res <- kSamples::ad.test(r1, r1 + 11.5) + # res$ad[1, "T.AD"] # 4.5042 + # res$ad[1, " asympt. P-value"] # 0.00545 + # + # res <- kSamples::ad.test(r1, r1 + 13.5) + # res$ad[1, "T.AD"] # 6.2982 + # res$ad[1, " asympt. P-value"] # 0.00118 + + x1 = np.linspace(1, 100, 100) + # test case: different distributions;p-value floored at 0.001 + # test case for issue #5493 / #8536 + with suppress_warnings() as sup: + sup.filter(UserWarning, message='p-value floored') + s, _, p = stats.anderson_ksamp([x1, x1 + 40.5], midrank=False) + assert_almost_equal(s, 41.105, 3) + assert_equal(p, 0.001) + + with suppress_warnings() as sup: + sup.filter(UserWarning, message='p-value floored') + s, _, p = stats.anderson_ksamp([x1, x1 + 40.5]) + assert_almost_equal(s, 41.235, 3) + assert_equal(p, 0.001) + + # test case: similar distributions --> p-value capped at 0.25 + with suppress_warnings() as sup: + sup.filter(UserWarning, message='p-value capped') + s, _, p = stats.anderson_ksamp([x1, x1 + .5], midrank=False) + assert_almost_equal(s, -1.2824, 4) + assert_equal(p, 0.25) + + with suppress_warnings() as sup: + sup.filter(UserWarning, message='p-value capped') + s, _, p = stats.anderson_ksamp([x1, x1 + .5]) + assert_almost_equal(s, -1.2944, 4) + assert_equal(p, 0.25) + + # test case: check interpolated p-value in [0.01, 0.25] (no ties) + s, _, p = stats.anderson_ksamp([x1, x1 + 7.5], midrank=False) + assert_almost_equal(s, 1.4923, 4) + assert_allclose(p, 0.0775, 
atol=0.005, rtol=0) + + # test case: check interpolated p-value in [0.01, 0.25] (w/ ties) + s, _, p = stats.anderson_ksamp([x1, x1 + 6]) + assert_almost_equal(s, 0.6389, 4) + assert_allclose(p, 0.1798, atol=0.005, rtol=0) + + # test extended critical values for p=0.001 and p=0.005 + s, _, p = stats.anderson_ksamp([x1, x1 + 11.5], midrank=False) + assert_almost_equal(s, 4.5042, 4) + assert_allclose(p, 0.00545, atol=0.0005, rtol=0) + + s, _, p = stats.anderson_ksamp([x1, x1 + 13.5], midrank=False) + assert_almost_equal(s, 6.2982, 4) + assert_allclose(p, 0.00118, atol=0.0001, rtol=0) + + def test_not_enough_samples(self): + assert_raises(ValueError, stats.anderson_ksamp, np.ones(5)) + + def test_no_distinct_observations(self): + assert_raises(ValueError, stats.anderson_ksamp, + (np.ones(5), np.ones(5))) + + def test_empty_sample(self): + assert_raises(ValueError, stats.anderson_ksamp, (np.ones(5), [])) + + def test_result_attributes(self): + # Pass a mixture of lists and arrays + t1 = [38.7, 41.5, 43.8, 44.5, 45.5, 46.0, 47.7, 58.0] + t2 = np.array([39.2, 39.3, 39.7, 41.4, 41.8, 42.9, 43.3, 45.8]) + res = stats.anderson_ksamp((t1, t2), midrank=False) + + attributes = ('statistic', 'critical_values', 'significance_level') + check_named_results(res, attributes) + + assert_equal(res.significance_level, res.pvalue) + + +class TestAnsari: + + def test_small(self): + x = [1, 2, 3, 3, 4] + y = [3, 2, 6, 1, 6, 1, 4, 1] + with suppress_warnings() as sup: + sup.filter(UserWarning, "Ties preclude use of exact statistic.") + W, pval = stats.ansari(x, y) + assert_almost_equal(W, 23.5, 11) + assert_almost_equal(pval, 0.13499256881897437, 11) + + def test_approx(self): + ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99, + 101, 96, 97, 102, 107, 113, 116, 113, 110, 98)) + parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, + 100, 96, 108, 103, 104, 114, 114, 113, 108, + 106, 99)) + + with suppress_warnings() as sup: + sup.filter(UserWarning, "Ties preclude 
use of exact statistic.") + W, pval = stats.ansari(ramsay, parekh) + + assert_almost_equal(W, 185.5, 11) + assert_almost_equal(pval, 0.18145819972867083, 11) + + def test_exact(self): + W, pval = stats.ansari([1, 2, 3, 4], [15, 5, 20, 8, 10, 12]) + assert_almost_equal(W, 10.0, 11) + assert_almost_equal(pval, 0.533333333333333333, 7) + + def test_bad_arg(self): + assert_raises(ValueError, stats.ansari, [], [1]) + assert_raises(ValueError, stats.ansari, [1], []) + + def test_result_attributes(self): + x = [1, 2, 3, 3, 4] + y = [3, 2, 6, 1, 6, 1, 4, 1] + with suppress_warnings() as sup: + sup.filter(UserWarning, "Ties preclude use of exact statistic.") + res = stats.ansari(x, y) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes) + + def test_bad_alternative(self): + # invalid value for alternative must raise a ValueError + x1 = [1, 2, 3, 4] + x2 = [5, 6, 7, 8] + match = "'alternative' must be 'two-sided'" + with assert_raises(ValueError, match=match): + stats.ansari(x1, x2, alternative='foo') + + def test_alternative_exact(self): + x1 = [-5, 1, 5, 10, 15, 20, 25] # high scale, loc=10 + x2 = [7.5, 8.5, 9.5, 10.5, 11.5, 12.5] # low scale, loc=10 + # ratio of scales is greater than 1. So, the + # p-value must be high when `alternative='less'` + # and low when `alternative='greater'`. + statistic, pval = stats.ansari(x1, x2) + pval_l = stats.ansari(x1, x2, alternative='less').pvalue + pval_g = stats.ansari(x1, x2, alternative='greater').pvalue + assert pval_l > 0.95 + assert pval_g < 0.05 # level of significance. + # also check if the p-values sum up to 1 plus the probability + # mass under the calculated statistic. + prob = _abw_state.pmf(statistic, len(x1), len(x2)) + assert_allclose(pval_g + pval_l, 1 + prob, atol=1e-12) + # also check if one of the one-sided p-value equals half the + # two-sided p-value and the other one-sided p-value is its + # compliment. 
+ assert_allclose(pval_g, pval/2, atol=1e-12) + assert_allclose(pval_l, 1+prob-pval/2, atol=1e-12) + # sanity check. The result should flip if + # we exchange x and y. + pval_l_reverse = stats.ansari(x2, x1, alternative='less').pvalue + pval_g_reverse = stats.ansari(x2, x1, alternative='greater').pvalue + assert pval_l_reverse < 0.05 + assert pval_g_reverse > 0.95 + + @pytest.mark.parametrize( + 'x, y, alternative, expected', + # the tests are designed in such a way that the + # if else statement in ansari test for exact + # mode is covered. + [([1, 2, 3, 4], [5, 6, 7, 8], 'less', 0.6285714285714), + ([1, 2, 3, 4], [5, 6, 7, 8], 'greater', 0.6285714285714), + ([1, 2, 3], [4, 5, 6, 7, 8], 'less', 0.8928571428571), + ([1, 2, 3], [4, 5, 6, 7, 8], 'greater', 0.2857142857143), + ([1, 2, 3, 4, 5], [6, 7, 8], 'less', 0.2857142857143), + ([1, 2, 3, 4, 5], [6, 7, 8], 'greater', 0.8928571428571)] + ) + def test_alternative_exact_with_R(self, x, y, alternative, expected): + # testing with R on arbitrary data + # Sample R code used for the third test case above: + # ```R + # > options(digits=16) + # > x <- c(1,2,3) + # > y <- c(4,5,6,7,8) + # > ansari.test(x, y, alternative='less', exact=TRUE) + # + # Ansari-Bradley test + # + # data: x and y + # AB = 6, p-value = 0.8928571428571 + # alternative hypothesis: true ratio of scales is less than 1 + # + # ``` + pval = stats.ansari(x, y, alternative=alternative).pvalue + assert_allclose(pval, expected, atol=1e-12) + + def test_alternative_approx(self): + # intuitive tests for approximation + x1 = stats.norm.rvs(0, 5, size=100, random_state=123) + x2 = stats.norm.rvs(0, 2, size=100, random_state=123) + # for m > 55 or n > 55, the test should automatically + # switch to approximation. 
+ pval_l = stats.ansari(x1, x2, alternative='less').pvalue + pval_g = stats.ansari(x1, x2, alternative='greater').pvalue + assert_allclose(pval_l, 1.0, atol=1e-12) + assert_allclose(pval_g, 0.0, atol=1e-12) + # also check if one of the one-sided p-value equals half the + # two-sided p-value and the other one-sided p-value is its + # compliment. + x1 = stats.norm.rvs(0, 2, size=60, random_state=123) + x2 = stats.norm.rvs(0, 1.5, size=60, random_state=123) + pval = stats.ansari(x1, x2).pvalue + pval_l = stats.ansari(x1, x2, alternative='less').pvalue + pval_g = stats.ansari(x1, x2, alternative='greater').pvalue + assert_allclose(pval_g, pval/2, atol=1e-12) + assert_allclose(pval_l, 1-pval/2, atol=1e-12) + + +class TestBartlett: + + def test_data(self): + # https://www.itl.nist.gov/div898/handbook/eda/section3/eda357.htm + args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10] + T, pval = stats.bartlett(*args) + assert_almost_equal(T, 20.78587342806484, 7) + assert_almost_equal(pval, 0.0136358632781, 7) + + def test_bad_arg(self): + # Too few args raises ValueError. 
+ assert_raises(ValueError, stats.bartlett, [1]) + + def test_result_attributes(self): + args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10] + res = stats.bartlett(*args) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes) + + def test_empty_arg(self): + args = (g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, []) + assert_equal((np.nan, np.nan), stats.bartlett(*args)) + + # temporary fix for issue #9252: only accept 1d input + def test_1d_input(self): + x = np.array([[1, 2], [3, 4]]) + assert_raises(ValueError, stats.bartlett, g1, x) + + +class TestLevene: + + def test_data(self): + # https://www.itl.nist.gov/div898/handbook/eda/section3/eda35a.htm + args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10] + W, pval = stats.levene(*args) + assert_almost_equal(W, 1.7059176930008939, 7) + assert_almost_equal(pval, 0.0990829755522, 7) + + def test_trimmed1(self): + # Test that center='trimmed' gives the same result as center='mean' + # when proportiontocut=0. + W1, pval1 = stats.levene(g1, g2, g3, center='mean') + W2, pval2 = stats.levene(g1, g2, g3, center='trimmed', + proportiontocut=0.0) + assert_almost_equal(W1, W2) + assert_almost_equal(pval1, pval2) + + def test_trimmed2(self): + x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0] + y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0] + np.random.seed(1234) + x2 = np.random.permutation(x) + + # Use center='trimmed' + W0, pval0 = stats.levene(x, y, center='trimmed', + proportiontocut=0.125) + W1, pval1 = stats.levene(x2, y, center='trimmed', + proportiontocut=0.125) + # Trim the data here, and use center='mean' + W2, pval2 = stats.levene(x[1:-1], y[1:-1], center='mean') + # Result should be the same. 
+ assert_almost_equal(W0, W2) + assert_almost_equal(W1, W2) + assert_almost_equal(pval1, pval2) + + def test_equal_mean_median(self): + x = np.linspace(-1, 1, 21) + np.random.seed(1234) + x2 = np.random.permutation(x) + y = x**3 + W1, pval1 = stats.levene(x, y, center='mean') + W2, pval2 = stats.levene(x2, y, center='median') + assert_almost_equal(W1, W2) + assert_almost_equal(pval1, pval2) + + def test_bad_keyword(self): + x = np.linspace(-1, 1, 21) + assert_raises(TypeError, stats.levene, x, x, portiontocut=0.1) + + def test_bad_center_value(self): + x = np.linspace(-1, 1, 21) + assert_raises(ValueError, stats.levene, x, x, center='trim') + + def test_too_few_args(self): + assert_raises(ValueError, stats.levene, [1]) + + def test_result_attributes(self): + args = [g1, g2, g3, g4, g5, g6, g7, g8, g9, g10] + res = stats.levene(*args) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes) + + # temporary fix for issue #9252: only accept 1d input + def test_1d_input(self): + x = np.array([[1, 2], [3, 4]]) + assert_raises(ValueError, stats.levene, g1, x) + + +class TestBinomTest: + """Tests for stats.binomtest.""" + + # Expected results here are from R binom.test, e.g. 
+ # options(digits=16) + # binom.test(484, 967, p=0.48) + # + def test_two_sided_pvalues1(self): + # `tol` could be stricter on most architectures, but the value + # here is limited by accuracy of `binom.cdf` for large inputs on + # Linux_Python_37_32bit_full and aarch64 + rtol = 1e-10 # aarch64 observed rtol: 1.5e-11 + res = stats.binomtest(10079999, 21000000, 0.48) + assert_allclose(res.pvalue, 1.0, rtol=rtol) + res = stats.binomtest(10079990, 21000000, 0.48) + assert_allclose(res.pvalue, 0.9966892187965, rtol=rtol) + res = stats.binomtest(10080009, 21000000, 0.48) + assert_allclose(res.pvalue, 0.9970377203856, rtol=rtol) + res = stats.binomtest(10080017, 21000000, 0.48) + assert_allclose(res.pvalue, 0.9940754817328, rtol=1e-9) + + def test_two_sided_pvalues2(self): + rtol = 1e-10 # no aarch64 failure with 1e-15, preemptive bump + res = stats.binomtest(9, n=21, p=0.48) + assert_allclose(res.pvalue, 0.6689672431939, rtol=rtol) + res = stats.binomtest(4, 21, 0.48) + assert_allclose(res.pvalue, 0.008139563452106, rtol=rtol) + res = stats.binomtest(11, 21, 0.48) + assert_allclose(res.pvalue, 0.8278629664608, rtol=rtol) + res = stats.binomtest(7, 21, 0.48) + assert_allclose(res.pvalue, 0.1966772901718, rtol=rtol) + res = stats.binomtest(3, 10, .5) + assert_allclose(res.pvalue, 0.34375, rtol=rtol) + res = stats.binomtest(2, 2, .4) + assert_allclose(res.pvalue, 0.16, rtol=rtol) + res = stats.binomtest(2, 4, .3) + assert_allclose(res.pvalue, 0.5884, rtol=rtol) + + def test_edge_cases(self): + rtol = 1e-10 # aarch64 observed rtol: 1.33e-15 + res = stats.binomtest(484, 967, 0.5) + assert_allclose(res.pvalue, 1, rtol=rtol) + res = stats.binomtest(3, 47, 3/47) + assert_allclose(res.pvalue, 1, rtol=rtol) + res = stats.binomtest(13, 46, 13/46) + assert_allclose(res.pvalue, 1, rtol=rtol) + res = stats.binomtest(15, 44, 15/44) + assert_allclose(res.pvalue, 1, rtol=rtol) + res = stats.binomtest(7, 13, 0.5) + assert_allclose(res.pvalue, 1, rtol=rtol) + res = stats.binomtest(6, 11, 
0.5) + assert_allclose(res.pvalue, 1, rtol=rtol) + + def test_binary_srch_for_binom_tst(self): + # Test that old behavior of binomtest is maintained + # by the new binary search method in cases where d + # exactly equals the input on one side. + n = 10 + p = 0.5 + k = 3 + # First test for the case where k > mode of PMF + i = np.arange(np.ceil(p * n), n+1) + d = stats.binom.pmf(k, n, p) + # Old way of calculating y, probably consistent with R. + y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0) + # New way with binary search. + ix = _binary_search_for_binom_tst(lambda x1: + -stats.binom.pmf(x1, n, p), + -d, np.ceil(p * n), n) + y2 = n - ix + int(d == stats.binom.pmf(ix, n, p)) + assert_allclose(y1, y2, rtol=1e-9) + # Now test for the other side. + k = 7 + i = np.arange(np.floor(p * n) + 1) + d = stats.binom.pmf(k, n, p) + # Old way of calculating y. + y1 = np.sum(stats.binom.pmf(i, n, p) <= d, axis=0) + # New way with binary search. + ix = _binary_search_for_binom_tst(lambda x1: + stats.binom.pmf(x1, n, p), + d, 0, np.floor(p * n)) + y2 = ix + 1 + assert_allclose(y1, y2, rtol=1e-9) + + # Expected results here are from R 3.6.2 binom.test + @pytest.mark.parametrize('alternative, pval, ci_low, ci_high', + [('less', 0.148831050443, + 0.0, 0.2772002496709138), + ('greater', 0.9004695898947, + 0.1366613252458672, 1.0), + ('two-sided', 0.2983720970096, + 0.1266555521019559, 0.2918426890886281)]) + def test_confidence_intervals1(self, alternative, pval, ci_low, ci_high): + res = stats.binomtest(20, n=100, p=0.25, alternative=alternative) + assert_allclose(res.pvalue, pval, rtol=1e-12) + assert_equal(res.statistic, 0.2) + ci = res.proportion_ci(confidence_level=0.95) + assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-12) + + # Expected results here are from R 3.6.2 binom.test. 
+ @pytest.mark.parametrize('alternative, pval, ci_low, ci_high', + [('less', + 0.005656361, 0.0, 0.1872093), + ('greater', + 0.9987146, 0.008860761, 1.0), + ('two-sided', + 0.01191714, 0.006872485, 0.202706269)]) + def test_confidence_intervals2(self, alternative, pval, ci_low, ci_high): + res = stats.binomtest(3, n=50, p=0.2, alternative=alternative) + assert_allclose(res.pvalue, pval, rtol=1e-6) + assert_equal(res.statistic, 0.06) + ci = res.proportion_ci(confidence_level=0.99) + assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6) + + # Expected results here are from R 3.6.2 binom.test. + @pytest.mark.parametrize('alternative, pval, ci_high', + [('less', 0.05631351, 0.2588656), + ('greater', 1.0, 1.0), + ('two-sided', 0.07604122, 0.3084971)]) + def test_confidence_interval_exact_k0(self, alternative, pval, ci_high): + # Test with k=0, n = 10. + res = stats.binomtest(0, 10, p=0.25, alternative=alternative) + assert_allclose(res.pvalue, pval, rtol=1e-6) + ci = res.proportion_ci(confidence_level=0.95) + assert_equal(ci.low, 0.0) + assert_allclose(ci.high, ci_high, rtol=1e-6) + + # Expected results here are from R 3.6.2 binom.test. + @pytest.mark.parametrize('alternative, pval, ci_low', + [('less', 1.0, 0.0), + ('greater', 9.536743e-07, 0.7411344), + ('two-sided', 9.536743e-07, 0.6915029)]) + def test_confidence_interval_exact_k_is_n(self, alternative, pval, ci_low): + # Test with k = n = 10. + res = stats.binomtest(10, 10, p=0.25, alternative=alternative) + assert_allclose(res.pvalue, pval, rtol=1e-6) + ci = res.proportion_ci(confidence_level=0.95) + assert_equal(ci.high, 1.0) + assert_allclose(ci.low, ci_low, rtol=1e-6) + + # Expected results are from the prop.test function in R 3.6.2. 
+ @pytest.mark.parametrize( + 'k, alternative, corr, conf, ci_low, ci_high', + [[3, 'two-sided', True, 0.95, 0.08094782, 0.64632928], + [3, 'two-sided', True, 0.99, 0.0586329, 0.7169416], + [3, 'two-sided', False, 0.95, 0.1077913, 0.6032219], + [3, 'two-sided', False, 0.99, 0.07956632, 0.6799753], + [3, 'less', True, 0.95, 0.0, 0.6043476], + [3, 'less', True, 0.99, 0.0, 0.6901811], + [3, 'less', False, 0.95, 0.0, 0.5583002], + [3, 'less', False, 0.99, 0.0, 0.6507187], + [3, 'greater', True, 0.95, 0.09644904, 1.0], + [3, 'greater', True, 0.99, 0.06659141, 1.0], + [3, 'greater', False, 0.95, 0.1268766, 1.0], + [3, 'greater', False, 0.99, 0.08974147, 1.0], + + [0, 'two-sided', True, 0.95, 0.0, 0.3445372], + [0, 'two-sided', False, 0.95, 0.0, 0.2775328], + [0, 'less', True, 0.95, 0.0, 0.2847374], + [0, 'less', False, 0.95, 0.0, 0.212942], + [0, 'greater', True, 0.95, 0.0, 1.0], + [0, 'greater', False, 0.95, 0.0, 1.0], + + [10, 'two-sided', True, 0.95, 0.6554628, 1.0], + [10, 'two-sided', False, 0.95, 0.7224672, 1.0], + [10, 'less', True, 0.95, 0.0, 1.0], + [10, 'less', False, 0.95, 0.0, 1.0], + [10, 'greater', True, 0.95, 0.7152626, 1.0], + [10, 'greater', False, 0.95, 0.787058, 1.0]] + ) + def test_ci_wilson_method(self, k, alternative, corr, conf, + ci_low, ci_high): + res = stats.binomtest(k, n=10, p=0.1, alternative=alternative) + if corr: + method = 'wilsoncc' + else: + method = 'wilson' + ci = res.proportion_ci(confidence_level=conf, method=method) + assert_allclose((ci.low, ci.high), (ci_low, ci_high), rtol=1e-6) + + def test_estimate_equals_hypothesized_prop(self): + # Test the special case where the estimated proportion equals + # the hypothesized proportion. When alternative is 'two-sided', + # the p-value is 1. 
+ res = stats.binomtest(4, 16, 0.25) + assert_equal(res.statistic, 0.25) + assert_equal(res.pvalue, 1.0) + + @pytest.mark.parametrize('k, n', [(0, 0), (-1, 2)]) + def test_invalid_k_n(self, k, n): + with pytest.raises(ValueError, + match="must be an integer not less than"): + stats.binomtest(k, n) + + def test_invalid_k_too_big(self): + with pytest.raises(ValueError, + match=r"k \(11\) must not be greater than n \(10\)."): + stats.binomtest(11, 10, 0.25) + + def test_invalid_k_wrong_type(self): + with pytest.raises(TypeError, + match="k must be an integer."): + stats.binomtest([10, 11], 21, 0.25) + + def test_invalid_p_range(self): + message = r'p \(-0.5\) must be in range...' + with pytest.raises(ValueError, match=message): + stats.binomtest(50, 150, p=-0.5) + message = r'p \(1.5\) must be in range...' + with pytest.raises(ValueError, match=message): + stats.binomtest(50, 150, p=1.5) + + def test_invalid_confidence_level(self): + res = stats.binomtest(3, n=10, p=0.1) + message = r"confidence_level \(-1\) must be in the interval" + with pytest.raises(ValueError, match=message): + res.proportion_ci(confidence_level=-1) + + def test_invalid_ci_method(self): + res = stats.binomtest(3, n=10, p=0.1) + with pytest.raises(ValueError, match=r"method \('plate of shrimp'\) must be"): + res.proportion_ci(method="plate of shrimp") + + def test_invalid_alternative(self): + with pytest.raises(ValueError, match=r"alternative \('ekki'\) not..."): + stats.binomtest(3, n=10, p=0.1, alternative='ekki') + + def test_alias(self): + res = stats.binomtest(3, n=10, p=0.1) + assert_equal(res.proportion_estimate, res.statistic) + + @pytest.mark.skipif(sys.maxsize <= 2**32, reason="32-bit does not overflow") + def test_boost_overflow_raises(self): + # Boost.Math error policy should raise exceptions in Python + with pytest.raises(OverflowError, match='Error in function...'): + stats.binomtest(5, 6, p=sys.float_info.min) + + +class TestFligner: + + def test_data(self): + # numbers from R: 
fligner.test in package stats + x1 = np.arange(5) + assert_array_almost_equal(stats.fligner(x1, x1**2), + (3.2282229927203536, 0.072379187848207877), + 11) + + def test_trimmed1(self): + # Perturb input to break ties in the transformed data + # See https://github.com/scipy/scipy/pull/8042 for more details + rs = np.random.RandomState(123) + + def _perturb(g): + return (np.asarray(g) + 1e-10 * rs.randn(len(g))).tolist() + + g1_ = _perturb(g1) + g2_ = _perturb(g2) + g3_ = _perturb(g3) + # Test that center='trimmed' gives the same result as center='mean' + # when proportiontocut=0. + Xsq1, pval1 = stats.fligner(g1_, g2_, g3_, center='mean') + Xsq2, pval2 = stats.fligner(g1_, g2_, g3_, center='trimmed', + proportiontocut=0.0) + assert_almost_equal(Xsq1, Xsq2) + assert_almost_equal(pval1, pval2) + + def test_trimmed2(self): + x = [1.2, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 100.0] + y = [0.0, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 200.0] + # Use center='trimmed' + Xsq1, pval1 = stats.fligner(x, y, center='trimmed', + proportiontocut=0.125) + # Trim the data here, and use center='mean' + Xsq2, pval2 = stats.fligner(x[1:-1], y[1:-1], center='mean') + # Result should be the same. + assert_almost_equal(Xsq1, Xsq2) + assert_almost_equal(pval1, pval2) + + # The following test looks reasonable at first, but fligner() uses the + # function stats.rankdata(), and in one of the cases in this test, + # there are ties, while in the other (because of normal rounding + # errors) there are not. This difference leads to differences in the + # third significant digit of W. 
+ # + #def test_equal_mean_median(self): + # x = np.linspace(-1,1,21) + # y = x**3 + # W1, pval1 = stats.fligner(x, y, center='mean') + # W2, pval2 = stats.fligner(x, y, center='median') + # assert_almost_equal(W1, W2) + # assert_almost_equal(pval1, pval2) + + def test_bad_keyword(self): + x = np.linspace(-1, 1, 21) + assert_raises(TypeError, stats.fligner, x, x, portiontocut=0.1) + + def test_bad_center_value(self): + x = np.linspace(-1, 1, 21) + assert_raises(ValueError, stats.fligner, x, x, center='trim') + + def test_bad_num_args(self): + # Too few args raises ValueError. + assert_raises(ValueError, stats.fligner, [1]) + + def test_empty_arg(self): + x = np.arange(5) + assert_equal((np.nan, np.nan), stats.fligner(x, x**2, [])) + + +def mood_cases_with_ties(): + # Generate random `x` and `y` arrays with ties both between and within the + # samples. Expected results are (statistic, pvalue) from SAS. + expected_results = [(-1.76658511464992, .0386488678399305), + (-.694031428192304, .2438312498647250), + (-1.15093525352151, .1248794365836150)] + seeds = [23453254, 1298352315, 987234597] + for si, seed in enumerate(seeds): + rng = np.random.default_rng(seed) + xy = rng.random(100) + # Generate random indices to make ties + tie_ind = rng.integers(low=0, high=99, size=5) + # Generate a random number of ties for each index. + num_ties_per_ind = rng.integers(low=1, high=5, size=5) + # At each `tie_ind`, mark the next `n` indices equal to that value. 
+ for i, n in zip(tie_ind, num_ties_per_ind): + for j in range(i + 1, i + n): + xy[j] = xy[i] + # scramble order of xy before splitting into `x, y` + rng.shuffle(xy) + x, y = np.split(xy, 2) + yield x, y, 'less', *expected_results[si] + + +class TestMood: + @pytest.mark.parametrize("x,y,alternative,stat_expect,p_expect", + mood_cases_with_ties()) + def test_against_SAS(self, x, y, alternative, stat_expect, p_expect): + """ + Example code used to generate SAS output: + DATA myData; + INPUT X Y; + CARDS; + 1 0 + 1 1 + 1 2 + 1 3 + 1 4 + 2 0 + 2 1 + 2 4 + 2 9 + 2 16 + ods graphics on; + proc npar1way mood data=myData ; + class X; + ods output MoodTest=mt; + proc contents data=mt; + proc print data=mt; + format Prob1 17.16 Prob2 17.16 Statistic 17.16 Z 17.16 ; + title "Mood Two-Sample Test"; + proc print data=myData; + title "Data for above results"; + run; + """ + statistic, pvalue = stats.mood(x, y, alternative=alternative) + assert_allclose(stat_expect, statistic, atol=1e-16) + assert_allclose(p_expect, pvalue, atol=1e-16) + + @pytest.mark.parametrize("alternative, expected", + [('two-sided', (1.019938533549930, + .3077576129778760)), + ('less', (1.019938533549930, + 1 - .1538788064889380)), + ('greater', (1.019938533549930, + .1538788064889380))]) + def test_against_SAS_2(self, alternative, expected): + # Code to run in SAS in above function + x = [111, 107, 100, 99, 102, 106, 109, 108, 104, 99, + 101, 96, 97, 102, 107, 113, 116, 113, 110, 98] + y = [107, 108, 106, 98, 105, 103, 110, 105, 104, 100, + 96, 108, 103, 104, 114, 114, 113, 108, 106, 99] + res = stats.mood(x, y, alternative=alternative) + assert_allclose(res, expected) + + def test_mood_order_of_args(self): + # z should change sign when the order of arguments changes, pvalue + # should not change + np.random.seed(1234) + x1 = np.random.randn(10, 1) + x2 = np.random.randn(15, 1) + z1, p1 = stats.mood(x1, x2) + z2, p2 = stats.mood(x2, x1) + assert_array_almost_equal([z1, p1], [-z2, p2]) + + def 
test_mood_with_axis_none(self): + # Test with axis = None, compare with results from R + x1 = [-0.626453810742332, 0.183643324222082, -0.835628612410047, + 1.59528080213779, 0.329507771815361, -0.820468384118015, + 0.487429052428485, 0.738324705129217, 0.575781351653492, + -0.305388387156356, 1.51178116845085, 0.389843236411431, + -0.621240580541804, -2.2146998871775, 1.12493091814311, + -0.0449336090152309, -0.0161902630989461, 0.943836210685299, + 0.821221195098089, 0.593901321217509] + + x2 = [-0.896914546624981, 0.184849184646742, 1.58784533120882, + -1.13037567424629, -0.0802517565509893, 0.132420284381094, + 0.707954729271733, -0.23969802417184, 1.98447393665293, + -0.138787012119665, 0.417650750792556, 0.981752777463662, + -0.392695355503813, -1.03966897694891, 1.78222896030858, + -2.31106908460517, 0.878604580921265, 0.035806718015226, + 1.01282869212708, 0.432265154539617, 2.09081920524915, + -1.19992581964387, 1.58963820029007, 1.95465164222325, + 0.00493777682814261, -2.45170638784613, 0.477237302613617, + -0.596558168631403, 0.792203270299649, 0.289636710177348] + + x1 = np.array(x1) + x2 = np.array(x2) + x1.shape = (10, 2) + x2.shape = (15, 2) + assert_array_almost_equal(stats.mood(x1, x2, axis=None), + [-1.31716607555, 0.18778296257]) + + def test_mood_2d(self): + # Test if the results of mood test in 2-D case are consistent with the + # R result for the same inputs. Numbers from R mood.test(). 
+ ny = 5 + np.random.seed(1234) + x1 = np.random.randn(10, ny) + x2 = np.random.randn(15, ny) + z_vectest, pval_vectest = stats.mood(x1, x2) + + for j in range(ny): + assert_array_almost_equal([z_vectest[j], pval_vectest[j]], + stats.mood(x1[:, j], x2[:, j])) + + # inverse order of dimensions + x1 = x1.transpose() + x2 = x2.transpose() + z_vectest, pval_vectest = stats.mood(x1, x2, axis=1) + + for i in range(ny): + # check axis handling is self consistent + assert_array_almost_equal([z_vectest[i], pval_vectest[i]], + stats.mood(x1[i, :], x2[i, :])) + + def test_mood_3d(self): + shape = (10, 5, 6) + np.random.seed(1234) + x1 = np.random.randn(*shape) + x2 = np.random.randn(*shape) + + for axis in range(3): + z_vectest, pval_vectest = stats.mood(x1, x2, axis=axis) + # Tests that result for 3-D arrays is equal to that for the + # same calculation on a set of 1-D arrays taken from the + # 3-D array + axes_idx = ([1, 2], [0, 2], [0, 1]) # the two axes != axis + for i in range(shape[axes_idx[axis][0]]): + for j in range(shape[axes_idx[axis][1]]): + if axis == 0: + slice1 = x1[:, i, j] + slice2 = x2[:, i, j] + elif axis == 1: + slice1 = x1[i, :, j] + slice2 = x2[i, :, j] + else: + slice1 = x1[i, j, :] + slice2 = x2[i, j, :] + + assert_array_almost_equal([z_vectest[i, j], + pval_vectest[i, j]], + stats.mood(slice1, slice2)) + + def test_mood_bad_arg(self): + # Raise ValueError when the sum of the lengths of the args is + # less than 3 + assert_raises(ValueError, stats.mood, [1], []) + + def test_mood_alternative(self): + + np.random.seed(0) + x = stats.norm.rvs(scale=0.75, size=100) + y = stats.norm.rvs(scale=1.25, size=100) + + stat1, p1 = stats.mood(x, y, alternative='two-sided') + stat2, p2 = stats.mood(x, y, alternative='less') + stat3, p3 = stats.mood(x, y, alternative='greater') + + assert stat1 == stat2 == stat3 + assert_allclose(p1, 0, atol=1e-7) + assert_allclose(p2, p1/2) + assert_allclose(p3, 1 - p1/2) + + with pytest.raises(ValueError, match="`alternative` must 
be..."): + stats.mood(x, y, alternative='ekki-ekki') + + @pytest.mark.parametrize("alternative", ['two-sided', 'less', 'greater']) + def test_result(self, alternative): + rng = np.random.default_rng(265827767938813079281100964083953437622) + x1 = rng.standard_normal((10, 1)) + x2 = rng.standard_normal((15, 1)) + + res = stats.mood(x1, x2, alternative=alternative) + assert_equal((res.statistic, res.pvalue), res) + + +class TestProbplot: + + def test_basic(self): + x = stats.norm.rvs(size=20, random_state=12345) + osm, osr = stats.probplot(x, fit=False) + osm_expected = [-1.8241636, -1.38768012, -1.11829229, -0.91222575, + -0.73908135, -0.5857176, -0.44506467, -0.31273668, + -0.18568928, -0.06158146, 0.06158146, 0.18568928, + 0.31273668, 0.44506467, 0.5857176, 0.73908135, + 0.91222575, 1.11829229, 1.38768012, 1.8241636] + assert_allclose(osr, np.sort(x)) + assert_allclose(osm, osm_expected) + + res, res_fit = stats.probplot(x, fit=True) + res_fit_expected = [1.05361841, 0.31297795, 0.98741609] + assert_allclose(res_fit, res_fit_expected) + + def test_sparams_keyword(self): + x = stats.norm.rvs(size=100, random_state=123456) + # Check that None, () and 0 (loc=0, for normal distribution) all work + # and give the same results + osm1, osr1 = stats.probplot(x, sparams=None, fit=False) + osm2, osr2 = stats.probplot(x, sparams=0, fit=False) + osm3, osr3 = stats.probplot(x, sparams=(), fit=False) + assert_allclose(osm1, osm2) + assert_allclose(osm1, osm3) + assert_allclose(osr1, osr2) + assert_allclose(osr1, osr3) + # Check giving (loc, scale) params for normal distribution + osm, osr = stats.probplot(x, sparams=(), fit=False) + + def test_dist_keyword(self): + x = stats.norm.rvs(size=20, random_state=12345) + osm1, osr1 = stats.probplot(x, fit=False, dist='t', sparams=(3,)) + osm2, osr2 = stats.probplot(x, fit=False, dist=stats.t, sparams=(3,)) + assert_allclose(osm1, osm2) + assert_allclose(osr1, osr2) + + assert_raises(ValueError, stats.probplot, x, 
dist='wrong-dist-name') + assert_raises(AttributeError, stats.probplot, x, dist=[]) + + class custom_dist: + """Some class that looks just enough like a distribution.""" + def ppf(self, q): + return stats.norm.ppf(q, loc=2) + + osm1, osr1 = stats.probplot(x, sparams=(2,), fit=False) + osm2, osr2 = stats.probplot(x, dist=custom_dist(), fit=False) + assert_allclose(osm1, osm2) + assert_allclose(osr1, osr2) + + @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") + def test_plot_kwarg(self): + fig = plt.figure() + fig.add_subplot(111) + x = stats.t.rvs(3, size=100, random_state=7654321) + res1, fitres1 = stats.probplot(x, plot=plt) + plt.close() + res2, fitres2 = stats.probplot(x, plot=None) + res3 = stats.probplot(x, fit=False, plot=plt) + plt.close() + res4 = stats.probplot(x, fit=False, plot=None) + # Check that results are consistent between combinations of `fit` and + # `plot` keywords. + assert_(len(res1) == len(res2) == len(res3) == len(res4) == 2) + assert_allclose(res1, res2) + assert_allclose(res1, res3) + assert_allclose(res1, res4) + assert_allclose(fitres1, fitres2) + + # Check that a Matplotlib Axes object is accepted + fig = plt.figure() + ax = fig.add_subplot(111) + stats.probplot(x, fit=False, plot=ax) + plt.close() + + def test_probplot_bad_args(self): + # Raise ValueError when given an invalid distribution. + assert_raises(ValueError, stats.probplot, [1], dist="plate_of_shrimp") + + def test_empty(self): + assert_equal(stats.probplot([], fit=False), + (np.array([]), np.array([]))) + assert_equal(stats.probplot([], fit=True), + ((np.array([]), np.array([])), + (np.nan, np.nan, 0.0))) + + def test_array_of_size_one(self): + with np.errstate(invalid='ignore'): + assert_equal(stats.probplot([1], fit=True), + ((np.array([0.]), np.array([1])), + (np.nan, np.nan, 0.0))) + + +class TestWilcoxon: + def test_wilcoxon_bad_arg(self): + # Raise ValueError when two args of different lengths are given or + # zero_method is unknown. 
+ assert_raises(ValueError, stats.wilcoxon, [1], [1, 2]) + assert_raises(ValueError, stats.wilcoxon, [1, 2], [1, 2], "dummy") + assert_raises(ValueError, stats.wilcoxon, [1, 2], [1, 2], + alternative="dummy") + assert_raises(ValueError, stats.wilcoxon, [1]*10, mode="xyz") + + def test_zero_diff(self): + x = np.arange(20) + # pratt and wilcox do not work if x - y == 0 + assert_raises(ValueError, stats.wilcoxon, x, x, "wilcox", + mode="approx") + assert_raises(ValueError, stats.wilcoxon, x, x, "pratt", + mode="approx") + # ranksum is n*(n+1)/2, split in half if zero_method == "zsplit" + assert_equal(stats.wilcoxon(x, x, "zsplit", mode="approx"), + (20*21/4, 1.0)) + + def test_pratt(self): + # regression test for gh-6805: p-value matches value from R package + # coin (wilcoxsign_test) reported in the issue + x = [1, 2, 3, 4] + y = [1, 2, 3, 5] + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Sample size too small") + res = stats.wilcoxon(x, y, zero_method="pratt", mode="approx") + assert_allclose(res, (0.0, 0.31731050786291415)) + + def test_wilcoxon_arg_type(self): + # Should be able to accept list as arguments. + # Address issue 6070. 
+ arr = [1, 2, 3, 0, -1, 3, 1, 2, 1, 1, 2] + + _ = stats.wilcoxon(arr, zero_method="pratt", mode="approx") + _ = stats.wilcoxon(arr, zero_method="zsplit", mode="approx") + _ = stats.wilcoxon(arr, zero_method="wilcox", mode="approx") + + def test_accuracy_wilcoxon(self): + freq = [1, 4, 16, 15, 8, 4, 5, 1, 2] + nums = range(-4, 5) + x = np.concatenate([[u] * v for u, v in zip(nums, freq)]) + y = np.zeros(x.size) + + T, p = stats.wilcoxon(x, y, "pratt", mode="approx") + assert_allclose(T, 423) + assert_allclose(p, 0.0031724568006762576) + + T, p = stats.wilcoxon(x, y, "zsplit", mode="approx") + assert_allclose(T, 441) + assert_allclose(p, 0.0032145343172473055) + + T, p = stats.wilcoxon(x, y, "wilcox", mode="approx") + assert_allclose(T, 327) + assert_allclose(p, 0.00641346115861) + + # Test the 'correction' option, using values computed in R with: + # > wilcox.test(x, y, paired=TRUE, exact=FALSE, correct={FALSE,TRUE}) + x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112]) + y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187]) + T, p = stats.wilcoxon(x, y, correction=False, mode="approx") + assert_equal(T, 34) + assert_allclose(p, 0.6948866, rtol=1e-6) + T, p = stats.wilcoxon(x, y, correction=True, mode="approx") + assert_equal(T, 34) + assert_allclose(p, 0.7240817, rtol=1e-6) + + def test_wilcoxon_result_attributes(self): + x = np.array([120, 114, 181, 188, 180, 146, 121, 191, 132, 113, 127, 112]) + y = np.array([133, 143, 119, 189, 112, 199, 198, 113, 115, 121, 142, 187]) + res = stats.wilcoxon(x, y, correction=False, mode="approx") + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes) + + def test_wilcoxon_has_zstatistic(self): + rng = np.random.default_rng(89426135444) + x, y = rng.random(15), rng.random(15) + + res = stats.wilcoxon(x, y, mode="approx") + ref = stats.norm.ppf(res.pvalue/2) + assert_allclose(res.zstatistic, ref) + + res = stats.wilcoxon(x, y, mode="exact") + assert not 
hasattr(res, 'zstatistic') + + res = stats.wilcoxon(x, y) + assert not hasattr(res, 'zstatistic') + + def test_wilcoxon_tie(self): + # Regression test for gh-2391. + # Corresponding R code is: + # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=FALSE) + # > result$p.value + # [1] 0.001565402 + # > result = wilcox.test(rep(0.1, 10), exact=FALSE, correct=TRUE) + # > result$p.value + # [1] 0.001904195 + stat, p = stats.wilcoxon([0.1] * 10, mode="approx") + expected_p = 0.001565402 + assert_equal(stat, 0) + assert_allclose(p, expected_p, rtol=1e-6) + + stat, p = stats.wilcoxon([0.1] * 10, correction=True, mode="approx") + expected_p = 0.001904195 + assert_equal(stat, 0) + assert_allclose(p, expected_p, rtol=1e-6) + + def test_onesided(self): + # tested against "R version 3.4.1 (2017-06-30)" + # x <- c(125, 115, 130, 140, 140, 115, 140, 125, 140, 135) + # y <- c(110, 122, 125, 120, 140, 124, 123, 137, 135, 145) + # cfg <- list(x = x, y = y, paired = TRUE, exact = FALSE) + # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = FALSE))) + # do.call(wilcox.test, c(cfg, list(alternative = "less", correct = TRUE))) + # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = FALSE))) + # do.call(wilcox.test, c(cfg, list(alternative = "greater", correct = TRUE))) + x = [125, 115, 130, 140, 140, 115, 140, 125, 140, 135] + y = [110, 122, 125, 120, 140, 124, 123, 137, 135, 145] + + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Sample size too small") + w, p = stats.wilcoxon(x, y, alternative="less", mode="approx") + assert_equal(w, 27) + assert_almost_equal(p, 0.7031847, decimal=6) + + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Sample size too small") + w, p = stats.wilcoxon(x, y, alternative="less", correction=True, + mode="approx") + assert_equal(w, 27) + assert_almost_equal(p, 0.7233656, decimal=6) + + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Sample size too small") + w, 
p = stats.wilcoxon(x, y, alternative="greater", mode="approx") + assert_equal(w, 27) + assert_almost_equal(p, 0.2968153, decimal=6) + + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Sample size too small") + w, p = stats.wilcoxon(x, y, alternative="greater", correction=True, + mode="approx") + assert_equal(w, 27) + assert_almost_equal(p, 0.3176447, decimal=6) + + def test_exact_basic(self): + for n in range(1, 51): + pmf1 = _get_wilcoxon_distr(n) + pmf2 = _get_wilcoxon_distr2(n) + assert_equal(n*(n+1)/2 + 1, len(pmf1)) + assert_equal(sum(pmf1), 1) + assert_array_almost_equal(pmf1, pmf2) + + def test_exact_pval(self): + # expected values computed with "R version 3.4.1 (2017-06-30)" + x = np.array([1.81, 0.82, 1.56, -0.48, 0.81, 1.28, -1.04, 0.23, + -0.75, 0.14]) + y = np.array([0.71, 0.65, -0.2, 0.85, -1.1, -0.45, -0.84, -0.24, + -0.68, -0.76]) + _, p = stats.wilcoxon(x, y, alternative="two-sided", mode="exact") + assert_almost_equal(p, 0.1054688, decimal=6) + _, p = stats.wilcoxon(x, y, alternative="less", mode="exact") + assert_almost_equal(p, 0.9580078, decimal=6) + _, p = stats.wilcoxon(x, y, alternative="greater", mode="exact") + assert_almost_equal(p, 0.05273438, decimal=6) + + x = np.arange(0, 20) + 0.5 + y = np.arange(20, 0, -1) + _, p = stats.wilcoxon(x, y, alternative="two-sided", mode="exact") + assert_almost_equal(p, 0.8694878, decimal=6) + _, p = stats.wilcoxon(x, y, alternative="less", mode="exact") + assert_almost_equal(p, 0.4347439, decimal=6) + _, p = stats.wilcoxon(x, y, alternative="greater", mode="exact") + assert_almost_equal(p, 0.5795889, decimal=6) + + # These inputs were chosen to give a W statistic that is either the + # center of the distribution (when the length of the support is odd), or + # the value to the left of the center (when the length of the support is + # even). Also, the numbers are chosen so that the W statistic is the + # sum of the positive values. 
+ + @pytest.mark.parametrize('x', [[-1, -2, 3], + [-1, 2, -3, -4, 5], + [-1, -2, 3, -4, -5, -6, 7, 8]]) + def test_exact_p_1(self, x): + w, p = stats.wilcoxon(x) + x = np.array(x) + wtrue = x[x > 0].sum() + assert_equal(w, wtrue) + assert_equal(p, 1) + + def test_auto(self): + # auto default to exact if there are no ties and n<= 25 + x = np.arange(0, 25) + 0.5 + y = np.arange(25, 0, -1) + assert_equal(stats.wilcoxon(x, y), + stats.wilcoxon(x, y, mode="exact")) + + # if there are ties (i.e. zeros in d = x-y), then switch to approx + d = np.arange(0, 13) + with suppress_warnings() as sup: + sup.filter(UserWarning, message="Exact p-value calculation") + w, p = stats.wilcoxon(d) + assert_equal(stats.wilcoxon(d, mode="approx"), (w, p)) + + # use approximation for samples > 25 + d = np.arange(1, 52) + assert_equal(stats.wilcoxon(d), stats.wilcoxon(d, mode="approx")) + + @pytest.mark.parametrize('size', [3, 5, 10]) + def test_permutation_method(self, size): + rng = np.random.default_rng(92348034828501345) + x = rng.random(size=size) + res = stats.wilcoxon(x, method=stats.PermutationMethod()) + ref = stats.wilcoxon(x, method='exact') + assert_equal(res.statistic, ref.statistic) + assert_equal(res.pvalue, ref.pvalue) + + x = rng.random(size=size*10) + rng = np.random.default_rng(59234803482850134) + pm = stats.PermutationMethod(n_resamples=99, random_state=rng) + ref = stats.wilcoxon(x, method=pm) + rng = np.random.default_rng(59234803482850134) + pm = stats.PermutationMethod(n_resamples=99, random_state=rng) + res = stats.wilcoxon(x, method=pm) + + assert_equal(np.round(res.pvalue, 2), res.pvalue) # n_resamples used + assert_equal(res.pvalue, ref.pvalue) # random_state used + + def test_method_auto_nan_propagate_ND_length_gt_50_gh20591(self): + # When method!='approx', nan_policy='propagate', and a slice of + # a >1 dimensional array input contained NaN, the result object of + # `wilcoxon` could (under yet other conditions) return `zstatistic` + # for some slices but not 
others. This resulted in an error because + # `apply_along_axis` would have to create a ragged array. + # Check that this is resolved. + rng = np.random.default_rng(235889269872456) + A = rng.normal(size=(51, 2)) # length along slice > exact threshold + A[5, 1] = np.nan + res = stats.wilcoxon(A) + ref = stats.wilcoxon(A, method='approx') + assert_allclose(res, ref) + assert hasattr(ref, 'zstatistic') + assert not hasattr(res, 'zstatistic') + + +class TestKstat: + def test_moments_normal_distribution(self): + np.random.seed(32149) + data = np.random.randn(12345) + moments = [stats.kstat(data, n) for n in [1, 2, 3, 4]] + + expected = [0.011315, 1.017931, 0.05811052, 0.0754134] + assert_allclose(moments, expected, rtol=1e-4) + + # test equivalence with `stats.moment` + m1 = stats.moment(data, order=1) + m2 = stats.moment(data, order=2) + m3 = stats.moment(data, order=3) + assert_allclose((m1, m2, m3), expected[:-1], atol=0.02, rtol=1e-2) + + def test_empty_input(self): + assert_raises(ValueError, stats.kstat, []) + + def test_nan_input(self): + data = np.arange(10.) + data[6] = np.nan + + assert_equal(stats.kstat(data), np.nan) + + def test_kstat_bad_arg(self): + # Raise ValueError if n > 4 or n < 1. + data = np.arange(10) + for n in [0, 4.001]: + assert_raises(ValueError, stats.kstat, data, n=n) + + +class TestKstatVar: + def test_empty_input(self): + assert_raises(ValueError, stats.kstatvar, []) + + def test_nan_input(self): + data = np.arange(10.) + data[6] = np.nan + + assert_equal(stats.kstat(data), np.nan) + + def test_bad_arg(self): + # Raise ValueError is n is not 1 or 2. 
+ data = [1] + n = 10 + assert_raises(ValueError, stats.kstatvar, data, n=n) + + +class TestPpccPlot: + def setup_method(self): + self.x = _old_loggamma_rvs(5, size=500, random_state=7654321) + 5 + + def test_basic(self): + N = 5 + svals, ppcc = stats.ppcc_plot(self.x, -10, 10, N=N) + ppcc_expected = [0.21139644, 0.21384059, 0.98766719, 0.97980182, + 0.93519298] + assert_allclose(svals, np.linspace(-10, 10, num=N)) + assert_allclose(ppcc, ppcc_expected) + + def test_dist(self): + # Test that we can specify distributions both by name and as objects. + svals1, ppcc1 = stats.ppcc_plot(self.x, -10, 10, dist='tukeylambda') + svals2, ppcc2 = stats.ppcc_plot(self.x, -10, 10, + dist=stats.tukeylambda) + assert_allclose(svals1, svals2, rtol=1e-20) + assert_allclose(ppcc1, ppcc2, rtol=1e-20) + # Test that 'tukeylambda' is the default dist + svals3, ppcc3 = stats.ppcc_plot(self.x, -10, 10) + assert_allclose(svals1, svals3, rtol=1e-20) + assert_allclose(ppcc1, ppcc3, rtol=1e-20) + + @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib") + def test_plot_kwarg(self): + # Check with the matplotlib.pyplot module + fig = plt.figure() + ax = fig.add_subplot(111) + stats.ppcc_plot(self.x, -20, 20, plot=plt) + fig.delaxes(ax) + + # Check that a Matplotlib Axes object is accepted + ax = fig.add_subplot(111) + stats.ppcc_plot(self.x, -20, 20, plot=ax) + plt.close() + + def test_invalid_inputs(self): + # `b` has to be larger than `a` + assert_raises(ValueError, stats.ppcc_plot, self.x, 1, 0) + + # Raise ValueError when given an invalid distribution. + assert_raises(ValueError, stats.ppcc_plot, [1, 2, 3], 0, 1, + dist="plate_of_shrimp") + + def test_empty(self): + # For consistency with probplot return for one empty array, + # ppcc contains all zeros and svals is the same as for normal array + # input. 
+ svals, ppcc = stats.ppcc_plot([], 0, 1) + assert_allclose(svals, np.linspace(0, 1, num=80)) + assert_allclose(ppcc, np.zeros(80, dtype=float)) + + +class TestPpccMax: + def test_ppcc_max_bad_arg(self): + # Raise ValueError when given an invalid distribution. + data = [1] + assert_raises(ValueError, stats.ppcc_max, data, dist="plate_of_shrimp") + + def test_ppcc_max_basic(self): + x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000, + random_state=1234567) + 1e4 + assert_almost_equal(stats.ppcc_max(x), -0.71215366521264145, decimal=7) + + def test_dist(self): + x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000, + random_state=1234567) + 1e4 + + # Test that we can specify distributions both by name and as objects. + max1 = stats.ppcc_max(x, dist='tukeylambda') + max2 = stats.ppcc_max(x, dist=stats.tukeylambda) + assert_almost_equal(max1, -0.71215366521264145, decimal=5) + assert_almost_equal(max2, -0.71215366521264145, decimal=5) + + # Test that 'tukeylambda' is the default dist + max3 = stats.ppcc_max(x) + assert_almost_equal(max3, -0.71215366521264145, decimal=5) + + def test_brack(self): + x = stats.tukeylambda.rvs(-0.7, loc=2, scale=0.5, size=10000, + random_state=1234567) + 1e4 + assert_raises(ValueError, stats.ppcc_max, x, brack=(0.0, 1.0, 0.5)) + + assert_almost_equal(stats.ppcc_max(x, brack=(0, 1)), + -0.71215366521264145, decimal=7) + + assert_almost_equal(stats.ppcc_max(x, brack=(-2, 2)), + -0.71215366521264145, decimal=7) + + +class TestBoxcox_llf: + + def test_basic(self): + x = stats.norm.rvs(size=10000, loc=10, random_state=54321) + lmbda = 1 + llf = stats.boxcox_llf(lmbda, x) + llf_expected = -x.size / 2. 
* np.log(np.sum(x.std()**2)) + assert_allclose(llf, llf_expected) + + def test_array_like(self): + x = stats.norm.rvs(size=100, loc=10, random_state=54321) + lmbda = 1 + llf = stats.boxcox_llf(lmbda, x) + llf2 = stats.boxcox_llf(lmbda, list(x)) + assert_allclose(llf, llf2, rtol=1e-12) + + def test_2d_input(self): + # Note: boxcox_llf() was already working with 2-D input (sort of), so + # keep it like that. boxcox() doesn't work with 2-D input though, due + # to brent() returning a scalar. + x = stats.norm.rvs(size=100, loc=10, random_state=54321) + lmbda = 1 + llf = stats.boxcox_llf(lmbda, x) + llf2 = stats.boxcox_llf(lmbda, np.vstack([x, x]).T) + assert_allclose([llf, llf], llf2, rtol=1e-12) + + def test_empty(self): + assert_(np.isnan(stats.boxcox_llf(1, []))) + + def test_gh_6873(self): + # Regression test for gh-6873. + # This example was taken from gh-7534, a duplicate of gh-6873. + data = [198.0, 233.0, 233.0, 392.0] + llf = stats.boxcox_llf(-8, data) + # The expected value was computed with mpmath. + assert_allclose(llf, -17.93934208579061) + + def test_instability_gh20021(self): + data = [2003, 1950, 1997, 2000, 2009] + llf = stats.boxcox_llf(1e-8, data) + # The expected value was computed with mpsci, set mpmath.mp.dps=100 + assert_allclose(llf, -15.32401272869016598) + + +# This is the data from github user Qukaiyi, given as an example +# of a data set that caused boxcox to fail. 
+_boxcox_data = [ + 15957, 112079, 1039553, 711775, 173111, 307382, 183155, 53366, 760875, + 207500, 160045, 473714, 40194, 440319, 133261, 265444, 155590, 36660, + 904939, 55108, 138391, 339146, 458053, 63324, 1377727, 1342632, 41575, + 68685, 172755, 63323, 368161, 199695, 538214, 167760, 388610, 398855, + 1001873, 364591, 1320518, 194060, 194324, 2318551, 196114, 64225, 272000, + 198668, 123585, 86420, 1925556, 695798, 88664, 46199, 759135, 28051, + 345094, 1977752, 51778, 82746, 638126, 2560910, 45830, 140576, 1603787, + 57371, 548730, 5343629, 2298913, 998813, 2156812, 423966, 68350, 145237, + 131935, 1600305, 342359, 111398, 1409144, 281007, 60314, 242004, 113418, + 246211, 61940, 95858, 957805, 40909, 307955, 174159, 124278, 241193, + 872614, 304180, 146719, 64361, 87478, 509360, 167169, 933479, 620561, + 483333, 97416, 143518, 286905, 597837, 2556043, 89065, 69944, 196858, + 88883, 49379, 916265, 1527392, 626954, 54415, 89013, 2883386, 106096, + 402697, 45578, 349852, 140379, 34648, 757343, 1305442, 2054757, 121232, + 606048, 101492, 51426, 1820833, 83412, 136349, 1379924, 505977, 1303486, + 95853, 146451, 285422, 2205423, 259020, 45864, 684547, 182014, 784334, + 174793, 563068, 170745, 1195531, 63337, 71833, 199978, 2330904, 227335, + 898280, 75294, 2011361, 116771, 157489, 807147, 1321443, 1148635, 2456524, + 81839, 1228251, 97488, 1051892, 75397, 3009923, 2732230, 90923, 39735, + 132433, 225033, 337555, 1204092, 686588, 1062402, 40362, 1361829, 1497217, + 150074, 551459, 2019128, 39581, 45349, 1117187, 87845, 1877288, 164448, + 10338362, 24942, 64737, 769946, 2469124, 2366997, 259124, 2667585, 29175, + 56250, 74450, 96697, 5920978, 838375, 225914, 119494, 206004, 430907, + 244083, 219495, 322239, 407426, 618748, 2087536, 2242124, 4736149, 124624, + 406305, 240921, 2675273, 4425340, 821457, 578467, 28040, 348943, 48795, + 145531, 52110, 1645730, 1768364, 348363, 85042, 2673847, 81935, 169075, + 367733, 135474, 383327, 1207018, 93481, 5934183, 352190, 
636533, 145870, + 55659, 146215, 73191, 248681, 376907, 1606620, 169381, 81164, 246390, + 236093, 885778, 335969, 49266, 381430, 307437, 350077, 34346, 49340, + 84715, 527120, 40163, 46898, 4609439, 617038, 2239574, 159905, 118337, + 120357, 430778, 3799158, 3516745, 54198, 2970796, 729239, 97848, 6317375, + 887345, 58198, 88111, 867595, 210136, 1572103, 1420760, 574046, 845988, + 509743, 397927, 1119016, 189955, 3883644, 291051, 126467, 1239907, 2556229, + 411058, 657444, 2025234, 1211368, 93151, 577594, 4842264, 1531713, 305084, + 479251, 20591, 1466166, 137417, 897756, 594767, 3606337, 32844, 82426, + 1294831, 57174, 290167, 322066, 813146, 5671804, 4425684, 895607, 450598, + 1048958, 232844, 56871, 46113, 70366, 701618, 97739, 157113, 865047, + 194810, 1501615, 1765727, 38125, 2733376, 40642, 437590, 127337, 106310, + 4167579, 665303, 809250, 1210317, 45750, 1853687, 348954, 156786, 90793, + 1885504, 281501, 3902273, 359546, 797540, 623508, 3672775, 55330, 648221, + 266831, 90030, 7118372, 735521, 1009925, 283901, 806005, 2434897, 94321, + 309571, 4213597, 2213280, 120339, 64403, 8155209, 1686948, 4327743, + 1868312, 135670, 3189615, 1569446, 706058, 58056, 2438625, 520619, 105201, + 141961, 179990, 1351440, 3148662, 2804457, 2760144, 70775, 33807, 1926518, + 2362142, 186761, 240941, 97860, 1040429, 1431035, 78892, 484039, 57845, + 724126, 3166209, 175913, 159211, 1182095, 86734, 1921472, 513546, 326016, + 1891609 +] + + +class TestBoxcox: + + def test_fixed_lmbda(self): + x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5 + xt = stats.boxcox(x, lmbda=1) + assert_allclose(xt, x - 1) + xt = stats.boxcox(x, lmbda=-1) + assert_allclose(xt, 1 - 1/x) + + xt = stats.boxcox(x, lmbda=0) + assert_allclose(xt, np.log(x)) + + # Also test that array_like input works + xt = stats.boxcox(list(x), lmbda=0) + assert_allclose(xt, np.log(x)) + + # test that constant input is accepted; see gh-12225 + xt = stats.boxcox(np.ones(10), 2) + assert_equal(xt, np.zeros(10)) + + 
def test_lmbda_None(self): + # Start from normal rv's, do inverse transform to check that + # optimization function gets close to the right answer. + lmbda = 2.5 + x = stats.norm.rvs(loc=10, size=50000, random_state=1245) + x_inv = (x * lmbda + 1)**(-lmbda) + xt, maxlog = stats.boxcox(x_inv) + + assert_almost_equal(maxlog, -1 / lmbda, decimal=2) + + def test_alpha(self): + rng = np.random.RandomState(1234) + x = _old_loggamma_rvs(5, size=50, random_state=rng) + 5 + + # Some regular values for alpha, on a small sample size + _, _, interval = stats.boxcox(x, alpha=0.75) + assert_allclose(interval, [4.004485780226041, 5.138756355035744]) + _, _, interval = stats.boxcox(x, alpha=0.05) + assert_allclose(interval, [1.2138178554857557, 8.209033272375663]) + + # Try some extreme values, see we don't hit the N=500 limit + x = _old_loggamma_rvs(7, size=500, random_state=rng) + 15 + _, _, interval = stats.boxcox(x, alpha=0.001) + assert_allclose(interval, [0.3988867, 11.40553131]) + _, _, interval = stats.boxcox(x, alpha=0.999) + assert_allclose(interval, [5.83316246, 5.83735292]) + + def test_boxcox_bad_arg(self): + # Raise ValueError if any data value is negative. + x = np.array([-1, 2]) + assert_raises(ValueError, stats.boxcox, x) + # Raise ValueError if data is constant. + assert_raises(ValueError, stats.boxcox, np.array([1])) + # Raise ValueError if data is not 1-dimensional. + assert_raises(ValueError, stats.boxcox, np.array([[1], [2]])) + + def test_empty(self): + assert_(stats.boxcox([]).shape == (0,)) + + def test_gh_6873(self): + # Regression test for gh-6873. + y, lam = stats.boxcox(_boxcox_data) + # The expected value of lam was computed with the function + # powerTransform in the R library 'car'. I trust that value + # to only about five significant digits. 
def test_bounded_optimizer_against_unbounded_optimizer(self):
    """A bounded optimizer must not return the unbounded optimum when its
    search interval excludes it.

    The interval is placed strictly above the unbounded solution, and the
    bounded solution is then expected at the interval's lower end.
    """
    # Solution found without any user-supplied optimizer.
    free_lmbda = stats.boxcox(_boxcox_data, lmbda=None)[1]

    # Search interval that starts 0.1 above the unbounded solution.
    lower, upper = free_lmbda + 0.1, free_lmbda + 1

    def optimizer(fun):
        return optimize.minimize_scalar(
            fun,
            bounds=(lower, upper),
            method="bounded",
            options={'xatol': 1e-12},
        )

    constrained_lmbda = stats.boxcox(_boxcox_data, lmbda=None,
                                     optimizer=optimizer)[1]

    # The lower bound should be active.
    assert constrained_lmbda != free_lmbda
    assert_allclose(constrained_lmbda, lower)
bad_x): + """Test boxcox_normmax raises ValueError if x contains non-positive values.""" + message = "only positive, finite, real numbers" + with pytest.raises(ValueError, match=message): + stats.boxcox_normmax(bad_x) + + @pytest.mark.parametrize('x', [ + # Attempt to trigger overflow in power expressions. + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0, + 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]), + # Attempt to trigger overflow with a large optimal lambda. + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0]), + # Attempt to trigger overflow with large data. + np.array([2003.0e200, 1950.0e200, 1997.0e200, 2000.0e200, 2009.0e200]) + ]) + def test_overflow(self, x): + with pytest.warns(UserWarning, match="The optimal lambda is"): + xt_bc, lam_bc = stats.boxcox(x) + assert np.all(np.isfinite(xt_bc)) + + +class TestBoxcoxNormmax: + def setup_method(self): + self.x = _old_loggamma_rvs(5, size=50, random_state=12345) + 5 + + def test_pearsonr(self): + maxlog = stats.boxcox_normmax(self.x) + assert_allclose(maxlog, 1.804465, rtol=1e-6) + + def test_mle(self): + maxlog = stats.boxcox_normmax(self.x, method='mle') + assert_allclose(maxlog, 1.758101, rtol=1e-6) + + # Check that boxcox() uses 'mle' + _, maxlog_boxcox = stats.boxcox(self.x) + assert_allclose(maxlog_boxcox, maxlog) + + def test_all(self): + maxlog_all = stats.boxcox_normmax(self.x, method='all') + assert_allclose(maxlog_all, [1.804465, 1.758101], rtol=1e-6) + + @pytest.mark.parametrize("method", ["mle", "pearsonr", "all"]) + @pytest.mark.parametrize("bounds", [(-1, 1), (1.1, 2), (-2, -1.1)]) + def test_bounded_optimizer_within_bounds(self, method, bounds): + + def optimizer(fun): + return optimize.minimize_scalar(fun, bounds=bounds, + method="bounded") + + maxlog = stats.boxcox_normmax(self.x, method=method, + optimizer=optimizer) + assert np.all(bounds[0] < maxlog) + assert np.all(maxlog < bounds[1]) + + def test_user_defined_optimizer(self): + # tests an optimizer that is not based on 
scipy.optimize.minimize + lmbda = stats.boxcox_normmax(self.x) + lmbda_rounded = np.round(lmbda, 5) + lmbda_range = np.linspace(lmbda_rounded-0.01, lmbda_rounded+0.01, 1001) + + class MyResult: + pass + + def optimizer(fun): + # brute force minimum over the range + objs = [] + for lmbda in lmbda_range: + objs.append(fun(lmbda)) + res = MyResult() + res.x = lmbda_range[np.argmin(objs)] + return res + + lmbda2 = stats.boxcox_normmax(self.x, optimizer=optimizer) + assert lmbda2 != lmbda # not identical + assert_allclose(lmbda2, lmbda, 1e-5) # but as close as it should be + + def test_user_defined_optimizer_and_brack_raises_error(self): + optimizer = optimize.minimize_scalar + + # Using default `brack=None` with user-defined `optimizer` works as + # expected. + stats.boxcox_normmax(self.x, brack=None, optimizer=optimizer) + + # Using user-defined `brack` with user-defined `optimizer` is expected + # to throw an error. Instead, users should specify + # optimizer-specific parameters in the optimizer function itself. + with pytest.raises(ValueError, match="`brack` must be None if " + "`optimizer` is given"): + + stats.boxcox_normmax(self.x, brack=(-2.0, 2.0), + optimizer=optimizer) + + @pytest.mark.parametrize( + 'x', ([2003.0, 1950.0, 1997.0, 2000.0, 2009.0], + [0.50000471, 0.50004979, 0.50005902, 0.50009312, 0.50001632])) + def test_overflow(self, x): + message = "The optimal lambda is..." 
+ with pytest.warns(UserWarning, match=message): + lmbda = stats.boxcox_normmax(x, method='mle') + assert np.isfinite(special.boxcox(x, lmbda)).all() + # 10000 is safety factor used in boxcox_normmax + ymax = np.finfo(np.float64).max / 10000 + x_treme = np.max(x) if lmbda > 0 else np.min(x) + y_extreme = special.boxcox(x_treme, lmbda) + assert_allclose(y_extreme, ymax * np.sign(lmbda)) + + def test_negative_ymax(self): + with pytest.raises(ValueError, match="`ymax` must be strictly positive"): + stats.boxcox_normmax(self.x, ymax=-1) + + @pytest.mark.parametrize("x", [ + # positive overflow in float64 + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0], + dtype=np.float64), + # negative overflow in float64 + np.array([0.50000471, 0.50004979, 0.50005902, 0.50009312, 0.50001632], + dtype=np.float64), + # positive overflow in float32 + np.array([200.3, 195.0, 199.7, 200.0, 200.9], + dtype=np.float32), + # negative overflow in float32 + np.array([2e-30, 1e-30, 1e-30, 1e-30, 1e-30, 1e-30], + dtype=np.float32), + ]) + @pytest.mark.parametrize("ymax", [1e10, 1e30, None]) + # TODO: add method "pearsonr" after fix overflow issue + @pytest.mark.parametrize("method", ["mle"]) + def test_user_defined_ymax_input_float64_32(self, x, ymax, method): + # Test the maximum of the transformed data close to ymax + with pytest.warns(UserWarning, match="The optimal lambda is"): + kwarg = {'ymax': ymax} if ymax is not None else {} + lmb = stats.boxcox_normmax(x, method=method, **kwarg) + x_treme = [np.min(x), np.max(x)] + ymax_res = max(abs(stats.boxcox(x_treme, lmb))) + if ymax is None: + # 10000 is safety factor used in boxcox_normmax + ymax = np.finfo(x.dtype).max / 10000 + assert_allclose(ymax, ymax_res, rtol=1e-5) + + @pytest.mark.parametrize("x", [ + # positive overflow in float32 but not float64 + [200.3, 195.0, 199.7, 200.0, 200.9], + # negative overflow in float32 but not float64 + [2e-30, 1e-30, 1e-30, 1e-30, 1e-30, 1e-30], + ]) + # TODO: add method "pearsonr" after fix overflow 
class TestYeojohnson_llf:
    """Input-handling checks for `stats.yeojohnson_llf`."""

    def test_array_like(self):
        # A plain Python list must give the same log-likelihood as the
        # ndarray it was built from.
        sample = stats.norm.rvs(size=100, loc=0, random_state=54321)
        from_array = stats.yeojohnson_llf(1, sample)
        from_list = stats.yeojohnson_llf(1, list(sample))
        assert_allclose(from_array, from_list, rtol=1e-12)

    def test_2d_input(self):
        # Two identical columns: each entry of the 2-D result must match
        # the value obtained from the 1-D sample.
        sample = stats.norm.rvs(size=100, loc=10, random_state=54321)
        single = stats.yeojohnson_llf(1, sample)
        stacked = stats.yeojohnson_llf(1, np.vstack([sample, sample]).T)
        assert_allclose([single, single], stacked, rtol=1e-12)

    def test_empty(self):
        # Empty input is expected to produce NaN.
        llf = stats.yeojohnson_llf(1, [])
        assert_(np.isnan(llf))
transform to check that + # optimization function gets close to the right answer. + + def _inverse_transform(x, lmbda): + x_inv = np.zeros(x.shape, dtype=x.dtype) + pos = x >= 0 + + # when x >= 0 + if abs(lmbda) < np.spacing(1.): + x_inv[pos] = np.exp(x[pos]) - 1 + else: # lmbda != 0 + x_inv[pos] = np.power(x[pos] * lmbda + 1, 1 / lmbda) - 1 + + # when x < 0 + if abs(lmbda - 2) > np.spacing(1.): + x_inv[~pos] = 1 - np.power(-(2 - lmbda) * x[~pos] + 1, + 1 / (2 - lmbda)) + else: # lmbda == 2 + x_inv[~pos] = 1 - np.exp(-x[~pos]) + + return x_inv + + n_samples = 20000 + np.random.seed(1234567) + x = np.random.normal(loc=0, scale=1, size=(n_samples)) + + x_inv = _inverse_transform(x, lmbda) + xt, maxlog = stats.yeojohnson(x_inv) + + assert_allclose(maxlog, lmbda, atol=1e-2) + + assert_almost_equal(0, np.linalg.norm(x - xt) / n_samples, decimal=2) + assert_almost_equal(0, xt.mean(), decimal=1) + assert_almost_equal(1, xt.std(), decimal=1) + + def test_empty(self): + assert_(stats.yeojohnson([]).shape == (0,)) + + def test_array_like(self): + x = stats.norm.rvs(size=100, loc=0, random_state=54321) + xt1, _ = stats.yeojohnson(x) + xt2, _ = stats.yeojohnson(list(x)) + assert_allclose(xt1, xt2, rtol=1e-12) + + @pytest.mark.parametrize('dtype', [np.complex64, np.complex128]) + def test_input_dtype_complex(self, dtype): + x = np.arange(6, dtype=dtype) + err_msg = ('Yeo-Johnson transformation is not defined for complex ' + 'numbers.') + with pytest.raises(ValueError, match=err_msg): + stats.yeojohnson(x) + + @pytest.mark.parametrize('dtype', [np.int8, np.uint8, np.int16, np.int32]) + def test_input_dtype_integer(self, dtype): + x_int = np.arange(8, dtype=dtype) + x_float = np.arange(8, dtype=np.float64) + xt_int, lmbda_int = stats.yeojohnson(x_int) + xt_float, lmbda_float = stats.yeojohnson(x_float) + assert_allclose(xt_int, xt_float, rtol=1e-7) + assert_allclose(lmbda_int, lmbda_float, rtol=1e-7) + + def test_input_high_variance(self): + # non-regression test for gh-10821 + x 
= np.array([3251637.22, 620695.44, 11642969.00, 2223468.22, + 85307500.00, 16494389.89, 917215.88, 11642969.00, + 2145773.87, 4962000.00, 620695.44, 651234.50, + 1907876.71, 4053297.88, 3251637.22, 3259103.08, + 9547969.00, 20631286.23, 12807072.08, 2383819.84, + 90114500.00, 17209575.46, 12852969.00, 2414609.99, + 2170368.23]) + xt_yeo, lam_yeo = stats.yeojohnson(x) + xt_box, lam_box = stats.boxcox(x + 1) + assert_allclose(xt_yeo, xt_box, rtol=1e-6) + assert_allclose(lam_yeo, lam_box, rtol=1e-6) + + @pytest.mark.parametrize('x', [ + np.array([1.0, float("nan"), 2.0]), + np.array([1.0, float("inf"), 2.0]), + np.array([1.0, -float("inf"), 2.0]), + np.array([-1.0, float("nan"), float("inf"), -float("inf"), 1.0]) + ]) + def test_nonfinite_input(self, x): + with pytest.raises(ValueError, match='Yeo-Johnson input must be finite'): + xt_yeo, lam_yeo = stats.yeojohnson(x) + + @pytest.mark.parametrize('x', [ + # Attempt to trigger overflow in power expressions. + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0, + 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]), + # Attempt to trigger overflow with a large optimal lambda. + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0]), + # Attempt to trigger overflow with large data. 
+ np.array([2003.0e200, 1950.0e200, 1997.0e200, 2000.0e200, 2009.0e200]) + ]) + def test_overflow(self, x): + # non-regression test for gh-18389 + + def optimizer(fun, lam_yeo): + out = optimize.fminbound(fun, -lam_yeo, lam_yeo, xtol=1.48e-08) + result = optimize.OptimizeResult() + result.x = out + return result + + with np.errstate(all="raise"): + xt_yeo, lam_yeo = stats.yeojohnson(x) + xt_box, lam_box = stats.boxcox( + x + 1, optimizer=partial(optimizer, lam_yeo=lam_yeo)) + assert np.isfinite(np.var(xt_yeo)) + assert np.isfinite(np.var(xt_box)) + assert_allclose(lam_yeo, lam_box, rtol=1e-6) + assert_allclose(xt_yeo, xt_box, rtol=1e-4) + + @pytest.mark.parametrize('x', [ + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0, + 2009.0, 1980.0, 1999.0, 2007.0, 1991.0]), + np.array([2003.0, 1950.0, 1997.0, 2000.0, 2009.0]) + ]) + @pytest.mark.parametrize('scale', [1, 1e-12, 1e-32, 1e-150, 1e32, 1e200]) + @pytest.mark.parametrize('sign', [1, -1]) + def test_overflow_underflow_signed_data(self, x, scale, sign): + # non-regression test for gh-18389 + with np.errstate(all="raise"): + xt_yeo, lam_yeo = stats.yeojohnson(sign * x * scale) + assert np.all(np.sign(sign * x) == np.sign(xt_yeo)) + assert np.isfinite(lam_yeo) + assert np.isfinite(np.var(xt_yeo)) + + @pytest.mark.parametrize('x', [ + np.array([0, 1, 2, 3]), + np.array([0, -1, 2, -3]), + np.array([0, 0, 0]) + ]) + @pytest.mark.parametrize('sign', [1, -1]) + @pytest.mark.parametrize('brack', [None, (-2, 2)]) + def test_integer_signed_data(self, x, sign, brack): + with np.errstate(all="raise"): + x_int = sign * x + x_float = x_int.astype(np.float64) + lam_yeo_int = stats.yeojohnson_normmax(x_int, brack=brack) + xt_yeo_int = stats.yeojohnson(x_int, lmbda=lam_yeo_int) + lam_yeo_float = stats.yeojohnson_normmax(x_float, brack=brack) + xt_yeo_float = stats.yeojohnson(x_float, lmbda=lam_yeo_float) + assert np.all(np.sign(x_int) == np.sign(xt_yeo_int)) + assert np.isfinite(lam_yeo_int) + assert 
class TestYeojohnsonNormmax:
    """Reference-value checks for `stats.yeojohnson_normmax`."""

    def setup_method(self):
        # Fixed-seed sample shifted by 5; read by tests through ``self.x``.
        shifted_sample = _old_loggamma_rvs(5, size=50, random_state=12345) + 5
        self.x = shifted_sample

    def test_mle(self):
        fitted = stats.yeojohnson_normmax(self.x)
        assert_allclose(fitted, 1.876393, rtol=1e-6)

    def test_darwin_example(self):
        # Data from the original paper "A new family of power
        # transformations to improve normality or symmetry" by Yeo and
        # Johnson.
        darwin = [6.1, -8.4, 1.0, 2.0, 0.7, 2.9, 3.5, 5.1, 1.8, 3.6, 7.0,
                  3.0, 9.3, 7.5, -6.0]
        fitted = stats.yeojohnson_normmax(darwin)
        assert np.allclose(fitted, 1.305, atol=1e-3)
+ V2 = stats.circvar(x, high=360) + assert_allclose(V2, V1, rtol=1e-4) + + S1 = x.std() + S2 = stats.circstd(x, high=360) + assert_allclose(S2, S1, rtol=1e-4) + + @pytest.mark.parametrize("test_func, numpy_func", + [(stats.circmean, np.mean), + (stats.circvar, np.var), + (stats.circstd, np.std)]) + def test_circfuncs_close(self, test_func, numpy_func): + # circfuncs should handle very similar inputs (gh-12740) + x = np.array([0.12675364631578953] * 10 + [0.12675365920187928] * 100) + circstat = test_func(x) + normal = numpy_func(x) + assert_allclose(circstat, normal, atol=2e-8) + + def test_circmean_axis(self): + x = np.array([[355, 5, 2, 359, 10, 350], + [351, 7, 4, 352, 9, 349], + [357, 9, 8, 358, 4, 356]]) + M1 = stats.circmean(x, high=360) + M2 = stats.circmean(x.ravel(), high=360) + assert_allclose(M1, M2, rtol=1e-14) + + M1 = stats.circmean(x, high=360, axis=1) + M2 = [stats.circmean(x[i], high=360) for i in range(x.shape[0])] + assert_allclose(M1, M2, rtol=1e-14) + + M1 = stats.circmean(x, high=360, axis=0) + M2 = [stats.circmean(x[:, i], high=360) for i in range(x.shape[1])] + assert_allclose(M1, M2, rtol=1e-14) + + def test_circvar_axis(self): + x = np.array([[355, 5, 2, 359, 10, 350], + [351, 7, 4, 352, 9, 349], + [357, 9, 8, 358, 4, 356]]) + + V1 = stats.circvar(x, high=360) + V2 = stats.circvar(x.ravel(), high=360) + assert_allclose(V1, V2, rtol=1e-11) + + V1 = stats.circvar(x, high=360, axis=1) + V2 = [stats.circvar(x[i], high=360) for i in range(x.shape[0])] + assert_allclose(V1, V2, rtol=1e-11) + + V1 = stats.circvar(x, high=360, axis=0) + V2 = [stats.circvar(x[:, i], high=360) for i in range(x.shape[1])] + assert_allclose(V1, V2, rtol=1e-11) + + def test_circstd_axis(self): + x = np.array([[355, 5, 2, 359, 10, 350], + [351, 7, 4, 352, 9, 349], + [357, 9, 8, 358, 4, 356]]) + + S1 = stats.circstd(x, high=360) + S2 = stats.circstd(x.ravel(), high=360) + assert_allclose(S1, S2, rtol=1e-11) + + S1 = stats.circstd(x, high=360, axis=1) + S2 = 
[stats.circstd(x[i], high=360) for i in range(x.shape[0])] + assert_allclose(S1, S2, rtol=1e-11) + + S1 = stats.circstd(x, high=360, axis=0) + S2 = [stats.circstd(x[:, i], high=360) for i in range(x.shape[1])] + assert_allclose(S1, S2, rtol=1e-11) + + @pytest.mark.parametrize("test_func,expected", + [(stats.circmean, 0.167690146), + (stats.circvar, 0.006455174270186603), + (stats.circstd, 6.520702116)]) + def test_circfuncs_array_like(self, test_func, expected): + x = [355, 5, 2, 359, 10, 350] + assert_allclose(test_func(x, high=360), expected, rtol=1e-7) + + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_empty(self, test_func): + assert_(np.isnan(test_func([]))) + + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_nan_propagate(self, test_func): + x = [355, 5, 2, 359, 10, 350, np.nan] + assert_(np.isnan(test_func(x, high=360))) + + @pytest.mark.parametrize("test_func,expected", + [(stats.circmean, + {None: np.nan, 0: 355.66582264, 1: 0.28725053}), + (stats.circvar, + {None: np.nan, + 0: 0.002570671054089924, + 1: 0.005545914017677123}), + (stats.circstd, + {None: np.nan, 0: 4.11093193, 1: 6.04265394})]) + def test_nan_propagate_array(self, test_func, expected): + x = np.array([[355, 5, 2, 359, 10, 350, 1], + [351, 7, 4, 352, 9, 349, np.nan], + [1, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) + for axis in expected.keys(): + out = test_func(x, high=360, axis=axis) + if axis is None: + assert_(np.isnan(out)) + else: + assert_allclose(out[0], expected[axis], rtol=1e-7) + assert_(np.isnan(out[1:]).all()) + + @pytest.mark.parametrize("test_func,expected", + [(stats.circmean, + {None: 359.4178026893944, + 0: np.array([353.0, 6.0, 3.0, 355.5, 9.5, + 349.5]), + 1: np.array([0.16769015, 358.66510252])}), + (stats.circvar, + {None: 0.008396678483192477, + 0: np.array([1.9997969, 0.4999873, 0.4999873, + 6.1230956, 0.1249992, 0.1249992] + )*(np.pi/180)**2, + 1: 
np.array([0.006455174270186603, + 0.01016767581393285])}), + (stats.circstd, + {None: 7.440570778057074, + 0: np.array([2.00020313, 1.00002539, 1.00002539, + 3.50108929, 0.50000317, + 0.50000317]), + 1: np.array([6.52070212, 8.19138093])})]) + def test_nan_omit_array(self, test_func, expected): + x = np.array([[355, 5, 2, 359, 10, 350, np.nan], + [351, 7, 4, 352, 9, 349, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]]) + for axis in expected.keys(): + out = test_func(x, high=360, nan_policy='omit', axis=axis) + if axis is None: + assert_allclose(out, expected[axis], rtol=1e-7) + else: + assert_allclose(out[:-1], expected[axis], rtol=1e-7) + assert_(np.isnan(out[-1])) + + @pytest.mark.parametrize("test_func,expected", + [(stats.circmean, 0.167690146), + (stats.circvar, 0.006455174270186603), + (stats.circstd, 6.520702116)]) + def test_nan_omit(self, test_func, expected): + x = [355, 5, 2, 359, 10, 350, np.nan] + assert_allclose(test_func(x, high=360, nan_policy='omit'), + expected, rtol=1e-7) + + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_nan_omit_all(self, test_func): + x = [np.nan, np.nan, np.nan, np.nan, np.nan] + assert_(np.isnan(test_func(x, nan_policy='omit'))) + + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_nan_omit_all_axis(self, test_func): + x = np.array([[np.nan, np.nan, np.nan, np.nan, np.nan], + [np.nan, np.nan, np.nan, np.nan, np.nan]]) + out = test_func(x, nan_policy='omit', axis=1) + assert_(np.isnan(out).all()) + assert_(len(out) == 2) + + @pytest.mark.parametrize("x", + [[355, 5, 2, 359, 10, 350, np.nan], + np.array([[355, 5, 2, 359, 10, 350, np.nan], + [351, 7, 4, 352, np.nan, 9, 349]])]) + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_nan_raise(self, test_func, x): + assert_raises(ValueError, test_func, x, high=360, nan_policy='raise') + + 
@pytest.mark.parametrize("x", + [[355, 5, 2, 359, 10, 350, np.nan], + np.array([[355, 5, 2, 359, 10, 350, np.nan], + [351, 7, 4, 352, np.nan, 9, 349]])]) + @pytest.mark.parametrize("test_func", [stats.circmean, stats.circvar, + stats.circstd]) + def test_bad_nan_policy(self, test_func, x): + assert_raises(ValueError, test_func, x, high=360, nan_policy='foobar') + + def test_circmean_scalar(self): + x = 1. + M1 = x + M2 = stats.circmean(x) + assert_allclose(M2, M1, rtol=1e-5) + + def test_circmean_range(self): + # regression test for gh-6420: circmean(..., high, low) must be + # between `high` and `low` + m = stats.circmean(np.arange(0, 2, 0.1), np.pi, -np.pi) + assert_(m < np.pi) + assert_(m > -np.pi) + + def test_circfuncs_uint8(self): + # regression test for gh-7255: overflow when working with + # numpy uint8 data type + x = np.array([150, 10], dtype='uint8') + assert_equal(stats.circmean(x, high=180), 170.0) + assert_allclose(stats.circvar(x, high=180), 0.2339555554617, rtol=1e-7) + assert_allclose(stats.circstd(x, high=180), 20.91551378, rtol=1e-7) + + +class TestMedianTest: + + def test_bad_n_samples(self): + # median_test requires at least two samples. + assert_raises(ValueError, stats.median_test, [1, 2, 3]) + + def test_empty_sample(self): + # Each sample must contain at least one value. + assert_raises(ValueError, stats.median_test, [], [1, 2, 3]) + + def test_empty_when_ties_ignored(self): + # The grand median is 1, and all values in the first argument are + # equal to the grand median. With ties="ignore", those values are + # ignored, which results in the first sample being (in effect) empty. + # This should raise a ValueError. + assert_raises(ValueError, stats.median_test, + [1, 1, 1, 1], [2, 0, 1], [2, 0], ties="ignore") + + def test_empty_contingency_row(self): + # The grand median is 1, and with the default ties="below", all the + # values in the samples are counted as being below the grand median. 
+ # This would result a row of zeros in the contingency table, which is + # an error. + assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1]) + + # With ties="above", all the values are counted as above the + # grand median. + assert_raises(ValueError, stats.median_test, [1, 1, 1], [1, 1, 1], + ties="above") + + def test_bad_ties(self): + assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5], + ties="foo") + + def test_bad_nan_policy(self): + assert_raises(ValueError, stats.median_test, [1, 2, 3], [4, 5], + nan_policy='foobar') + + def test_bad_keyword(self): + assert_raises(TypeError, stats.median_test, [1, 2, 3], [4, 5], + foo="foo") + + def test_simple(self): + x = [1, 2, 3] + y = [1, 2, 3] + stat, p, med, tbl = stats.median_test(x, y) + + # The median is floating point, but this equality test should be safe. + assert_equal(med, 2.0) + + assert_array_equal(tbl, [[1, 1], [2, 2]]) + + # The expected values of the contingency table equal the contingency + # table, so the statistic should be 0 and the p-value should be 1. + assert_equal(stat, 0) + assert_equal(p, 1) + + def test_ties_options(self): + # Test the contingency table calculation. + x = [1, 2, 3, 4] + y = [5, 6] + z = [7, 8, 9] + # grand median is 5. + + # Default 'ties' option is "below". 
+ stat, p, m, tbl = stats.median_test(x, y, z) + assert_equal(m, 5) + assert_equal(tbl, [[0, 1, 3], [4, 1, 0]]) + + stat, p, m, tbl = stats.median_test(x, y, z, ties="ignore") + assert_equal(m, 5) + assert_equal(tbl, [[0, 1, 3], [4, 0, 0]]) + + stat, p, m, tbl = stats.median_test(x, y, z, ties="above") + assert_equal(m, 5) + assert_equal(tbl, [[0, 2, 3], [4, 0, 0]]) + + def test_nan_policy_options(self): + x = [1, 2, np.nan] + y = [4, 5, 6] + mt1 = stats.median_test(x, y, nan_policy='propagate') + s, p, m, t = stats.median_test(x, y, nan_policy='omit') + + assert_equal(mt1, (np.nan, np.nan, np.nan, None)) + assert_allclose(s, 0.31250000000000006) + assert_allclose(p, 0.57615012203057869) + assert_equal(m, 4.0) + assert_equal(t, np.array([[0, 2], [2, 1]])) + assert_raises(ValueError, stats.median_test, x, y, nan_policy='raise') + + def test_basic(self): + # median_test calls chi2_contingency to compute the test statistic + # and p-value. Make sure it hasn't screwed up the call... + + x = [1, 2, 3, 4, 5] + y = [2, 4, 6, 8] + + stat, p, m, tbl = stats.median_test(x, y) + assert_equal(m, 4) + assert_equal(tbl, [[1, 2], [4, 2]]) + + exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl) + assert_allclose(stat, exp_stat) + assert_allclose(p, exp_p) + + stat, p, m, tbl = stats.median_test(x, y, lambda_=0) + assert_equal(m, 4) + assert_equal(tbl, [[1, 2], [4, 2]]) + + exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, lambda_=0) + assert_allclose(stat, exp_stat) + assert_allclose(p, exp_p) + + stat, p, m, tbl = stats.median_test(x, y, correction=False) + assert_equal(m, 4) + assert_equal(tbl, [[1, 2], [4, 2]]) + + exp_stat, exp_p, dof, e = stats.chi2_contingency(tbl, correction=False) + assert_allclose(stat, exp_stat) + assert_allclose(p, exp_p) + + @pytest.mark.parametrize("correction", [False, True]) + def test_result(self, correction): + x = [1, 2, 3] + y = [1, 2, 3] + + res = stats.median_test(x, y, correction=correction) + assert_equal((res.statistic, res.pvalue, 
res.median, res.table), res) + + +class TestDirectionalStats: + # Reference implementations are not available + def test_directional_stats_correctness(self): + # Data from Fisher: Dispersion on a sphere, 1953 and + # Mardia and Jupp, Directional Statistics. + + decl = -np.deg2rad(np.array([343.2, 62., 36.9, 27., 359., + 5.7, 50.4, 357.6, 44.])) + incl = -np.deg2rad(np.array([66.1, 68.7, 70.1, 82.1, 79.5, + 73., 69.3, 58.8, 51.4])) + data = np.stack((np.cos(incl) * np.cos(decl), + np.cos(incl) * np.sin(decl), + np.sin(incl)), + axis=1) + + dirstats = stats.directional_stats(data) + directional_mean = dirstats.mean_direction + mean_rounded = np.round(directional_mean, 4) + + reference_mean = np.array([0.2984, -0.1346, -0.9449]) + assert_allclose(mean_rounded, reference_mean) + + @pytest.mark.parametrize('angles, ref', [ + ([-np.pi/2, np.pi/2], 1.), + ([0, 2*np.pi], 0.) + ]) + def test_directional_stats_2d_special_cases(self, angles, ref): + if callable(ref): + ref = ref(angles) + data = np.stack([np.cos(angles), np.sin(angles)], axis=1) + res = 1 - stats.directional_stats(data).mean_resultant_length + assert_allclose(res, ref) + + def test_directional_stats_2d(self): + # Test that for circular data directional_stats + # yields the same result as circmean/circvar + rng = np.random.default_rng(0xec9a6899d5a2830e0d1af479dbe1fd0c) + testdata = 2 * np.pi * rng.random((1000, )) + testdata_vector = np.stack((np.cos(testdata), + np.sin(testdata)), + axis=1) + dirstats = stats.directional_stats(testdata_vector) + directional_mean = dirstats.mean_direction + directional_mean_angle = np.arctan2(directional_mean[1], + directional_mean[0]) + directional_mean_angle = directional_mean_angle % (2*np.pi) + circmean = stats.circmean(testdata) + assert_allclose(circmean, directional_mean_angle) + + directional_var = 1 - dirstats.mean_resultant_length + circular_var = stats.circvar(testdata) + assert_allclose(directional_var, circular_var) + + def test_directional_mean_higher_dim(self): 
+ # test that directional_stats works for higher dimensions + # here a 4D array is reduced over axis = 2 + data = np.array([[0.8660254, 0.5, 0.], + [0.8660254, -0.5, 0.]]) + full_array = np.tile(data, (2, 2, 2, 1)) + expected = np.array([[[1., 0., 0.], + [1., 0., 0.]], + [[1., 0., 0.], + [1., 0., 0.]]]) + dirstats = stats.directional_stats(full_array, axis=2) + assert_allclose(expected, dirstats.mean_direction) + + def test_directional_stats_list_ndarray_input(self): + # test that list and numpy array inputs yield same results + data = [[0.8660254, 0.5, 0.], [0.8660254, -0.5, 0]] + data_array = np.asarray(data) + res = stats.directional_stats(data) + ref = stats.directional_stats(data_array) + assert_allclose(res.mean_direction, ref.mean_direction) + assert_allclose(res.mean_resultant_length, + res.mean_resultant_length) + + def test_directional_stats_1d_error(self): + # test that one-dimensional data raises ValueError + data = np.ones((5, )) + message = (r"samples must at least be two-dimensional. 
" + r"Instead samples has shape: (5,)") + with pytest.raises(ValueError, match=re.escape(message)): + stats.directional_stats(data) + + def test_directional_stats_normalize(self): + # test that directional stats calculations yield same results + # for unnormalized input with normalize=True and normalized + # input with normalize=False + data = np.array([[0.8660254, 0.5, 0.], + [1.7320508, -1., 0.]]) + res = stats.directional_stats(data, normalize=True) + normalized_data = data / np.linalg.norm(data, axis=-1, + keepdims=True) + ref = stats.directional_stats(normalized_data, + normalize=False) + assert_allclose(res.mean_direction, ref.mean_direction) + assert_allclose(res.mean_resultant_length, + ref.mean_resultant_length) + + +class TestFDRControl: + def test_input_validation(self): + message = "`ps` must include only numbers between 0 and 1" + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([-1, 0.5, 0.7]) + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([0.5, 0.7, 2]) + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([0.5, 0.7, np.nan]) + + message = "Unrecognized `method` 'YAK'" + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([0.5, 0.7, 0.9], method='YAK') + + message = "`axis` must be an integer or `None`" + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([0.5, 0.7, 0.9], axis=1.5) + with pytest.raises(ValueError, match=message): + stats.false_discovery_control([0.5, 0.7, 0.9], axis=(1, 2)) + + def test_against_TileStats(self): + # See reference [3] of false_discovery_control + ps = [0.005, 0.009, 0.019, 0.022, 0.051, 0.101, 0.361, 0.387] + res = stats.false_discovery_control(ps) + ref = [0.036, 0.036, 0.044, 0.044, 0.082, 0.135, 0.387, 0.387] + assert_allclose(res, ref, atol=1e-3) + + @pytest.mark.parametrize("case", + [([0.24617028, 0.01140030, 0.05652047, 0.06841983, + 0.07989886, 0.01841490, 0.17540784, 
0.06841983, + 0.06841983, 0.25464082], 'bh'), + ([0.72102493, 0.03339112, 0.16554665, 0.20039952, + 0.23402122, 0.05393666, 0.51376399, 0.20039952, + 0.20039952, 0.74583488], 'by')]) + def test_against_R(self, case): + # Test against p.adjust, e.g. + # p = c(0.22155325, 0.00114003,..., 0.0364813 , 0.25464082) + # p.adjust(p, "BY") + ref, method = case + rng = np.random.default_rng(6134137338861652935) + ps = stats.loguniform.rvs(1e-3, 0.5, size=10, random_state=rng) + ps[3] = ps[7] # force a tie + res = stats.false_discovery_control(ps, method=method) + assert_allclose(res, ref, atol=1e-6) + + def test_axis_None(self): + rng = np.random.default_rng(6134137338861652935) + ps = stats.loguniform.rvs(1e-3, 0.5, size=(3, 4, 5), random_state=rng) + res = stats.false_discovery_control(ps, axis=None) + ref = stats.false_discovery_control(ps.ravel()) + assert_equal(res, ref) + + @pytest.mark.parametrize("axis", [0, 1, -1]) + def test_axis(self, axis): + rng = np.random.default_rng(6134137338861652935) + ps = stats.loguniform.rvs(1e-3, 0.5, size=(3, 4, 5), random_state=rng) + res = stats.false_discovery_control(ps, axis=axis) + ref = np.apply_along_axis(stats.false_discovery_control, axis, ps) + assert_equal(res, ref) + + def test_edge_cases(self): + assert_array_equal(stats.false_discovery_control([0.25]), [0.25]) + assert_array_equal(stats.false_discovery_control(0.25), 0.25) + assert_array_equal(stats.false_discovery_control([]), []) diff --git a/.venv/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py b/.venv/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py new file mode 100644 index 0000000000000000000000000000000000000000..0de8d0c921fcc737c11dab4dc21243f2871f621a --- /dev/null +++ b/.venv/Lib/site-packages/scipy/stats/tests/test_mstats_basic.py @@ -0,0 +1,2053 @@ +""" +Tests for the stats.mstats module (support for masked arrays) +""" +import warnings +import platform + +import numpy as np +from numpy import nan +import numpy.ma as ma +from numpy.ma 
import masked, nomask + +import scipy.stats.mstats as mstats +from scipy import stats +from .common_tests import check_named_results +import pytest +from pytest import raises as assert_raises +from numpy.ma.testutils import (assert_equal, assert_almost_equal, + assert_array_almost_equal, + assert_array_almost_equal_nulp, assert_, + assert_allclose, assert_array_equal) +from numpy.testing import suppress_warnings +from scipy.stats import _mstats_basic + + +class TestMquantiles: + def test_mquantiles_limit_keyword(self): + # Regression test for Trac ticket #867 + data = np.array([[6., 7., 1.], + [47., 15., 2.], + [49., 36., 3.], + [15., 39., 4.], + [42., 40., -999.], + [41., 41., -999.], + [7., -999., -999.], + [39., -999., -999.], + [43., -999., -999.], + [40., -999., -999.], + [36., -999., -999.]]) + desired = [[19.2, 14.6, 1.45], + [40.0, 37.5, 2.5], + [42.8, 40.05, 3.55]] + quants = mstats.mquantiles(data, axis=0, limit=(0, 50)) + assert_almost_equal(quants, desired) + + +def check_equal_gmean(array_like, desired, axis=None, dtype=None, rtol=1e-7): + # Note this doesn't test when axis is not specified + x = mstats.gmean(array_like, axis=axis, dtype=dtype) + assert_allclose(x, desired, rtol=rtol) + assert_equal(x.dtype, dtype) + + +def check_equal_hmean(array_like, desired, axis=None, dtype=None, rtol=1e-7): + x = stats.hmean(array_like, axis=axis, dtype=dtype) + assert_allclose(x, desired, rtol=rtol) + assert_equal(x.dtype, dtype) + + +class TestGeoMean: + def test_1d(self): + a = [1, 2, 3, 4] + desired = np.power(1*2*3*4, 1./4.) + check_equal_gmean(a, desired, rtol=1e-14) + + def test_1d_ma(self): + # Test a 1d masked array + a = ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) + desired = 45.2872868812 + check_equal_gmean(a, desired) + + a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1]) + desired = np.power(1*2*3, 1./3.) 
+ check_equal_gmean(a, desired, rtol=1e-14) + + def test_1d_ma_value(self): + # Test a 1d masked array with a masked value + a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100], + mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]) + desired = 41.4716627439 + check_equal_gmean(a, desired) + + def test_1d_ma0(self): + # Test a 1d masked array with zero element + a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 0]) + desired = 0 + check_equal_gmean(a, desired) + + def test_1d_ma_inf(self): + # Test a 1d masked array with negative element + a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, -1]) + desired = np.nan + with np.errstate(invalid='ignore'): + check_equal_gmean(a, desired) + + @pytest.mark.skipif(not hasattr(np, 'float96'), + reason='cannot find float96 so skipping') + def test_1d_float96(self): + a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1]) + desired_dt = np.power(1*2*3, 1./3.).astype(np.float96) + check_equal_gmean(a, desired_dt, dtype=np.float96, rtol=1e-14) + + def test_2d_ma(self): + a = ma.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + mask=[[0, 0, 0, 0], [1, 0, 0, 1], [0, 1, 1, 0]]) + desired = np.array([1, 2, 3, 4]) + check_equal_gmean(a, desired, axis=0, rtol=1e-14) + + desired = ma.array([np.power(1*2*3*4, 1./4.), + np.power(2*3, 1./2.), + np.power(1*4, 1./2.)]) + check_equal_gmean(a, desired, axis=-1, rtol=1e-14) + + # Test a 2d masked array + a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] + desired = 52.8885199 + check_equal_gmean(np.ma.array(a), desired) + + +class TestHarMean: + def test_1d(self): + a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1]) + desired = 3. 
/ (1./1 + 1./2 + 1./3) + check_equal_hmean(a, desired, rtol=1e-14) + + a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100]) + desired = 34.1417152147 + check_equal_hmean(a, desired) + + a = np.ma.array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100], + mask=[0, 0, 0, 0, 0, 0, 0, 0, 0, 1]) + desired = 31.8137186141 + check_equal_hmean(a, desired) + + @pytest.mark.skipif(not hasattr(np, 'float96'), + reason='cannot find float96 so skipping') + def test_1d_float96(self): + a = ma.array([1, 2, 3, 4], mask=[0, 0, 0, 1]) + desired_dt = np.asarray(3. / (1./1 + 1./2 + 1./3), dtype=np.float96) + check_equal_hmean(a, desired_dt, dtype=np.float96) + + def test_2d(self): + a = ma.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], + mask=[[0, 0, 0, 0], [1, 0, 0, 1], [0, 1, 1, 0]]) + desired = ma.array([1, 2, 3, 4]) + check_equal_hmean(a, desired, axis=0, rtol=1e-14) + + desired = [4./(1/1.+1/2.+1/3.+1/4.), 2./(1/2.+1/3.), 2./(1/1.+1/4.)] + check_equal_hmean(a, desired, axis=-1, rtol=1e-14) + + a = [[10, 20, 30, 40], [50, 60, 70, 80], [90, 100, 110, 120]] + desired = 38.6696271841 + check_equal_hmean(np.ma.array(a), desired) + + +class TestRanking: + def test_ranking(self): + x = ma.array([0,1,1,1,2,3,4,5,5,6,]) + assert_almost_equal(mstats.rankdata(x), + [1,3,3,3,5,6,7,8.5,8.5,10]) + x[[3,4]] = masked + assert_almost_equal(mstats.rankdata(x), + [1,2.5,2.5,0,0,4,5,6.5,6.5,8]) + assert_almost_equal(mstats.rankdata(x, use_missing=True), + [1,2.5,2.5,4.5,4.5,4,5,6.5,6.5,8]) + x = ma.array([0,1,5,1,2,4,3,5,1,6,]) + assert_almost_equal(mstats.rankdata(x), + [1,3,8.5,3,5,7,6,8.5,3,10]) + x = ma.array([[0,1,1,1,2], [3,4,5,5,6,]]) + assert_almost_equal(mstats.rankdata(x), + [[1,3,3,3,5], [6,7,8.5,8.5,10]]) + assert_almost_equal(mstats.rankdata(x, axis=1), + [[1,3,3,3,5], [1,2,3.5,3.5,5]]) + assert_almost_equal(mstats.rankdata(x,axis=0), + [[1,1,1,1,1], [2,2,2,2,2,]]) + + +class TestCorr: + def test_pearsonr(self): + # Tests some computations of Pearson's r + x = ma.arange(10) + with 
warnings.catch_warnings(): + # The tests in this context are edge cases, with perfect + # correlation or anticorrelation, or totally masked data. + # None of these should trigger a RuntimeWarning. + warnings.simplefilter("error", RuntimeWarning) + + assert_almost_equal(mstats.pearsonr(x, x)[0], 1.0) + assert_almost_equal(mstats.pearsonr(x, x[::-1])[0], -1.0) + + x = ma.array(x, mask=True) + pr = mstats.pearsonr(x, x) + assert_(pr[0] is masked) + assert_(pr[1] is masked) + + x1 = ma.array([-1.0, 0.0, 1.0]) + y1 = ma.array([0, 0, 3]) + r, p = mstats.pearsonr(x1, y1) + assert_almost_equal(r, np.sqrt(3)/2) + assert_almost_equal(p, 1.0/3) + + # (x2, y2) have the same unmasked data as (x1, y1). + mask = [False, False, False, True] + x2 = ma.array([-1.0, 0.0, 1.0, 99.0], mask=mask) + y2 = ma.array([0, 0, 3, -1], mask=mask) + r, p = mstats.pearsonr(x2, y2) + assert_almost_equal(r, np.sqrt(3)/2) + assert_almost_equal(p, 1.0/3) + + def test_pearsonr_misaligned_mask(self): + mx = np.ma.masked_array([1, 2, 3, 4, 5, 6], mask=[0, 1, 0, 0, 0, 0]) + my = np.ma.masked_array([9, 8, 7, 6, 5, 9], mask=[0, 0, 1, 0, 0, 0]) + x = np.array([1, 4, 5, 6]) + y = np.array([9, 6, 5, 9]) + mr, mp = mstats.pearsonr(mx, my) + r, p = stats.pearsonr(x, y) + assert_equal(mr, r) + assert_equal(mp, p) + + def test_spearmanr(self): + # Tests some computations of Spearman's rho + (x, y) = ([5.05,6.75,3.21,2.66], [1.65,2.64,2.64,6.95]) + assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) + (x, y) = ([5.05,6.75,3.21,2.66,np.nan],[1.65,2.64,2.64,6.95,np.nan]) + (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) + assert_almost_equal(mstats.spearmanr(x,y)[0], -0.6324555) + + x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, + 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7] + y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, + 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4] + assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299) + x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, + 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7, np.nan] 
+ y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, + 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4, np.nan] + (x, y) = (ma.fix_invalid(x), ma.fix_invalid(y)) + assert_almost_equal(mstats.spearmanr(x,y)[0], 0.6887299) + # Next test is to make sure calculation uses sufficient precision. + # The denominator's value is ~n^3 and used to be represented as an + # int. 2000**3 > 2**32 so these arrays would cause overflow on + # some machines. + x = list(range(2000)) + y = list(range(2000)) + y[0], y[9] = y[9], y[0] + y[10], y[434] = y[434], y[10] + y[435], y[1509] = y[1509], y[435] + # rho = 1 - 6 * (2 * (9^2 + 424^2 + 1074^2))/(2000 * (2000^2 - 1)) + # = 1 - (1 / 500) + # = 0.998 + assert_almost_equal(mstats.spearmanr(x,y)[0], 0.998) + + # test for namedtuple attributes + res = mstats.spearmanr(x, y) + attributes = ('correlation', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_spearmanr_alternative(self): + # check against R + # options(digits=16) + # cor.test(c(2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, + # 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7), + # c(22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, + # 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4), + # alternative='two.sided', method='spearman') + x = [2.0, 47.4, 42.0, 10.8, 60.1, 1.7, 64.0, 63.1, + 1.0, 1.4, 7.9, 0.3, 3.9, 0.3, 6.7] + y = [22.6, 8.3, 44.4, 11.9, 24.6, 0.6, 5.7, 41.6, + 0.0, 0.6, 6.7, 3.8, 1.0, 1.2, 1.4] + + r_exp = 0.6887298747763864 # from cor.test + + r, p = mstats.spearmanr(x, y) + assert_allclose(r, r_exp) + assert_allclose(p, 0.004519192910756) + + r, p = mstats.spearmanr(x, y, alternative='greater') + assert_allclose(r, r_exp) + assert_allclose(p, 0.002259596455378) + + r, p = mstats.spearmanr(x, y, alternative='less') + assert_allclose(r, r_exp) + assert_allclose(p, 0.9977404035446) + + # intuitive test (with obvious positive correlation) + n = 100 + x = np.linspace(0, 5, n) + y = 0.1*x + np.random.rand(n) # y is positively correlated w/ x + + stat1, p1 = mstats.spearmanr(x, y) + + stat2, p2 = 
mstats.spearmanr(x, y, alternative="greater") + assert_allclose(p2, p1 / 2) # positive correlation -> small p + + stat3, p3 = mstats.spearmanr(x, y, alternative="less") + assert_allclose(p3, 1 - p1 / 2) # positive correlation -> large p + + assert stat1 == stat2 == stat3 + + with pytest.raises(ValueError, match="alternative must be 'less'..."): + mstats.spearmanr(x, y, alternative="ekki-ekki") + + @pytest.mark.skipif(platform.machine() == 'ppc64le', + reason="fails/crashes on ppc64le") + def test_kendalltau(self): + # check case with maximum disorder and p=1 + x = ma.array(np.array([9, 2, 5, 6])) + y = ma.array(np.array([4, 7, 9, 11])) + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [0.0, 1.0] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # simple case without ties + x = ma.array(np.arange(10)) + y = ma.array(np.arange(10)) + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [1.0, 5.511463844797e-07] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # check exception in case of invalid method keyword + assert_raises(ValueError, mstats.kendalltau, x, y, method='banana') + + # swap a couple of values + b = y[1] + y[1] = y[2] + y[2] = b + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [0.9555555555555556, 5.511463844797e-06] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # swap a couple more + b = y[5] + y[5] = y[6] + y[6] = b + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [0.9111111111111111, 2.976190476190e-05] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # same in opposite direction + x = ma.array(np.arange(10)) + y = ma.array(np.arange(10)[::-1]) + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [-1.0, 5.511463844797e-07] 
+ assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # swap a couple of values + b = y[1] + y[1] = y[2] + y[2] = b + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [-0.9555555555555556, 5.511463844797e-06] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # swap a couple more + b = y[5] + y[5] = y[6] + y[6] = b + # Cross-check with exact result from R: + # cor.test(x,y,method="kendall",exact=1) + expected = [-0.9111111111111111, 2.976190476190e-05] + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), expected) + + # Tests some computations of Kendall's tau + x = ma.fix_invalid([5.05, 6.75, 3.21, 2.66, np.nan]) + y = ma.fix_invalid([1.65, 26.5, -5.93, 7.96, np.nan]) + z = ma.fix_invalid([1.65, 2.64, 2.64, 6.95, np.nan]) + assert_almost_equal(np.asarray(mstats.kendalltau(x, y)), + [+0.3333333, 0.75]) + assert_almost_equal(np.asarray(mstats.kendalltau(x, y, method='asymptotic')), + [+0.3333333, 0.4969059]) + assert_almost_equal(np.asarray(mstats.kendalltau(x, z)), + [-0.5477226, 0.2785987]) + # + x = ma.fix_invalid([0, 0, 0, 0, 20, 20, 0, 60, 0, 20, + 10, 10, 0, 40, 0, 20, 0, 0, 0, 0, 0, np.nan]) + y = ma.fix_invalid([0, 80, 80, 80, 10, 33, 60, 0, 67, 27, + 25, 80, 80, 80, 80, 80, 80, 0, 10, 45, np.nan, 0]) + result = mstats.kendalltau(x, y) + assert_almost_equal(np.asarray(result), [-0.1585188, 0.4128009]) + + # test for namedtuple attributes + attributes = ('correlation', 'pvalue') + check_named_results(result, attributes, ma=True) + + @pytest.mark.skipif(platform.machine() == 'ppc64le', + reason="fails/crashes on ppc64le") + @pytest.mark.slow + def test_kendalltau_large(self): + # make sure internal variable use correct precision with + # larger arrays + x = np.arange(2000, dtype=float) + x = ma.masked_greater(x, 1995) + y = np.arange(2000, dtype=float) + y = np.concatenate((y[1000:], y[:1000])) + assert_(np.isfinite(mstats.kendalltau(x, y)[1])) + + def 
test_kendalltau_seasonal(self): + # Tests the seasonal Kendall tau. + x = [[nan, nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], + [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], + [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan], + [nan, 6, 11, 4, 17, nan, 6, 1, 1, 2, 5, 1, 1]] + x = ma.fix_invalid(x).T + output = mstats.kendalltau_seasonal(x) + assert_almost_equal(output['global p-value (indep)'], 0.008, 3) + assert_almost_equal(output['seasonal p-value'].round(2), + [0.18,0.53,0.20,0.04]) + + @pytest.mark.parametrize("method", ("exact", "asymptotic")) + @pytest.mark.parametrize("alternative", ("two-sided", "greater", "less")) + def test_kendalltau_mstats_vs_stats(self, method, alternative): + # Test that mstats.kendalltau and stats.kendalltau with + # nan_policy='omit' matches behavior of stats.kendalltau + # Accuracy of the alternatives is tested in stats/tests/test_stats.py + + np.random.seed(0) + n = 50 + x = np.random.rand(n) + y = np.random.rand(n) + mask = np.random.rand(n) > 0.5 + + x_masked = ma.array(x, mask=mask) + y_masked = ma.array(y, mask=mask) + res_masked = mstats.kendalltau( + x_masked, y_masked, method=method, alternative=alternative) + + x_compressed = x_masked.compressed() + y_compressed = y_masked.compressed() + res_compressed = stats.kendalltau( + x_compressed, y_compressed, method=method, alternative=alternative) + + x[mask] = np.nan + y[mask] = np.nan + res_nan = stats.kendalltau( + x, y, method=method, nan_policy='omit', alternative=alternative) + + assert_allclose(res_masked, res_compressed) + assert_allclose(res_nan, res_compressed) + + def test_kendall_p_exact_medium(self): + # Test for the exact method with medium samples (some n >= 171) + # expected values generated using SymPy + expectations = {(100, 2393): 0.62822615287956040664, + (101, 2436): 0.60439525773513602669, + (170, 0): 2.755801935583541e-307, + (171, 0): 0.0, + (171, 1): 2.755801935583541e-307, + (172, 1): 0.0, + (200, 9797): 0.74753983745929675209, + (201, 9656): 0.40959218958120363618} 
+ for nc, expected in expectations.items(): + res = _mstats_basic._kendall_p_exact(nc[0], nc[1]) + assert_almost_equal(res, expected) + + @pytest.mark.xslow + def test_kendall_p_exact_large(self): + # Test for the exact method with large samples (n >= 171) + # expected values generated using SymPy + expectations = {(400, 38965): 0.48444283672113314099, + (401, 39516): 0.66363159823474837662, + (800, 156772): 0.42265448483120932055, + (801, 157849): 0.53437553412194416236, + (1600, 637472): 0.84200727400323538419, + (1601, 630304): 0.34465255088058593946} + + for nc, expected in expectations.items(): + res = _mstats_basic._kendall_p_exact(nc[0], nc[1]) + assert_almost_equal(res, expected) + + def test_pointbiserial(self): + x = [1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, + 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, -1] + y = [14.8, 13.8, 12.4, 10.1, 7.1, 6.1, 5.8, 4.6, 4.3, 3.5, 3.3, 3.2, + 3.0, 2.8, 2.8, 2.5, 2.4, 2.3, 2.1, 1.7, 1.7, 1.5, 1.3, 1.3, 1.2, + 1.2, 1.1, 0.8, 0.7, 0.6, 0.5, 0.2, 0.2, 0.1, np.nan] + assert_almost_equal(mstats.pointbiserialr(x, y)[0], 0.36149, 5) + + # test for namedtuple attributes + res = mstats.pointbiserialr(x, y) + attributes = ('correlation', 'pvalue') + check_named_results(res, attributes, ma=True) + + +class TestTrimming: + + def test_trim(self): + a = ma.arange(10) + assert_equal(mstats.trim(a), [0,1,2,3,4,5,6,7,8,9]) + a = ma.arange(10) + assert_equal(mstats.trim(a,(2,8)), [None,None,2,3,4,5,6,7,8,None]) + a = ma.arange(10) + assert_equal(mstats.trim(a,limits=(2,8),inclusive=(False,False)), + [None,None,None,3,4,5,6,7,None,None]) + a = ma.arange(10) + assert_equal(mstats.trim(a,limits=(0.1,0.2),relative=True), + [None,1,2,3,4,5,6,7,None,None]) + + a = ma.arange(12) + a[[0,-1]] = a[5] = masked + assert_equal(mstats.trim(a, (2,8)), + [None, None, 2, 3, 4, None, 6, 7, 8, None, None, None]) + + x = ma.arange(100).reshape(10, 10) + expected = [1]*10 + [0]*70 + [1]*20 + trimx = mstats.trim(x, (0.1,0.2), relative=True, 
axis=None) + assert_equal(trimx._mask.ravel(), expected) + trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0) + assert_equal(trimx._mask.ravel(), expected) + trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=-1) + assert_equal(trimx._mask.T.ravel(), expected) + + # same as above, but with an extra masked row inserted + x = ma.arange(110).reshape(11, 10) + x[1] = masked + expected = [1]*20 + [0]*70 + [1]*20 + trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=None) + assert_equal(trimx._mask.ravel(), expected) + trimx = mstats.trim(x, (0.1,0.2), relative=True, axis=0) + assert_equal(trimx._mask.ravel(), expected) + trimx = mstats.trim(x.T, (0.1,0.2), relative=True, axis=-1) + assert_equal(trimx.T._mask.ravel(), expected) + + def test_trim_old(self): + x = ma.arange(100) + assert_equal(mstats.trimboth(x).count(), 60) + assert_equal(mstats.trimtail(x,tail='r').count(), 80) + x[50:70] = masked + trimx = mstats.trimboth(x) + assert_equal(trimx.count(), 48) + assert_equal(trimx._mask, [1]*16 + [0]*34 + [1]*20 + [0]*14 + [1]*16) + x._mask = nomask + x.shape = (10,10) + assert_equal(mstats.trimboth(x).count(), 60) + assert_equal(mstats.trimtail(x).count(), 80) + + def test_trimr(self): + x = ma.arange(10) + result = mstats.trimr(x, limits=(0.15, 0.14), inclusive=(False, False)) + expected = ma.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], + mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 1]) + assert_equal(result, expected) + assert_equal(result.mask, expected.mask) + + def test_trimmedmean(self): + data = ma.array([77, 87, 88,114,151,210,219,246,253,262, + 296,299,306,376,428,515,666,1310,2611]) + assert_almost_equal(mstats.trimmed_mean(data,0.1), 343, 0) + assert_almost_equal(mstats.trimmed_mean(data,(0.1,0.1)), 343, 0) + assert_almost_equal(mstats.trimmed_mean(data,(0.2,0.2)), 283, 0) + + def test_trimmedvar(self): + # Basic test. Additional tests of all arguments, edge cases, + # input validation, and proper treatment of masked arrays are needed. 
+ rng = np.random.default_rng(3262323289434724460) + data_orig = rng.random(size=20) + data = np.sort(data_orig) + data = ma.array(data, mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) + assert_allclose(mstats.trimmed_var(data_orig, 0.1), data.var()) + + def test_trimmedstd(self): + # Basic test. Additional tests of all arguments, edge cases, + # input validation, and proper treatment of masked arrays are needed. + rng = np.random.default_rng(7121029245207162780) + data_orig = rng.random(size=20) + data = np.sort(data_orig) + data = ma.array(data, mask=[1, 1, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]) + assert_allclose(mstats.trimmed_std(data_orig, 0.1), data.std()) + + def test_trimmed_stde(self): + data = ma.array([77, 87, 88,114,151,210,219,246,253,262, + 296,299,306,376,428,515,666,1310,2611]) + assert_almost_equal(mstats.trimmed_stde(data,(0.2,0.2)), 56.13193, 5) + assert_almost_equal(mstats.trimmed_stde(data,0.2), 56.13193, 5) + + def test_winsorization(self): + data = ma.array([77, 87, 88,114,151,210,219,246,253,262, + 296,299,306,376,428,515,666,1310,2611]) + assert_almost_equal(mstats.winsorize(data,(0.2,0.2)).var(ddof=1), + 21551.4, 1) + assert_almost_equal( + mstats.winsorize(data, (0.2,0.2),(False,False)).var(ddof=1), + 11887.3, 1) + data[5] = masked + winsorized = mstats.winsorize(data) + assert_equal(winsorized.mask, data.mask) + + def test_winsorization_nan(self): + data = ma.array([np.nan, np.nan, 0, 1, 2]) + assert_raises(ValueError, mstats.winsorize, data, (0.05, 0.05), + nan_policy='raise') + # Testing propagate (default behavior) + assert_equal(mstats.winsorize(data, (0.4, 0.4)), + ma.array([2, 2, 2, 2, 2])) + assert_equal(mstats.winsorize(data, (0.8, 0.8)), + ma.array([np.nan, np.nan, np.nan, np.nan, np.nan])) + assert_equal(mstats.winsorize(data, (0.4, 0.4), nan_policy='omit'), + ma.array([np.nan, np.nan, 2, 2, 2])) + assert_equal(mstats.winsorize(data, (0.8, 0.8), nan_policy='omit'), + ma.array([np.nan, 
np.nan, 2, 2, 2])) + + +class TestMoments: + # Comparison numbers are found using R v.1.5.1 + # note that length(testcase) = 4 + # testmathworks comes from documentation for the + # Statistics Toolbox for Matlab and can be found at both + # https://www.mathworks.com/help/stats/kurtosis.html + # https://www.mathworks.com/help/stats/skewness.html + # Note that both test cases came from here. + testcase = [1,2,3,4] + testmathworks = ma.fix_invalid([1.165, 0.6268, 0.0751, 0.3516, -0.6965, + np.nan]) + testcase_2d = ma.array( + np.array([[0.05245846, 0.50344235, 0.86589117, 0.36936353, 0.46961149], + [0.11574073, 0.31299969, 0.45925772, 0.72618805, 0.75194407], + [0.67696689, 0.91878127, 0.09769044, 0.04645137, 0.37615733], + [0.05903624, 0.29908861, 0.34088298, 0.66216337, 0.83160998], + [0.64619526, 0.94894632, 0.27855892, 0.0706151, 0.39962917]]), + mask=np.array([[True, False, False, True, False], + [True, True, True, False, True], + [False, False, False, False, False], + [True, True, True, True, True], + [False, False, True, False, False]], dtype=bool)) + + def _assert_equal(self, actual, expect, *, shape=None, dtype=None): + expect = np.asarray(expect) + if shape is not None: + expect = np.broadcast_to(expect, shape) + assert_array_equal(actual, expect) + if dtype is None: + dtype = expect.dtype + assert actual.dtype == dtype + + def test_moment(self): + y = mstats.moment(self.testcase,1) + assert_almost_equal(y,0.0,10) + y = mstats.moment(self.testcase,2) + assert_almost_equal(y,1.25) + y = mstats.moment(self.testcase,3) + assert_almost_equal(y,0.0) + y = mstats.moment(self.testcase,4) + assert_almost_equal(y,2.5625) + + # check array_like input for moment + y = mstats.moment(self.testcase, [1, 2, 3, 4]) + assert_allclose(y, [0, 1.25, 0, 2.5625]) + + # check moment input consists only of integers + y = mstats.moment(self.testcase, 0.0) + assert_allclose(y, 1.0) + assert_raises(ValueError, mstats.moment, self.testcase, 1.2) + y = mstats.moment(self.testcase, [1.0, 
2, 3, 4.0]) + assert_allclose(y, [0, 1.25, 0, 2.5625]) + + # test empty input + y = mstats.moment([]) + self._assert_equal(y, np.nan, dtype=np.float64) + y = mstats.moment(np.array([], dtype=np.float32)) + self._assert_equal(y, np.nan, dtype=np.float32) + y = mstats.moment(np.zeros((1, 0)), axis=0) + self._assert_equal(y, [], shape=(0,), dtype=np.float64) + y = mstats.moment([[]], axis=1) + self._assert_equal(y, np.nan, shape=(1,), dtype=np.float64) + y = mstats.moment([[]], moment=[0, 1], axis=0) + self._assert_equal(y, [], shape=(2, 0)) + + x = np.arange(10.) + x[9] = np.nan + assert_equal(mstats.moment(x, 2), ma.masked) # NaN value is ignored + + def test_variation(self): + y = mstats.variation(self.testcase) + assert_almost_equal(y,0.44721359549996, 10) + + def test_variation_ddof(self): + # test variation with delta degrees of freedom + # regression test for gh-13341 + a = np.array([1, 2, 3, 4, 5]) + y = mstats.variation(a, ddof=1) + assert_almost_equal(y, 0.5270462766947299) + + def test_skewness(self): + y = mstats.skew(self.testmathworks) + assert_almost_equal(y,-0.29322304336607,10) + y = mstats.skew(self.testmathworks,bias=0) + assert_almost_equal(y,-0.437111105023940,10) + y = mstats.skew(self.testcase) + assert_almost_equal(y,0.0,10) + + # test that skew works on multidimensional masked arrays + correct_2d = ma.array( + np.array([0.6882870394455785, 0, 0.2665647526856708, + 0, -0.05211472114254485]), + mask=np.array([False, False, False, True, False], dtype=bool) + ) + assert_allclose(mstats.skew(self.testcase_2d, 1), correct_2d) + for i, row in enumerate(self.testcase_2d): + assert_almost_equal(mstats.skew(row), correct_2d[i]) + + correct_2d_bias_corrected = ma.array( + np.array([1.685952043212545, 0.0, 0.3973712716070531, 0, + -0.09026534484117164]), + mask=np.array([False, False, False, True, False], dtype=bool) + ) + assert_allclose(mstats.skew(self.testcase_2d, 1, bias=False), + correct_2d_bias_corrected) + for i, row in 
enumerate(self.testcase_2d): + assert_almost_equal(mstats.skew(row, bias=False), + correct_2d_bias_corrected[i]) + + # Check consistency between stats and mstats implementations + assert_allclose(mstats.skew(self.testcase_2d[2, :]), + stats.skew(self.testcase_2d[2, :])) + + def test_kurtosis(self): + # Set flags for axis = 0 and fisher=0 (Pearson's definition of kurtosis + # for compatibility with Matlab) + y = mstats.kurtosis(self.testmathworks, 0, fisher=0, bias=1) + assert_almost_equal(y, 2.1658856802973, 10) + # Note that MATLAB has confusing docs for the following case + # kurtosis(x,0) gives an unbiased estimate of Pearson's skewness + # kurtosis(x) gives a biased estimate of Fisher's skewness (Pearson-3) + # The MATLAB docs imply that both should give Fisher's + y = mstats.kurtosis(self.testmathworks, fisher=0, bias=0) + assert_almost_equal(y, 3.663542721189047, 10) + y = mstats.kurtosis(self.testcase, 0, 0) + assert_almost_equal(y, 1.64) + + # test that kurtosis works on multidimensional masked arrays + correct_2d = ma.array(np.array([-1.5, -3., -1.47247052385, 0., + -1.26979517952]), + mask=np.array([False, False, False, True, + False], dtype=bool)) + assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1), + correct_2d) + for i, row in enumerate(self.testcase_2d): + assert_almost_equal(mstats.kurtosis(row), correct_2d[i]) + + correct_2d_bias_corrected = ma.array( + np.array([-1.5, -3., -1.88988209538, 0., -0.5234638463918877]), + mask=np.array([False, False, False, True, False], dtype=bool)) + assert_array_almost_equal(mstats.kurtosis(self.testcase_2d, 1, + bias=False), + correct_2d_bias_corrected) + for i, row in enumerate(self.testcase_2d): + assert_almost_equal(mstats.kurtosis(row, bias=False), + correct_2d_bias_corrected[i]) + + # Check consistency between stats and mstats implementations + assert_array_almost_equal_nulp(mstats.kurtosis(self.testcase_2d[2, :]), + stats.kurtosis(self.testcase_2d[2, :]), + nulp=4) + + +class TestMode: + def 
test_mode(self): + a1 = [0,0,0,1,1,1,2,3,3,3,3,4,5,6,7] + a2 = np.reshape(a1, (3,5)) + a3 = np.array([1,2,3,4,5,6]) + a4 = np.reshape(a3, (3,2)) + ma1 = ma.masked_where(ma.array(a1) > 2, a1) + ma2 = ma.masked_where(a2 > 2, a2) + ma3 = ma.masked_where(a3 < 2, a3) + ma4 = ma.masked_where(ma.array(a4) < 2, a4) + assert_equal(mstats.mode(a1, axis=None), (3,4)) + assert_equal(mstats.mode(a1, axis=0), (3,4)) + assert_equal(mstats.mode(ma1, axis=None), (0,3)) + assert_equal(mstats.mode(a2, axis=None), (3,4)) + assert_equal(mstats.mode(ma2, axis=None), (0,3)) + assert_equal(mstats.mode(a3, axis=None), (1,1)) + assert_equal(mstats.mode(ma3, axis=None), (2,1)) + assert_equal(mstats.mode(a2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]])) + assert_equal(mstats.mode(ma2, axis=0), ([[0,0,0,1,1]], [[1,1,1,1,1]])) + assert_equal(mstats.mode(a2, axis=-1), ([[0],[3],[3]], [[3],[3],[1]])) + assert_equal(mstats.mode(ma2, axis=-1), ([[0],[1],[0]], [[3],[1],[0]])) + assert_equal(mstats.mode(ma4, axis=0), ([[3,2]], [[1,1]])) + assert_equal(mstats.mode(ma4, axis=-1), ([[2],[3],[5]], [[1],[1],[1]])) + + a1_res = mstats.mode(a1, axis=None) + + # test for namedtuple attributes + attributes = ('mode', 'count') + check_named_results(a1_res, attributes, ma=True) + + def test_mode_modifies_input(self): + # regression test for gh-6428: mode(..., axis=None) may not modify + # the input array + im = np.zeros((100, 100)) + im[:50, :] += 1 + im[:, :50] += 1 + cp = im.copy() + mstats.mode(im, None) + assert_equal(im, cp) + + +class TestPercentile: + def setup_method(self): + self.a1 = [3, 4, 5, 10, -3, -5, 6] + self.a2 = [3, -6, -2, 8, 7, 4, 2, 1] + self.a3 = [3., 4, 5, 10, -3, -5, -6, 7.0] + + def test_percentile(self): + x = np.arange(8) * 0.5 + assert_equal(mstats.scoreatpercentile(x, 0), 0.) 
+ assert_equal(mstats.scoreatpercentile(x, 100), 3.5) + assert_equal(mstats.scoreatpercentile(x, 50), 1.75) + + def test_2D(self): + x = ma.array([[1, 1, 1], + [1, 1, 1], + [4, 4, 3], + [1, 1, 1], + [1, 1, 1]]) + assert_equal(mstats.scoreatpercentile(x, 50), [1, 1, 1]) + + +class TestVariability: + """ Comparison numbers are found using R v.1.5.1 + note that length(testcase) = 4 + """ + testcase = ma.fix_invalid([1,2,3,4,np.nan]) + + def test_sem(self): + # This is not in R, so used: sqrt(var(testcase)*3/4) / sqrt(3) + y = mstats.sem(self.testcase) + assert_almost_equal(y, 0.6454972244) + n = self.testcase.count() + assert_allclose(mstats.sem(self.testcase, ddof=0) * np.sqrt(n/(n-2)), + mstats.sem(self.testcase, ddof=2)) + + def test_zmap(self): + # This is not in R, so tested by using: + # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4) + y = mstats.zmap(self.testcase, self.testcase) + desired_unmaskedvals = ([-1.3416407864999, -0.44721359549996, + 0.44721359549996, 1.3416407864999]) + assert_array_almost_equal(desired_unmaskedvals, + y.data[y.mask == False], decimal=12) # noqa: E712 + + def test_zscore(self): + # This is not in R, so tested by using: + # (testcase[i]-mean(testcase,axis=0)) / sqrt(var(testcase)*3/4) + y = mstats.zscore(self.testcase) + desired = ma.fix_invalid([-1.3416407864999, -0.44721359549996, + 0.44721359549996, 1.3416407864999, np.nan]) + assert_almost_equal(desired, y, decimal=12) + + +class TestMisc: + + def test_obrientransform(self): + args = [[5]*5+[6]*11+[7]*9+[8]*3+[9]*2+[10]*2, + [6]+[7]*2+[8]*4+[9]*9+[10]*16] + result = [5*[3.1828]+11*[0.5591]+9*[0.0344]+3*[1.6086]+2*[5.2817]+2*[11.0538], + [10.4352]+2*[4.8599]+4*[1.3836]+9*[0.0061]+16*[0.7277]] + assert_almost_equal(np.round(mstats.obrientransform(*args).T, 4), + result, 4) + + def test_ks_2samp(self): + x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], + [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], + [3, 2, 5, 6, 18, 4, 9, 1, 1, nan, 1, 1, nan], + [nan, 6, 11, 4, 17, 
nan, 6, 1, 1, 2, 5, 1, 1]] + x = ma.fix_invalid(x).T + (winter, spring, summer, fall) = x.T + + assert_almost_equal(np.round(mstats.ks_2samp(winter, spring), 4), + (0.1818, 0.9628)) + assert_almost_equal(np.round(mstats.ks_2samp(winter, spring, 'g'), 4), + (0.1469, 0.6886)) + assert_almost_equal(np.round(mstats.ks_2samp(winter, spring, 'l'), 4), + (0.1818, 0.6011)) + + def test_friedmanchisq(self): + # No missing values + args = ([9.0,9.5,5.0,7.5,9.5,7.5,8.0,7.0,8.5,6.0], + [7.0,6.5,7.0,7.5,5.0,8.0,6.0,6.5,7.0,7.0], + [6.0,8.0,4.0,6.0,7.0,6.5,6.0,4.0,6.5,3.0]) + result = mstats.friedmanchisquare(*args) + assert_almost_equal(result[0], 10.4737, 4) + assert_almost_equal(result[1], 0.005317, 6) + # Missing values + x = [[nan,nan, 4, 2, 16, 26, 5, 1, 5, 1, 2, 3, 1], + [4, 3, 5, 3, 2, 7, 3, 1, 1, 2, 3, 5, 3], + [3, 2, 5, 6, 18, 4, 9, 1, 1,nan, 1, 1,nan], + [nan, 6, 11, 4, 17,nan, 6, 1, 1, 2, 5, 1, 1]] + x = ma.fix_invalid(x) + result = mstats.friedmanchisquare(*x) + assert_almost_equal(result[0], 2.0156, 4) + assert_almost_equal(result[1], 0.5692, 4) + + # test for namedtuple attributes + attributes = ('statistic', 'pvalue') + check_named_results(result, attributes, ma=True) + + +def test_regress_simple(): + # Regress a line with sinusoidal noise. Test for #1273. 
+ x = np.linspace(0, 100, 100) + y = 0.2 * np.linspace(0, 100, 100) + 10 + y += np.sin(np.linspace(0, 20, 100)) + + result = mstats.linregress(x, y) + + # Result is of a correct class and with correct fields + lr = stats._stats_mstats_common.LinregressResult + assert_(isinstance(result, lr)) + attributes = ('slope', 'intercept', 'rvalue', 'pvalue', 'stderr') + check_named_results(result, attributes, ma=True) + assert 'intercept_stderr' in dir(result) + + # Slope and intercept are estimated correctly + assert_almost_equal(result.slope, 0.19644990055858422) + assert_almost_equal(result.intercept, 10.211269918932341) + assert_almost_equal(result.stderr, 0.002395781449783862) + assert_almost_equal(result.intercept_stderr, 0.13866936078570702) + + +def test_linregress_identical_x(): + x = np.zeros(10) + y = np.random.random(10) + msg = "Cannot calculate a linear regression if all x values are identical" + with assert_raises(ValueError, match=msg): + mstats.linregress(x, y) + + +class TestTheilslopes: + def test_theilslopes(self): + # Test for basic slope and intercept. + slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1]) + assert_almost_equal(slope, 0.5) + assert_almost_equal(intercept, 0.5) + + slope, intercept, lower, upper = mstats.theilslopes([0, 1, 1], + method='joint') + assert_almost_equal(slope, 0.5) + assert_almost_equal(intercept, 0.0) + + # Test for correct masking. + y = np.ma.array([0, 1, 100, 1], mask=[False, False, True, False]) + slope, intercept, lower, upper = mstats.theilslopes(y) + assert_almost_equal(slope, 1./3) + assert_almost_equal(intercept, 2./3) + + slope, intercept, lower, upper = mstats.theilslopes(y, + method='joint') + assert_almost_equal(slope, 1./3) + assert_almost_equal(intercept, 0.0) + + # Test of confidence intervals from example in Sen (1968). 
+ x = [1, 2, 3, 4, 10, 12, 18] + y = [9, 15, 19, 20, 45, 55, 78] + slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07) + assert_almost_equal(slope, 4) + assert_almost_equal(intercept, 4.0) + assert_almost_equal(upper, 4.38, decimal=2) + assert_almost_equal(lower, 3.71, decimal=2) + + slope, intercept, lower, upper = mstats.theilslopes(y, x, 0.07, + method='joint') + assert_almost_equal(slope, 4) + assert_almost_equal(intercept, 6.0) + assert_almost_equal(upper, 4.38, decimal=2) + assert_almost_equal(lower, 3.71, decimal=2) + + + def test_theilslopes_warnings(self): + # Test `theilslopes` with degenerate input; see gh-15943 + msg = "All `x` coordinates.*|Mean of empty slice.|invalid value encountered.*" + with pytest.warns(RuntimeWarning, match=msg): + res = mstats.theilslopes([0, 1], [0, 0]) + assert np.all(np.isnan(res)) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered...") + res = mstats.theilslopes([0, 0, 0], [0, 1, 0]) + assert_allclose(res, (0, 0, np.nan, np.nan)) + + + def test_theilslopes_namedtuple_consistency(self): + """ + Simple test to ensure tuple backwards-compatibility of the returned + TheilslopesResult object + """ + y = [1, 2, 4] + x = [4, 6, 8] + slope, intercept, low_slope, high_slope = mstats.theilslopes(y, x) + result = mstats.theilslopes(y, x) + + # note all four returned values are distinct here + assert_equal(slope, result.slope) + assert_equal(intercept, result.intercept) + assert_equal(low_slope, result.low_slope) + assert_equal(high_slope, result.high_slope) + + def test_gh19678_uint8(self): + # `theilslopes` returned unexpected results when `y` was an unsigned type. + # Check that this is resolved. 
+ rng = np.random.default_rng(2549824598234528) + y = rng.integers(0, 255, size=10, dtype=np.uint8) + res = stats.theilslopes(y, y) + np.testing.assert_allclose(res.slope, 1) + + +def test_siegelslopes(): + # method should be exact for straight line + y = 2 * np.arange(10) + 0.5 + assert_equal(mstats.siegelslopes(y), (2.0, 0.5)) + assert_equal(mstats.siegelslopes(y, method='separate'), (2.0, 0.5)) + + x = 2 * np.arange(10) + y = 5 * x - 3.0 + assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0)) + assert_equal(mstats.siegelslopes(y, x, method='separate'), (5.0, -3.0)) + + # method is robust to outliers: brekdown point of 50% + y[:4] = 1000 + assert_equal(mstats.siegelslopes(y, x), (5.0, -3.0)) + + # if there are no outliers, results should be comparble to linregress + x = np.arange(10) + y = -2.3 + 0.3*x + stats.norm.rvs(size=10, random_state=231) + slope_ols, intercept_ols, _, _, _ = stats.linregress(x, y) + + slope, intercept = mstats.siegelslopes(y, x) + assert_allclose(slope, slope_ols, rtol=0.1) + assert_allclose(intercept, intercept_ols, rtol=0.1) + + slope, intercept = mstats.siegelslopes(y, x, method='separate') + assert_allclose(slope, slope_ols, rtol=0.1) + assert_allclose(intercept, intercept_ols, rtol=0.1) + + +def test_siegelslopes_namedtuple_consistency(): + """ + Simple test to ensure tuple backwards-compatibility of the returned + SiegelslopesResult object. 
+ """ + y = [1, 2, 4] + x = [4, 6, 8] + slope, intercept = mstats.siegelslopes(y, x) + result = mstats.siegelslopes(y, x) + + # note both returned values are distinct here + assert_equal(slope, result.slope) + assert_equal(intercept, result.intercept) + + +def test_sen_seasonal_slopes(): + rng = np.random.default_rng(5765986256978575148) + x = rng.random(size=(100, 4)) + intra_slope, inter_slope = mstats.sen_seasonal_slopes(x) + + # reference implementation from the `sen_seasonal_slopes` documentation + def dijk(yi): + n = len(yi) + x = np.arange(n) + dy = yi - yi[:, np.newaxis] + dx = x - x[:, np.newaxis] + mask = np.triu(np.ones((n, n), dtype=bool), k=1) + return dy[mask]/dx[mask] + + for i in range(4): + assert_allclose(np.median(dijk(x[:, i])), intra_slope[i]) + + all_slopes = np.concatenate([dijk(x[:, i]) for i in range(x.shape[1])]) + assert_allclose(np.median(all_slopes), inter_slope) + + +def test_plotting_positions(): + # Regression test for #1256 + pos = mstats.plotting_positions(np.arange(3), 0, 0) + assert_array_almost_equal(pos.data, np.array([0.25, 0.5, 0.75])) + + +class TestNormalitytests: + + def test_vs_nonmasked(self): + x = np.array((-2, -1, 0, 1, 2, 3)*4)**2 + assert_array_almost_equal(mstats.normaltest(x), + stats.normaltest(x)) + assert_array_almost_equal(mstats.skewtest(x), + stats.skewtest(x)) + assert_array_almost_equal(mstats.kurtosistest(x), + stats.kurtosistest(x)) + + funcs = [stats.normaltest, stats.skewtest, stats.kurtosistest] + mfuncs = [mstats.normaltest, mstats.skewtest, mstats.kurtosistest] + x = [1, 2, 3, 4] + for func, mfunc in zip(funcs, mfuncs): + assert_raises(ValueError, func, x) + assert_raises(ValueError, mfunc, x) + + def test_axis_None(self): + # Test axis=None (equal to axis=0 for 1-D input) + x = np.array((-2,-1,0,1,2,3)*4)**2 + assert_allclose(mstats.normaltest(x, axis=None), mstats.normaltest(x)) + assert_allclose(mstats.skewtest(x, axis=None), mstats.skewtest(x)) + assert_allclose(mstats.kurtosistest(x, 
axis=None), + mstats.kurtosistest(x)) + + def test_maskedarray_input(self): + # Add some masked values, test result doesn't change + x = np.array((-2, -1, 0, 1, 2, 3)*4)**2 + xm = np.ma.array(np.r_[np.inf, x, 10], + mask=np.r_[True, [False] * x.size, True]) + assert_allclose(mstats.normaltest(xm), stats.normaltest(x)) + assert_allclose(mstats.skewtest(xm), stats.skewtest(x)) + assert_allclose(mstats.kurtosistest(xm), stats.kurtosistest(x)) + + def test_nd_input(self): + x = np.array((-2, -1, 0, 1, 2, 3)*4)**2 + x_2d = np.vstack([x] * 2).T + for func in [mstats.normaltest, mstats.skewtest, mstats.kurtosistest]: + res_1d = func(x) + res_2d = func(x_2d) + assert_allclose(res_2d[0], [res_1d[0]] * 2) + assert_allclose(res_2d[1], [res_1d[1]] * 2) + + def test_normaltest_result_attributes(self): + x = np.array((-2, -1, 0, 1, 2, 3)*4)**2 + res = mstats.normaltest(x) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_kurtosistest_result_attributes(self): + x = np.array((-2, -1, 0, 1, 2, 3)*4)**2 + res = mstats.kurtosistest(x) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_regression_9033(self): + # x clearly non-normal but power of negative denom needs + # to be handled correctly to reject normality + counts = [128, 0, 58, 7, 0, 41, 16, 0, 0, 167] + x = np.hstack([np.full(c, i) for i, c in enumerate(counts)]) + assert_equal(mstats.kurtosistest(x)[1] < 0.01, True) + + @pytest.mark.parametrize("test", ["skewtest", "kurtosistest"]) + @pytest.mark.parametrize("alternative", ["less", "greater"]) + def test_alternative(self, test, alternative): + x = stats.norm.rvs(loc=10, scale=2.5, size=30, random_state=123) + + stats_test = getattr(stats, test) + mstats_test = getattr(mstats, test) + + z_ex, p_ex = stats_test(x, alternative=alternative) + z, p = mstats_test(x, alternative=alternative) + assert_allclose(z, z_ex, atol=1e-12) + assert_allclose(p, p_ex, atol=1e-12) + + # test with 
masked arrays + x[1:5] = np.nan + x = np.ma.masked_array(x, mask=np.isnan(x)) + z_ex, p_ex = stats_test(x.compressed(), alternative=alternative) + z, p = mstats_test(x, alternative=alternative) + assert_allclose(z, z_ex, atol=1e-12) + assert_allclose(p, p_ex, atol=1e-12) + + def test_bad_alternative(self): + x = stats.norm.rvs(size=20, random_state=123) + msg = r"`alternative` must be..." + + with pytest.raises(ValueError, match=msg): + mstats.skewtest(x, alternative='error') + + with pytest.raises(ValueError, match=msg): + mstats.kurtosistest(x, alternative='error') + + +class TestFOneway: + def test_result_attributes(self): + a = np.array([655, 788], dtype=np.uint16) + b = np.array([789, 772], dtype=np.uint16) + res = mstats.f_oneway(a, b) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + +class TestMannwhitneyu: + # data from gh-1428 + x = np.array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 2., 1., 1., 2., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 3., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1.]) + + y = np.array([1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., 1., 1., 1., + 2., 
1., 1., 1., 2., 1., 1., 1., 1., 1., 2., 1., 1., 3., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 2., 1., 2., 1., + 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., 2., + 2., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 1., 1., 2., + 2., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., + 2., 1., 1., 2., 1., 1., 1., 1., 2., 1., 1., 1., 1., 1., + 1., 1., 1., 1., 1., 1., 1., 2., 1., 1., 1., 2., 1., 1., + 1., 1., 1., 1.]) + + def test_result_attributes(self): + res = mstats.mannwhitneyu(self.x, self.y) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_against_stats(self): + # gh-4641 reported that stats.mannwhitneyu returned half the p-value + # of mstats.mannwhitneyu. Default alternative of stats.mannwhitneyu + # is now two-sided, so they match. + res1 = mstats.mannwhitneyu(self.x, self.y) + res2 = stats.mannwhitneyu(self.x, self.y) + assert res1.statistic == res2.statistic + assert_allclose(res1.pvalue, res2.pvalue) + + +class TestKruskal: + def test_result_attributes(self): + x = [1, 3, 5, 7, 9] + y = [2, 4, 6, 8, 10] + + res = mstats.kruskal(x, y) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + +# TODO: for all ttest functions, add tests with masked array inputs +class TestTtest_rel: + def test_vs_nonmasked(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + # 1-D inputs + res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1]) + res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1]) + assert_allclose(res1, res2) + + # 2-D inputs + res1 = stats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None) + res2 = mstats.ttest_rel(outcome[:, 0], outcome[:, 1], axis=None) + assert_allclose(res1, res2) + res1 = stats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0) + res2 = 
mstats.ttest_rel(outcome[:, :2], outcome[:, 2:], axis=0) + assert_allclose(res1, res2) + + # Check default is axis=0 + res3 = mstats.ttest_rel(outcome[:, :2], outcome[:, 2:]) + assert_allclose(res2, res3) + + def test_fully_masked(self): + np.random.seed(1234567) + outcome = ma.masked_array(np.random.randn(3, 2), + mask=[[1, 1, 1], [0, 0, 0]]) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + for pair in [(outcome[:, 0], outcome[:, 1]), + ([np.nan, np.nan], [1.0, 2.0])]: + t, p = mstats.ttest_rel(*pair) + assert_array_equal(t, (np.nan, np.nan)) + assert_array_equal(p, (np.nan, np.nan)) + + def test_result_attributes(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + res = mstats.ttest_rel(outcome[:, 0], outcome[:, 1]) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_invalid_input_size(self): + assert_raises(ValueError, mstats.ttest_rel, + np.arange(10), np.arange(11)) + x = np.arange(24) + assert_raises(ValueError, mstats.ttest_rel, + x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=1) + assert_raises(ValueError, mstats.ttest_rel, + x.reshape(2, 3, 4), x.reshape(2, 4, 3), axis=2) + + def test_empty(self): + res1 = mstats.ttest_rel([], []) + assert_(np.all(np.isnan(res1))) + + def test_zero_division(self): + t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1]) + assert_equal((np.abs(t), p), (np.inf, 0)) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0]) + assert_array_equal(t, np.array([np.nan, np.nan])) + assert_array_equal(p, np.array([np.nan, np.nan])) + + def test_bad_alternative(self): + msg = r"alternative must be 'less', 'greater' or 'two-sided'" + with pytest.raises(ValueError, match=msg): + mstats.ttest_ind([1, 2, 3], [4, 5, 6], alternative='foo') + + @pytest.mark.parametrize("alternative", ["less", "greater"]) + def 
test_alternative(self, alternative): + x = stats.norm.rvs(loc=10, scale=5, size=25, random_state=42) + y = stats.norm.rvs(loc=8, scale=2, size=25, random_state=42) + + t_ex, p_ex = stats.ttest_rel(x, y, alternative=alternative) + t, p = mstats.ttest_rel(x, y, alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + # test with masked arrays + x[1:10] = np.nan + y[1:10] = np.nan + x = np.ma.masked_array(x, mask=np.isnan(x)) + y = np.ma.masked_array(y, mask=np.isnan(y)) + t, p = mstats.ttest_rel(x, y, alternative=alternative) + t_ex, p_ex = stats.ttest_rel(x.compressed(), y.compressed(), + alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + +class TestTtest_ind: + def test_vs_nonmasked(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + # 1-D inputs + res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1]) + res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1]) + assert_allclose(res1, res2) + + # 2-D inputs + res1 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None) + res2 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], axis=None) + assert_allclose(res1, res2) + res1 = stats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0) + res2 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:], axis=0) + assert_allclose(res1, res2) + + # Check default is axis=0 + res3 = mstats.ttest_ind(outcome[:, :2], outcome[:, 2:]) + assert_allclose(res2, res3) + + # Check equal_var + res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True) + res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=True) + assert_allclose(res4, res5) + res4 = stats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False) + res5 = mstats.ttest_ind(outcome[:, 0], outcome[:, 1], equal_var=False) + assert_allclose(res4, res5) + + def test_fully_masked(self): + np.random.seed(1234567) + outcome = ma.masked_array(np.random.randn(3, 2), mask=[[1, 1, 1], [0, 
0, 0]]) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + for pair in [(outcome[:, 0], outcome[:, 1]), + ([np.nan, np.nan], [1.0, 2.0])]: + t, p = mstats.ttest_ind(*pair) + assert_array_equal(t, (np.nan, np.nan)) + assert_array_equal(p, (np.nan, np.nan)) + + def test_result_attributes(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + res = mstats.ttest_ind(outcome[:, 0], outcome[:, 1]) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_empty(self): + res1 = mstats.ttest_ind([], []) + assert_(np.all(np.isnan(res1))) + + def test_zero_division(self): + t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1]) + assert_equal((np.abs(t), p), (np.inf, 0)) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + t, p = mstats.ttest_ind([0, 0, 0], [0, 0, 0]) + assert_array_equal(t, (np.nan, np.nan)) + assert_array_equal(p, (np.nan, np.nan)) + + t, p = mstats.ttest_ind([0, 0, 0], [1, 1, 1], equal_var=False) + assert_equal((np.abs(t), p), (np.inf, 0)) + assert_array_equal(mstats.ttest_ind([0, 0, 0], [0, 0, 0], + equal_var=False), (np.nan, np.nan)) + + def test_bad_alternative(self): + msg = r"alternative must be 'less', 'greater' or 'two-sided'" + with pytest.raises(ValueError, match=msg): + mstats.ttest_ind([1, 2, 3], [4, 5, 6], alternative='foo') + + @pytest.mark.parametrize("alternative", ["less", "greater"]) + def test_alternative(self, alternative): + x = stats.norm.rvs(loc=10, scale=2, size=100, random_state=123) + y = stats.norm.rvs(loc=8, scale=2, size=100, random_state=123) + + t_ex, p_ex = stats.ttest_ind(x, y, alternative=alternative) + t, p = mstats.ttest_ind(x, y, alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + # test with masked arrays + x[1:10] = np.nan + y[80:90] = np.nan + x = np.ma.masked_array(x, mask=np.isnan(x)) + y 
= np.ma.masked_array(y, mask=np.isnan(y)) + t_ex, p_ex = stats.ttest_ind(x.compressed(), y.compressed(), + alternative=alternative) + t, p = mstats.ttest_ind(x, y, alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + +class TestTtest_1samp: + def test_vs_nonmasked(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + # 1-D inputs + res1 = stats.ttest_1samp(outcome[:, 0], 1) + res2 = mstats.ttest_1samp(outcome[:, 0], 1) + assert_allclose(res1, res2) + + def test_fully_masked(self): + np.random.seed(1234567) + outcome = ma.masked_array(np.random.randn(3), mask=[1, 1, 1]) + expected = (np.nan, np.nan) + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + for pair in [((np.nan, np.nan), 0.0), (outcome, 0.0)]: + t, p = mstats.ttest_1samp(*pair) + assert_array_equal(p, expected) + assert_array_equal(t, expected) + + def test_result_attributes(self): + np.random.seed(1234567) + outcome = np.random.randn(20, 4) + [0, 0, 1, 2] + + res = mstats.ttest_1samp(outcome[:, 0], 1) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_empty(self): + res1 = mstats.ttest_1samp([], 1) + assert_(np.all(np.isnan(res1))) + + def test_zero_division(self): + t, p = mstats.ttest_1samp([0, 0, 0], 1) + assert_equal((np.abs(t), p), (np.inf, 0)) + + with suppress_warnings() as sup: + sup.filter(RuntimeWarning, "invalid value encountered in absolute") + t, p = mstats.ttest_1samp([0, 0, 0], 0) + assert_(np.isnan(t)) + assert_array_equal(p, (np.nan, np.nan)) + + def test_bad_alternative(self): + msg = r"alternative must be 'less', 'greater' or 'two-sided'" + with pytest.raises(ValueError, match=msg): + mstats.ttest_1samp([1, 2, 3], 4, alternative='foo') + + @pytest.mark.parametrize("alternative", ["less", "greater"]) + def test_alternative(self, alternative): + x = stats.norm.rvs(loc=10, scale=2, size=100, 
random_state=123) + + t_ex, p_ex = stats.ttest_1samp(x, 9, alternative=alternative) + t, p = mstats.ttest_1samp(x, 9, alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + # test with masked arrays + x[1:10] = np.nan + x = np.ma.masked_array(x, mask=np.isnan(x)) + t_ex, p_ex = stats.ttest_1samp(x.compressed(), 9, + alternative=alternative) + t, p = mstats.ttest_1samp(x, 9, alternative=alternative) + assert_allclose(t, t_ex, rtol=1e-14) + assert_allclose(p, p_ex, rtol=1e-14) + + +class TestDescribe: + """ + Tests for mstats.describe. + + Note that there are also tests for `mstats.describe` in the + class TestCompareWithStats. + """ + def test_basic_with_axis(self): + # This is a basic test that is also a regression test for gh-7303. + a = np.ma.masked_array([[0, 1, 2, 3, 4, 9], + [5, 5, 0, 9, 3, 3]], + mask=[[0, 0, 0, 0, 0, 1], + [0, 0, 1, 1, 0, 0]]) + result = mstats.describe(a, axis=1) + assert_equal(result.nobs, [5, 4]) + amin, amax = result.minmax + assert_equal(amin, [0, 3]) + assert_equal(amax, [4, 5]) + assert_equal(result.mean, [2.0, 4.0]) + assert_equal(result.variance, [2.0, 1.0]) + assert_equal(result.skewness, [0.0, 0.0]) + assert_allclose(result.kurtosis, [-1.3, -2.0]) + + +class TestCompareWithStats: + """ + Class to compare mstats results with stats results. + + It is in general assumed that scipy.stats is at a more mature stage than + stats.mstats. If a routine in mstats results in similar results like in + scipy.stats, this is considered also as a proper validation of scipy.mstats + routine. + + Different sample sizes are used for testing, as some problems between stats + and mstats are dependent on sample size. + + Author: Alexander Loew + + NOTE that some tests fail. This might be caused by + a) actual differences or bugs between stats and mstats + b) numerical inaccuracies + c) different definitions of routine interfaces + + These failures need to be checked. 
Current workaround is to have disabled these + tests, but issuing reports on scipy-dev + + """ + def get_n(self): + """ Returns list of sample sizes to be used for comparison. """ + return [1000, 100, 10, 5] + + def generate_xy_sample(self, n): + # This routine generates numpy arrays and corresponding masked arrays + # with the same data, but additional masked values + np.random.seed(1234567) + x = np.random.randn(n) + y = x + np.random.randn(n) + xm = np.full(len(x) + 5, 1e16) + ym = np.full(len(y) + 5, 1e16) + xm[0:len(x)] = x + ym[0:len(y)] = y + mask = xm > 9e15 + xm = np.ma.array(xm, mask=mask) + ym = np.ma.array(ym, mask=mask) + return x, y, xm, ym + + def generate_xy_sample2D(self, n, nx): + x = np.full((n, nx), np.nan) + y = np.full((n, nx), np.nan) + xm = np.full((n+5, nx), np.nan) + ym = np.full((n+5, nx), np.nan) + + for i in range(nx): + x[:, i], y[:, i], dx, dy = self.generate_xy_sample(n) + + xm[0:n, :] = x[0:n] + ym[0:n, :] = y[0:n] + xm = np.ma.array(xm, mask=np.isnan(xm)) + ym = np.ma.array(ym, mask=np.isnan(ym)) + return x, y, xm, ym + + def test_linregress(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + result1 = stats.linregress(x, y) + result2 = stats.mstats.linregress(xm, ym) + assert_allclose(np.asarray(result1), np.asarray(result2)) + + def test_pearsonr(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r, p = stats.pearsonr(x, y) + rm, pm = stats.mstats.pearsonr(xm, ym) + + assert_almost_equal(r, rm, decimal=14) + assert_almost_equal(p, pm, decimal=14) + + def test_spearmanr(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r, p = stats.spearmanr(x, y) + rm, pm = stats.mstats.spearmanr(xm, ym) + assert_almost_equal(r, rm, 14) + assert_almost_equal(p, pm, 14) + + def test_spearmanr_backcompat_useties(self): + # A regression test to ensure we don't break backwards compat + # more than we have to (see gh-9204). 
+ x = np.arange(6) + assert_raises(ValueError, mstats.spearmanr, x, x, False) + + def test_gmean(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.gmean(abs(x)) + rm = stats.mstats.gmean(abs(xm)) + assert_allclose(r, rm, rtol=1e-13) + + r = stats.gmean(abs(y)) + rm = stats.mstats.gmean(abs(ym)) + assert_allclose(r, rm, rtol=1e-13) + + def test_hmean(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + + r = stats.hmean(abs(x)) + rm = stats.mstats.hmean(abs(xm)) + assert_almost_equal(r, rm, 10) + + r = stats.hmean(abs(y)) + rm = stats.mstats.hmean(abs(ym)) + assert_almost_equal(r, rm, 10) + + def test_skew(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + + r = stats.skew(x) + rm = stats.mstats.skew(xm) + assert_almost_equal(r, rm, 10) + + r = stats.skew(y) + rm = stats.mstats.skew(ym) + assert_almost_equal(r, rm, 10) + + def test_moment(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + + r = stats.moment(x) + rm = stats.mstats.moment(xm) + assert_almost_equal(r, rm, 10) + + r = stats.moment(y) + rm = stats.mstats.moment(ym) + assert_almost_equal(r, rm, 10) + + def test_zscore(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + + # reference solution + zx = (x - x.mean()) / x.std() + zy = (y - y.mean()) / y.std() + + # validate stats + assert_allclose(stats.zscore(x), zx, rtol=1e-10) + assert_allclose(stats.zscore(y), zy, rtol=1e-10) + + # compare stats and mstats + assert_allclose(stats.zscore(x), stats.mstats.zscore(xm[0:len(x)]), + rtol=1e-10) + assert_allclose(stats.zscore(y), stats.mstats.zscore(ym[0:len(y)]), + rtol=1e-10) + + def test_kurtosis(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.kurtosis(x) + rm = stats.mstats.kurtosis(xm) + assert_almost_equal(r, rm, 10) + + r = stats.kurtosis(y) + rm = stats.mstats.kurtosis(ym) + assert_almost_equal(r, rm, 10) + + def 
test_sem(self): + # example from stats.sem doc + a = np.arange(20).reshape(5, 4) + am = np.ma.array(a) + r = stats.sem(a, ddof=1) + rm = stats.mstats.sem(am, ddof=1) + + assert_allclose(r, 2.82842712, atol=1e-5) + assert_allclose(rm, 2.82842712, atol=1e-5) + + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=0), + stats.sem(x, axis=None, ddof=0), decimal=13) + assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=0), + stats.sem(y, axis=None, ddof=0), decimal=13) + assert_almost_equal(stats.mstats.sem(xm, axis=None, ddof=1), + stats.sem(x, axis=None, ddof=1), decimal=13) + assert_almost_equal(stats.mstats.sem(ym, axis=None, ddof=1), + stats.sem(y, axis=None, ddof=1), decimal=13) + + def test_describe(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.describe(x, ddof=1) + rm = stats.mstats.describe(xm, ddof=1) + for ii in range(6): + assert_almost_equal(np.asarray(r[ii]), + np.asarray(rm[ii]), + decimal=12) + + def test_describe_result_attributes(self): + actual = mstats.describe(np.arange(5)) + attributes = ('nobs', 'minmax', 'mean', 'variance', 'skewness', + 'kurtosis') + check_named_results(actual, attributes, ma=True) + + def test_rankdata(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.rankdata(x) + rm = stats.mstats.rankdata(x) + assert_allclose(r, rm) + + def test_tmean(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.tmean(x),stats.mstats.tmean(xm), 14) + assert_almost_equal(stats.tmean(y),stats.mstats.tmean(ym), 14) + + def test_tmax(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.tmax(x,2.), + stats.mstats.tmax(xm,2.), 10) + assert_almost_equal(stats.tmax(y,2.), + stats.mstats.tmax(ym,2.), 10) + + assert_almost_equal(stats.tmax(x, upperlimit=3.), + stats.mstats.tmax(xm, upperlimit=3.), 10) + 
assert_almost_equal(stats.tmax(y, upperlimit=3.), + stats.mstats.tmax(ym, upperlimit=3.), 10) + + def test_tmin(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_equal(stats.tmin(x), stats.mstats.tmin(xm)) + assert_equal(stats.tmin(y), stats.mstats.tmin(ym)) + + assert_almost_equal(stats.tmin(x, lowerlimit=-1.), + stats.mstats.tmin(xm, lowerlimit=-1.), 10) + assert_almost_equal(stats.tmin(y, lowerlimit=-1.), + stats.mstats.tmin(ym, lowerlimit=-1.), 10) + + def test_zmap(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + z = stats.zmap(x, y) + zm = stats.mstats.zmap(xm, ym) + assert_allclose(z, zm[0:len(z)], atol=1e-10) + + def test_variation(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.variation(x), stats.mstats.variation(xm), + decimal=12) + assert_almost_equal(stats.variation(y), stats.mstats.variation(ym), + decimal=12) + + def test_tvar(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.tvar(x), stats.mstats.tvar(xm), + decimal=12) + assert_almost_equal(stats.tvar(y), stats.mstats.tvar(ym), + decimal=12) + + def test_trimboth(self): + a = np.arange(20) + b = stats.trimboth(a, 0.1) + bm = stats.mstats.trimboth(a, 0.1) + assert_allclose(np.sort(b), bm.data[~bm.mask]) + + def test_tsem(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + assert_almost_equal(stats.tsem(x), stats.mstats.tsem(xm), + decimal=14) + assert_almost_equal(stats.tsem(y), stats.mstats.tsem(ym), + decimal=14) + assert_almost_equal(stats.tsem(x, limits=(-2., 2.)), + stats.mstats.tsem(xm, limits=(-2., 2.)), + decimal=14) + + def test_skewtest(self): + # this test is for 1D data + for n in self.get_n(): + if n > 8: + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.skewtest(x) + rm = stats.mstats.skewtest(xm) + assert_allclose(r, rm) + + def test_skewtest_result_attributes(self): + x = 
np.array((-2, -1, 0, 1, 2, 3)*4)**2 + res = mstats.skewtest(x) + attributes = ('statistic', 'pvalue') + check_named_results(res, attributes, ma=True) + + def test_skewtest_2D_notmasked(self): + # a normal ndarray is passed to the masked function + x = np.random.random((20, 2)) * 20. + r = stats.skewtest(x) + rm = stats.mstats.skewtest(x) + assert_allclose(np.asarray(r), np.asarray(rm)) + + def test_skewtest_2D_WithMask(self): + nx = 2 + for n in self.get_n(): + if n > 8: + x, y, xm, ym = self.generate_xy_sample2D(n, nx) + r = stats.skewtest(x) + rm = stats.mstats.skewtest(xm) + + assert_allclose(r[0][0], rm[0][0], rtol=1e-14) + assert_allclose(r[0][1], rm[0][1], rtol=1e-14) + + def test_normaltest(self): + with np.errstate(over='raise'), suppress_warnings() as sup: + sup.filter(UserWarning, "kurtosistest only valid for n>=20") + for n in self.get_n(): + if n > 8: + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.normaltest(x) + rm = stats.mstats.normaltest(xm) + assert_allclose(np.asarray(r), np.asarray(rm)) + + def test_find_repeats(self): + x = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4]).astype('float') + tmp = np.asarray([1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5]).astype('float') + mask = (tmp == 5.) + xm = np.ma.array(tmp, mask=mask) + x_orig, xm_orig = x.copy(), xm.copy() + + r = stats.find_repeats(x) + rm = stats.mstats.find_repeats(xm) + + assert_equal(r, rm) + assert_equal(x, x_orig) + assert_equal(xm, xm_orig) + + # This crazy behavior is expected by count_tied_groups, but is not + # in the docstring... 
+ _, counts = stats.mstats.find_repeats([]) + assert_equal(counts, np.array(0, dtype=np.intp)) + + def test_kendalltau(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.kendalltau(x, y) + rm = stats.mstats.kendalltau(xm, ym) + assert_almost_equal(r[0], rm[0], decimal=10) + assert_almost_equal(r[1], rm[1], decimal=7) + + def test_obrientransform(self): + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + r = stats.obrientransform(x) + rm = stats.mstats.obrientransform(xm) + assert_almost_equal(r.T, rm[0:len(x)]) + + def test_ks_1samp(self): + """Checks that mstats.ks_1samp and stats.ks_1samp agree on masked arrays.""" + for mode in ['auto', 'exact', 'asymp']: + with suppress_warnings(): + for alternative in ['less', 'greater', 'two-sided']: + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + res1 = stats.ks_1samp(x, stats.norm.cdf, + alternative=alternative, mode=mode) + res2 = stats.mstats.ks_1samp(xm, stats.norm.cdf, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res2)) + res3 = stats.ks_1samp(xm, stats.norm.cdf, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res3)) + + def test_kstest_1samp(self): + """ + Checks that 1-sample mstats.kstest and stats.kstest agree on masked arrays. + """ + for mode in ['auto', 'exact', 'asymp']: + with suppress_warnings(): + for alternative in ['less', 'greater', 'two-sided']: + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + res1 = stats.kstest(x, 'norm', + alternative=alternative, mode=mode) + res2 = stats.mstats.kstest(xm, 'norm', + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res2)) + res3 = stats.kstest(xm, 'norm', + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res3)) + + def test_ks_2samp(self): + """Checks that mstats.ks_2samp and stats.ks_2samp agree on masked arrays. 
+ gh-8431""" + for mode in ['auto', 'exact', 'asymp']: + with suppress_warnings() as sup: + if mode in ['auto', 'exact']: + message = "ks_2samp: Exact calculation unsuccessful." + sup.filter(RuntimeWarning, message) + for alternative in ['less', 'greater', 'two-sided']: + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + res1 = stats.ks_2samp(x, y, + alternative=alternative, mode=mode) + res2 = stats.mstats.ks_2samp(xm, ym, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res2)) + res3 = stats.ks_2samp(xm, y, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res3)) + + def test_kstest_2samp(self): + """ + Checks that 2-sample mstats.kstest and stats.kstest agree on masked arrays. + """ + for mode in ['auto', 'exact', 'asymp']: + with suppress_warnings() as sup: + if mode in ['auto', 'exact']: + message = "ks_2samp: Exact calculation unsuccessful." + sup.filter(RuntimeWarning, message) + for alternative in ['less', 'greater', 'two-sided']: + for n in self.get_n(): + x, y, xm, ym = self.generate_xy_sample(n) + res1 = stats.kstest(x, y, + alternative=alternative, mode=mode) + res2 = stats.mstats.kstest(xm, ym, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res2)) + res3 = stats.kstest(xm, y, + alternative=alternative, mode=mode) + assert_equal(np.asarray(res1), np.asarray(res3)) + + +class TestBrunnerMunzel: + # Data from (Lumley, 1996) + X = np.ma.masked_invalid([1, 2, 1, 1, 1, np.nan, 1, 1, + 1, 1, 1, 2, 4, 1, 1, np.nan]) + Y = np.ma.masked_invalid([3, 3, 4, 3, np.nan, 1, 2, 3, 1, 1, 5, 4]) + significant = 14 + + def test_brunnermunzel_one_sided(self): + # Results are compared with R's lawstat package. 
+ u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='less') + u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='greater') + u3, p3 = mstats.brunnermunzel(self.X, self.Y, alternative='greater') + u4, p4 = mstats.brunnermunzel(self.Y, self.X, alternative='less') + + assert_almost_equal(p1, p2, decimal=self.significant) + assert_almost_equal(p3, p4, decimal=self.significant) + assert_(p1 != p3) + assert_almost_equal(u1, 3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u2, -3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u3, 3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u4, -3.1374674823029505, + decimal=self.significant) + assert_almost_equal(p1, 0.0028931043330757342, + decimal=self.significant) + assert_almost_equal(p3, 0.99710689566692423, + decimal=self.significant) + + def test_brunnermunzel_two_sided(self): + # Results are compared with R's lawstat package. + u1, p1 = mstats.brunnermunzel(self.X, self.Y, alternative='two-sided') + u2, p2 = mstats.brunnermunzel(self.Y, self.X, alternative='two-sided') + + assert_almost_equal(p1, p2, decimal=self.significant) + assert_almost_equal(u1, 3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u2, -3.1374674823029505, + decimal=self.significant) + assert_almost_equal(p1, 0.0057862086661515377, + decimal=self.significant) + + def test_brunnermunzel_default(self): + # The default value for alternative is two-sided + u1, p1 = mstats.brunnermunzel(self.X, self.Y) + u2, p2 = mstats.brunnermunzel(self.Y, self.X) + + assert_almost_equal(p1, p2, decimal=self.significant) + assert_almost_equal(u1, 3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u2, -3.1374674823029505, + decimal=self.significant) + assert_almost_equal(p1, 0.0057862086661515377, + decimal=self.significant) + + def test_brunnermunzel_alternative_error(self): + alternative = "error" + distribution = "t" + assert_(alternative not in ["two-sided", 
"greater", "less"]) + assert_raises(ValueError, + mstats.brunnermunzel, + self.X, + self.Y, + alternative, + distribution) + + def test_brunnermunzel_distribution_norm(self): + u1, p1 = mstats.brunnermunzel(self.X, self.Y, distribution="normal") + u2, p2 = mstats.brunnermunzel(self.Y, self.X, distribution="normal") + assert_almost_equal(p1, p2, decimal=self.significant) + assert_almost_equal(u1, 3.1374674823029505, + decimal=self.significant) + assert_almost_equal(u2, -3.1374674823029505, + decimal=self.significant) + assert_almost_equal(p1, 0.0017041417600383024, + decimal=self.significant) + + def test_brunnermunzel_distribution_error(self): + alternative = "two-sided" + distribution = "error" + assert_(alternative not in ["t", "normal"]) + assert_raises(ValueError, + mstats.brunnermunzel, + self.X, + self.Y, + alternative, + distribution) + + def test_brunnermunzel_empty_imput(self): + u1, p1 = mstats.brunnermunzel(self.X, []) + u2, p2 = mstats.brunnermunzel([], self.Y) + u3, p3 = mstats.brunnermunzel([], []) + + assert_(np.isnan(u1)) + assert_(np.isnan(p1)) + assert_(np.isnan(u2)) + assert_(np.isnan(p2)) + assert_(np.isnan(u3)) + assert_(np.isnan(p3)) diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_compositeexplicitautogradnonfunctional_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8088d1c6ffc9e4abef1549e5fcb38b3d4d47aee6 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any 
types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_modified_bessel_k0(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cpu_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8ba8fe9e283f88875a8a9b2d3f71ecf98215a43b --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cpu_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_modified_bessel_k0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d041899facf7819d40fc8dac67744c9c780b1080 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_cuda_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor special_modified_bessel_k0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cuda +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..4f0db26ccde177e3ae263697bcb862caae62298f --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_special_modified_bessel_k0 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6f9bebb8c8c08209ba443e381e3d200094f2395a --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_meta_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_modified_bessel_k0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace meta +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_native.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2b5b3e7b98a4f54f3dd431898617fa6f4ac59cce --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_special_modified_bessel_k0_out : public at::meta::structured_special_modified_bessel_k0 { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_ops.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..57c99fc0c0ab63648575d7086bc21cd91866a5bc --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k0_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_modified_bessel_k0 { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_modified_bessel_k0") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_modified_bessel_k0(Tensor self) -> Tensor") + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_modified_bessel_k0_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_modified_bessel_k0") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_modified_bessel_k0.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1.h new file mode 100644 index 0000000000000000000000000000000000000000..9e65b18fb7ea9a296012339b06afb5f4e4d080ed --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_modified_bessel_k1(Tensor self) -> Tensor +inline at::Tensor special_modified_bessel_k1(const at::Tensor & self) { + return at::_ops::special_modified_bessel_k1::call(self); +} + +// aten::special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_modified_bessel_k1_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_modified_bessel_k1_out::call(self, out); +} +// aten::special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & special_modified_bessel_k1_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_modified_bessel_k1_out::call(self, out); +} + +} diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e98dcc66b7810aed81ac766b079802b47dd2f0eb --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,23 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cpu_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ac5ce611bdfb47250beb7e995293c3c171a0f709 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cpu_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cuda_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1cac4c691d479d360586b47db2b061fd7594677c --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_cuda_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cuda +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..f9343c87c807294d6149975f1f71bd47b79028f5 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta.h @@ -0,0 +1,27 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_special_modified_bessel_k1 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..4740c6a05c0b249eca1eea1853634b05b9057915 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_meta_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_k1_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace meta +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_native.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_native.h new file mode 100644 index 0000000000000000000000000000000000000000..9ee4682649dd9b98377cee3840f48400c4e1c3d0 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_native.h @@ -0,0 +1,23 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_special_modified_bessel_k1_out : public at::meta::structured_special_modified_bessel_k1 { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_ops.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29e402495d43e52b0ab5218137d3fe22337eef49 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_ops.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_modified_bessel_k1 { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_modified_bessel_k1") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_modified_bessel_k1(Tensor self) -> Tensor") + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_modified_bessel_k1_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(name, "aten::special_modified_bessel_k1") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(overload_name, "out") + STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(schema_str, "special_modified_bessel_k1.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)") + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln.h new file mode 100644 index 0000000000000000000000000000000000000000..6e26ef4b02e4d0bafcfcd45fcc73c4b96ad3a4be --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln.h @@ -0,0 +1,39 @@ +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_multigammaln(Tensor self, int p) -> Tensor +inline at::Tensor special_multigammaln(const at::Tensor & self, int64_t p) { + return at::_ops::special_multigammaln::call(self, p); +} + +// aten::special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_multigammaln_out(at::Tensor & out, const at::Tensor & self, int64_t p) { + return at::_ops::special_multigammaln_out::call(self, p, out); +} +// aten::special_multigammaln.out(Tensor self, int p, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & special_multigammaln_outf(const at::Tensor & self, int64_t p, at::Tensor & out) { + return at::_ops::special_multigammaln_out::call(self, p, out); +} + +} diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_compositeimplicitautograd_dispatch.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f5981a5a742c806cdf716cb298765e95c77e48bb --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_compositeimplicitautograd_dispatch.h @@ -0,0 +1,25 @@ +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_multigammaln(const at::Tensor & self, int64_t p); +TORCH_API at::Tensor & special_multigammaln_out(at::Tensor & out, const at::Tensor & self, int64_t p); +TORCH_API at::Tensor & special_multigammaln_outf(const at::Tensor & self, int64_t p, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at diff --git a/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_native.h b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5cecc20b4ab0f1c7cc5b50d23cba7626d13aeda1 --- /dev/null +++ b/.venv/Lib/site-packages/torch/include/ATen/ops/special_multigammaln_native.h @@ -0,0 +1,22 @@ +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor special_multigammaln(const at::Tensor & self, int64_t p); +TORCH_API at::Tensor & special_multigammaln_out(const at::Tensor & self, int64_t p, at::Tensor & out); +} // namespace native +} // namespace at