#define WANT_STREAM
#define WANT_MATH
#include "include.h"
#include "newran.h"
#ifdef use_namespace
using namespace NEWRAN;
#endif
void SortAscending(Real* data, int max);
Real KS(Real* data, int n);
Real NormalDF(Real x);
double invchi95(int N);
double invchi99(int N);
void ChiSquaredTest(int* Observed, Real* Prob, int N, int n);
void TestBinomial(int N, Real p, int n);
void TestPoisson(Real mu, int n);
void TestNegativeBinomial(Real NX, Real p, int n);
void TestDiscreteGen(int N, Real* prob, int n);
// Returns x multiplied by itself.
inline Real square(Real x)
{
   const Real squared = x * x;
   return squared;
}
// Returns the third power of x, evaluated as (x*x)*x.
inline Real cube(Real x)
{
   return (x * x) * x;
}
void test3(int n)
{
cout << endl;
// Do chi-squared tests to discrete data
cout << "ChiSquared tests" << endl;
{
Real p[] = { 0.05, 0.10, 0.05, 0.5, 0.01, 0.01, 0.03, 0.20, 0.05 };
TestDiscreteGen(9, p, n);
}
{
Real p[] = { 0.4, 0.2, 0.1, 0.05, 0.025, 0.0125, 0.00625, 0.00625, 0.2 };
TestDiscreteGen(9, p, n);
}
TestNegativeBinomial(200.3, 0.05, n);
TestNegativeBinomial(150.3, 0.15, n);
TestNegativeBinomial(100.8, 0.18, n);
TestNegativeBinomial(100.8, 1.22, n);
TestNegativeBinomial(100.8, 9.0, n);
TestNegativeBinomial(10.5, 0.18, n);
TestNegativeBinomial(10.5, 1.22, n);
TestNegativeBinomial(10.5, 9.0, n);
TestNegativeBinomial(0.35, 0.18, n);
TestNegativeBinomial(0.35, 1.22, n);
TestNegativeBinomial(0.35, 9.0, n);
TestBinomial(100, 0.45, n);
TestBinomial(100, 0.25, n);
TestBinomial(100, 0.02, n);
TestBinomial(100, 0.01, n);
TestBinomial(49, 0.60, n);
TestBinomial(21, 0.70, n);
TestBinomial(10, 0.90, n);
TestBinomial(10, 0.25, n);
TestBinomial(10, 0.10, n);
TestPoisson(0.75, n);
TestPoisson(4.3, n);
TestPoisson(10, n);
TestPoisson(100, n);
Real* data = new Real[n];
if (!data) Throw(Bad_alloc());
// Apply KS test to a variety of continuous distributions
// - use cdf transform to convert to uniform
cout << endl;
cout << "Kolmogorov-Smirnoff tests" << endl;
cout << "25%, 5%, 1%, .1% upper points are 1.019, 1.358, 1.628, 1.950"
<< endl;
cout << "5% lower point is 0.520" << endl;
{
ChiSq X(1, 1.44);
for (int i = 0; i < n; i++)
{
Real x = sqrt(X.Next());
data[i] = NormalDF(x - 1.2) - NormalDF(-x - 1.2);
}
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
ChiSq X(4);
for (int i = 0; i < n; i++)
{ Real x = 0.5 * X.Next(); data[i] = (1+x)*exp(-x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
ChiSq X(2);
for (int i = 0; i < n; i++) data[i] = exp(-0.5 * X.Next());
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Pareto X(0.5);
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = 1.0 / sqrt(x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Pareto X(1.5);
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = 1.0 / (x * sqrt(x)); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal X;
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = NormalDF(x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal N; SumRandom X = 10 + 5 * N;
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = NormalDF((x-10)/5); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal N; Cauchy C; MixedRandom X = N(0.9) + C(0.1);
for (int i = 0; i < n; i++)
{
Real x = X.Next();
data[i] = 0.9*NormalDF(x)+0.1*(atan(x)/3.141592654 + 0.5);
}
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal N; MixedRandom X = N(0.9) + (10*N)(0.1);
for (int i = 0; i < n; i++)
{
Real x = X.Next();
data[i] = 0.9*NormalDF(x)+0.1*NormalDF(x/10);
}
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal X0; SumRandom X = X0 * 0.6 + X0 * 0.8;
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = NormalDF(x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Normal X1;
MixedRandom X = X1(0.2) + (X1 * 2.5 + 1.1)(0.35) + (X1 + 2.3)(0.45);
for (int i = 0; i < n; i++)
{
Real x = X.Next();
data[i] = 0.20 * NormalDF(x)
+ 0.35 * NormalDF((x - 1.1) / 2.5)
+ 0.45 * NormalDF(x - 2.3);
}
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Gamma X(0.5);
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = 2.0 * NormalDF(-sqrt(2 * x)); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Gamma X(3);
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Gamma X1(0.85); Gamma X2(2.15); SumRandom X = X1 + X2;
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = (1+x+0.5*x*x)*exp(-x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Gamma X1(0.75); Gamma X2(0.25); SumRandom X = X1 + X2;
for (int i = 0; i < n; i++) data[i] = exp(-X.Next());
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Gamma X(2);
for (int i = 0; i < n; i++)
{ Real x = X.Next(); data[i] = (1+x)*exp(-x); }
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Exponential X;
for (int i = 0; i < n; i++) data[i] = exp(-X.Next());
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Cauchy X;
for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5;
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Cauchy X0; SumRandom X = X0 * 0.3 + X0 * 0.7;
for (int i = 0; i < n; i++) data[i] = atan(X.Next())/3.141592654 + 0.5;
cout << X.Name() << ": " << KS(data, n) << endl;
}
{
Uniform X;
for (int i = 0; i < n; i++) data[i] = X.Next();
cout << X.Name() << ": " << KS(data, n) << endl;
}
delete [] data;
}
/*************************** Kolmogorov Smirnov Test ************************/
// Test the values in data[0..n-1] for being uniform on (0,1).
//   data - the sample; it is sorted into ascending order in place
//   n    - number of values in data
// Returns D * (sqrt(n) + 0.12 + 0.11/sqrt(n)), where D is the largest
// difference found between the empirical distribution function of the
// sample (taken just before and just after each point) and the uniform
// distribution function.
Real KS(Real* data, int n)
{
   SortAscending(data, n);

   Real largest = 0.0;
   for (int k = 0; k < n; ++k)
   {
      // gap to the step just above and just below the k-th ordered value
      const Real above = static_cast<Real>(k+1) / static_cast<Real>(n) - data[k];
      const Real below = data[k] - static_cast<Real>(k) / static_cast<Real>(n);
      if (largest < above) largest = above;
      if (largest < below) largest = below;
   }

   return largest * (sqrt(n) + 0.12 + 0.11 / sqrt(n));
}
/******************************** Quick sort ********************************/
// Quicksort.
// Essentially the method described in Sedgewick's "Algorithms in C++".
// My version is still partially recursive, unlike Sedgewick's, but the
// smaller segment of each split is used in the recursion, so it should
// not overload the stack.
// If the process does not seem to be converging an exception is thrown.
#define DoSimpleSort 17 // when to switch to insert sort
#define MaxDepth 50 // maximum recursion depth
static Real SortThreeDescending(Real* a, Real* b, Real* c);
static void MyQuickSortAscending(Real* first, Real* last, int depth);
static void InsertionSortAscending(Real* first, const int length, int guard);
// Rearrange the three values pointed to so that *a >= *b >= *c and return
// the middle value (the new *b). The already-ordered case is tested first
// and returns without writing anything.
static Real SortThreeDescending(Real* a, Real* b, Real* c)
{
   if (*a >= *b)
   {
      // a >= b >= c : nothing to do
      if (*b >= *c) return *b;

      // a >= c > b : exchange b and c
      if (*a >= *c)
      {
         const Real mid = *c;
         *c = *b;
         *b = mid;
         return mid;
      }

      // c > a >= b : rotate so that c's value leads
      const Real mid = *a;
      *a = *c;
      *c = *b;
      *b = mid;
      return mid;
   }

   // from here on *a < *b

   // c >= b > a : exchange a and c, b stays in the middle
   if (*c >= *b)
   {
      const Real top = *c;
      *c = *a;
      *a = top;
      return *b;
   }

   // b > a >= c : exchange a and b
   if (*a >= *c)
   {
      const Real mid = *a;
      *a = *b;
      *b = mid;
      return mid;
   }

   // b > c > a : rotate so that b's value leads
   const Real mid = *c;
   *c = *a;
   *a = *b;
   *b = mid;
   return mid;
}
// Sort data[0..max-1] into ascending order, in place.
// Arrays longer than DoSimpleSort are first passed through the quicksort,
// which leaves short segments unsorted; the insertion sort then finishes
// the job (and does all the work for short arrays).
void SortAscending(Real* data, int max)
{
   const bool needsQuickSort = (max > DoSimpleSort);
   if (needsQuickSort)
   {
      Real* const lastElement = data + max - 1;
      MyQuickSortAscending(data, lastElement, 0);
   }
   InsertionSortAscending(data, max, DoSimpleSort);
}
// Insertion sort of first[0..length-1] into ascending order.
//   first  - start of the array
//   length - number of elements
//   guard  - number of leading elements to scan when looking for the value
//            to place at the front (e.g. guard = length); values larger
//            than length are reduced to length
// The smallest of the scanned elements is swapped into first[0]. The
// insertion loop below has no lower-bound check of its own, so that front
// element is what stops it.
static void InsertionSortAscending(Real* first, const int length,
   int guard)
{
   if (length <= 1) return;

   // find the smallest value among the first `guard` elements
   Real* scan = first;
   Real smallest = *scan;
   Real* smallestAt = scan;
   if (guard > length) guard = length;
   int remaining = guard - 1;
   while (remaining--)
   {
      ++scan;
      if (smallest > *scan) { smallest = *scan; smallestAt = scan; }
   }

   // swap it with the first element
   *smallestAt = *first;
   *first = smallest;

   // insert each following element into the sorted run on its left
   remaining = length - 1;
   scan = first;
   while (remaining--)
   {
      Real* left = scan;
      ++scan;
      Real* hole = scan;
      const Real key = *hole;
      while (*left > key)
      {
         *hole = *left;       // shift the larger value one place right
         --hole;
         --left;
      }
      *hole = key;
   }
}
// Quicksort pass over the range first..last (both inclusive).
// Segments shorter than DoSimpleSort are left untouched for the caller to
// finish. After each partition the function calls itself on one side and
// loops on the other; depth is incremented on every pass through the loop
// and an Exception is thrown once it has exceeded MaxDepth.
//   first, last - first and last element of the range
//   depth       - pass counter; the top-level caller supplies 0
static void MyQuickSortAscending(Real* first, Real* last, int depth)
{
   while (true)
   {
      const int length = last - first + 1;
      if (length < DoSimpleSort) return;

      if (depth++ > MaxDepth)
         Throw(Exception("QuickSortAscending fails"));

      // order first, centre, last ascending; the middle value is the pivot
      // and the two end values act as sentinels for the scans below
      Real* centre = first + length/2;
      const Real pivot = SortThreeDescending(last, centre, first);

      Real* up = first;
      Real* down = last;
      while (true)
      {
         do { ++up; } while (*up < pivot);
         do { --down; } while (*down > pivot);
         if (down <= up) break;
         const Real held = *up;
         *up = *down;
         *down = held;
      }

      if (up > centre)
      {
         // right-hand part handled by the recursive call, left-hand here
         MyQuickSortAscending(down+1, last, depth);
         last = up-1;
      }
      else
      {
         // left-hand part handled by the recursive call, right-hand here
         MyQuickSortAscending(first, up-1, depth);
         first = down+1;
      }
   }
}
// Standard normal distribution function evaluated at x.
// Polynomial approximation from Abramowitz and Stegun; the stated accuracy
// of 7.5E-8 is absolute, not relative. Eventually a better method will be
// needed, but it is good enough here.
Real NormalDF(Real x)
{
   const Real k = 1.0 / (1.0 + 0.2316419 * fabs(x));

   // fifth-degree polynomial in k with no constant term (Horner form)
   const Real poly =
      ( 0.319381530
         + (-0.356563782
            + ( 1.781477937
               + (-1.821255978
                  + 1.330274429 * k) * k) * k) * k) * k;

   // 0.39894... is 1/sqrt(2*pi); tail is the probability beyond |x|
   const Real tail = 0.3989422804014326779399461 * exp(-0.5 * x * x) * poly;

   if (x < 0) return tail;
   return 1.0 - tail;
}
void ChiSquaredTest(int* Observed, Real* Prob, int N, int n)
{
// go for at least two expected observations per cell
// work in from ends
if (N <= 0) { cout << "no categories" << endl; return; }
if (n <= 0) { cout << "no data" << endl; return; }
int O1 = 0; Real E1 = 0.0; int O2 = 0; Real E2 = 0.0;
Real CS = 0.0; int df = 0; int i = 0; int Ni = N; Real ToGo = n;
for (;;)
{
O1 += Observed[i]; Real e1 = n * Prob[i]; E1 += e1; ToGo -= e1;
if (E1 >= 2.0 && ToGo + E2 >= 2.0)
{ CS += square(O1 - E1) / E1; df += 1; O1 = 0; E1 = 0.0; }
if (i == Ni) break;
++i;
O2 += Observed[Ni]; Real e2 = n * Prob[Ni]; E2 += e2; ToGo -= e2;
if (E2 >= 2.0 && ToGo + E1 >= 2.0)
{ CS += square(O2 - E2) / E2; df += 1; O2 = 0; E2 = 0.0; }
if (i == Ni) break;
--Ni;
}
E1 += E2; O1 += O2;
if (E1 > 0.0) { CS += square(O1 - E1) / E1; df += 1; }
if (fabs(ToGo) >= 0.01) cout << "chi-squared program fails - ";
cout << "chisq = " << CS << "; df = " << (df-1)
<< "; 95% pt. = " << invchi95(df-1)
<< "; 99% pt. = " << invchi99(df-1) << endl;
}
void TestBinomial(int N, Real p, int n)
{
Binomial X(N, p);
Real q = 1.0 - p; Real ln_p = log(p); Real ln_q = log(q);
int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
Real* prob = new Real [N+1]; if (!prob) Throw(Bad_alloc());
int i;
for (i = 0; i <= N; i++)
{
obs[i] = 0;
prob[i] = exp(ln_gamma(N+1) - ln_gamma(i+1) - ln_gamma(N-i+1)
+ i * ln_p + (N-i) * ln_q);
}
for (i = 0; i < n; i++)
{
int b = (int)X.Next();
if (b < 0 || b > N) Throw(Logic_error("Binomial error"));
obs[b]++;
}
cout << "Binomial: "; ChiSquaredTest(obs, prob, N, n);
delete [] obs; delete [] prob;
}
// Chi-squared test of the Poisson generator.
//   mu - mean of the distribution; log(mu) is taken, so mu is expected to
//        be positive
//   n  - number of variates to draw
// The cells are the values 0..N with N = 20 + mu + 10*sqrt(mu) (truncated to
// int); if N exceeds n a message is printed and no test is carried out.
// Throws Logic_error if a generated value lies outside 0..N.
// The work arrays are released before any exception is thrown from here.
void TestPoisson(Real mu, int n)
{
   Poisson X(mu);
   Real ln_mu = log(mu);
   int N = (int)(20 + mu + 10 * sqrt(mu));      // set upper bound
   if (N > n)
   {
      cout << "Poisson: range too large" << endl;
      return;
   }

   int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
   Real* prob = new Real [N+1];
   if (!prob) { delete [] obs; Throw(Bad_alloc()); }

   int i;
   for (i = 0; i <= N; i++)
   {
      obs[i] = 0;
      // Poisson probability of i, built up on the log scale
      prob[i] = exp(i * ln_mu - mu - ln_gamma(i+1));
   }

   for (i = 0; i < n; i++)
   {
      int b = (int)(X.Next());
      if (b < 0 || b > N)
      {
         // do not leak the work arrays on the error path
         delete [] obs; delete [] prob;
         Throw(Logic_error("Poisson error"));
      }
      obs[b]++;
   }

   cout << "Poisson: "; ChiSquaredTest(obs, prob, N, n);
   delete [] obs; delete [] prob;
}
// Chi-squared test of the NegativeBinomial generator.
//   NX - first parameter of the generator (need not be an integer)
//   P  - second parameter of the generator; the cell probabilities use
//        p = 1/(1+P) and q = 1 - p, and log(q) is taken, so P is expected
//        to be positive
//   n  - number of variates to draw
// The cells are the values 0..N with N = 20 + mean + 100*sqrt(var)
// (truncated to int), where mean = NX*P and var = mean*(1+P); if N exceeds n
// a message is printed and no test is carried out.
// Throws Logic_error if a generated value lies outside 0..N.
// The work arrays are released before any exception is thrown from here.
void TestNegativeBinomial(Real NX, Real P, int n)
{
   NegativeBinomial X(NX, P);
   Real Q = 1.0 + P; Real p = 1.0 / Q; Real q = 1.0 - p;
   Real ln_p = log(p); Real ln_q = log(q);
   Real mean = NX * P; Real var = mean * Q;
   int N = (int)(20 + mean + 100 * sqrt(var));  // set upper bound
                                                // won't be good enough
                                                // for large P
   if (N > n)
   {
      cout << "NegativeBinomial: range too large" << endl;
      return;
   }

   int* obs = new int [N+1]; if (!obs) Throw(Bad_alloc());
   Real* prob = new Real [N+1];
   if (!prob) { delete [] obs; Throw(Bad_alloc()); }

   int i;
   for (i = 0; i <= N; i++)
   {
      obs[i] = 0;
      // negative binomial probability of i, built up on the log scale
      prob[i] = exp(ln_gamma(NX+i) - ln_gamma(i+1) - ln_gamma(NX)
         + NX * ln_p + i * ln_q);
   }

   for (i = 0; i < n; i++)
   {
      int b = (int)X.Next();
      if (b < 0 || b > N)
      {
         // do not leak the work arrays on the error path
         delete [] obs; delete [] prob;
         Throw(Logic_error("NegativeBinomial error"));
      }
      obs[b]++;
   }

   cout << "NegativeBinomial: "; ChiSquaredTest(obs, prob, N, n);
   delete [] obs; delete [] prob;
}
void TestDiscreteGen(int N, Real* prob, int n)
{
DiscreteGen X(N, prob);
int* obs = new int [N]; if (!obs) Throw(Bad_alloc());
int i;
for (i = 0; i < N; i++) obs[i] = 0;
for (i = 0; i < n; i++)
{
int b = (int)X.Next();
if (b < 0 || b >= N) Throw(Logic_error("DiscreteGen error"));
obs[b]++;
}
cout << "DiscreteGen: "; ChiSquaredTest(obs, prob, N-1, n);
delete [] obs;
}
// Upper 95% point of the chi-squared distribution with N degrees of freedom.
// For N below 30 the value comes from a table (entry 0 is 0); for larger N
// it is computed from a cube-root normal approximation using 1.645 as the
// normal deviate and a small correction term proportional to 1/N.
// Throws Logic_error if N is negative.
double invchi95(int N)
{
   if (N < 0) Throw(Logic_error("Error in invchi95 arg"));

   static const double table[30] =
   {
      0,        3.841459, 5.991465, 7.814728, 9.487729, 11.0705,
      12.59159, 14.06714, 15.50731, 16.91898, 18.30704, 19.67506,
      21.02601, 22.36199, 23.68475, 24.99576, 26.2962,  27.58709,
      28.86928, 30.14351, 31.4104,  32.6705,  33.9244,  35.1725,
      36.4151,  37.6525,  38.8852,  40.1133,  41.3372,  42.5569
   };
   if (N < 30) return table[N];

   const double A = 1.0/(4.5 * N);
   const double H = (-0.0002 * 60)/N;
   const double point = N * cube(1 - A + (1.645 - H) * sqrt(A));
   return point;
}
// Upper 99% point of the chi-squared distribution with N degrees of freedom.
// For N below 30 the value comes from a table (entry 0 is 0); for larger N
// it is computed from a cube-root normal approximation using 2.326 as the
// normal deviate and a small correction term proportional to 1/N.
// Throws Logic_error if N is negative.
double invchi99(int N)
{
   if (N < 0) Throw(Logic_error("Error in invchi99 arg"));

   static const double table[30] =
   {
      0,       6.63490, 9.21034, 11.3449, 13.2767, 15.0863,
      16.8119, 18.4753, 20.0902, 21.6660, 23.2093, 24.7250,
      26.2170, 27.6883, 29.1413, 30.5779, 31.9999, 33.4087,
      34.8053, 36.1908, 37.5662, 38.9321, 40.2894, 41.6384,
      42.9798, 44.3141, 45.6417, 46.9630, 48.2782, 49.5879
   };
   if (N < 30) return table[N];

   const double A = 1.0/(4.5 * N);
   const double H = (0.0008 * 60)/N;
   const double point = N * cube(1 - A + (2.326 - H) * sqrt(A));
   return point;
}