Normal Distribution Test

SciPy stats.normaltest

In [7]:
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
In [8]:
# Two samples of 30 integer values each, labelled as male and female heights.
heights_male = np.array([
    100, 256, 238, 116, 286, 253, 112, 165, 246, 130,
    217, 269, 155, 136, 189, 235, 255, 113, 280, 222,
    259, 177, 294, 290, 225, 113, 163, 137, 172, 127,
])

heights_female = np.array([
    126, 172, 137, 163, 113, 225, 290, 294, 175, 259,
    220, 280, 111, 255, 235, 189, 136, 150, 269, 214,
    130, 243, 165, 110, 253, 286, 116, 238, 255, 99,
])

# Report the arithmetic mean of each sample.
mean_male = np.mean(heights_male)
mean_female = np.mean(heights_female)
print("mean heights (male): {}".format(mean_male))
print("mean heights (female): {}".format(mean_female))
mean heights (male): 197.66666666666666
mean heights (female): 196.93333333333334
In [11]:
# Compare the two samples with scipy's Wilcoxon signed-rank test and report
# the outcome against a 0.05 significance threshold.
# NOTE(review): scipy documents `wilcoxon` as a test for paired samples; the
# two arrays here look like independent groups -- confirm pairing is intended
# (otherwise an independent-samples test such as `stats.mannwhitneyu` fits).
# NOTE(review): "accepted" below really means "failed to reject"; the message
# text is kept unchanged so it matches the recorded output.
s, p = stats.wilcoxon(heights_female, heights_male)

verdict = (
    "null hypothesis rejected, significant difference between the data-sets"
    if p < 0.05
    else "null hypothesis accepted, no significant difference between the data-sets"
)
print(verdict)

print("p value = {}".format(p))
null hypothesis accepted, no significant difference between the data-sets
p value = 0.9425801920860144
In [13]:
# Normalized histogram of the male sample.
# `density=True` replaces the `normed=1` keyword, which was removed from
# Matplotlib's hist() and raises an error on current versions; the resulting
# normalization (bar areas sum to 1) is the same.
plt.hist(heights_male, color="Magenta", density=True)
plt.xlim(100, 300)
plt.xlabel('Height')
plt.show()
In [14]:
# Normalized histogram of the female sample.
# `density=True` replaces the `normed=1` keyword, which was removed from
# Matplotlib's hist() and raises an error on current versions; the resulting
# normalization (bar areas sum to 1) is the same.
plt.hist(heights_female, color="yellow", density=True)
plt.xlim(100, 300)
plt.xlabel('Height')
plt.show()
In [15]:
# Standard error of the mean for growing prefixes of the male sample:
# the first 3 values, the first 4, ... up to the whole array.
sample_sizes = range(3, len(heights_male) + 1)

# ddof=1 gives the sample standard deviation (Bessel's correction), which is
# what the standard error of the mean is defined on when it is estimated from
# a sample; numpy's default ddof=0 would understate it, most visibly for the
# smallest prefixes.
SEM = [heights_male[:n].std(ddof=1) / np.sqrt(n) for n in sample_sizes]

plt.plot(sample_sizes, SEM, marker='o', color='cyan')

plt.ylabel("Standard Error of the Mean ($SE_M$)")
plt.xlabel("Sample size $(n)$")
plt.title("Relationship between $SE_M$ and $n$");
In [18]:
# Run scipy's normality test on each sample in turn (male first, then female)
# and print the returned result object, which carries the statistic and p-value.
for heights in (heights_male, heights_female):
    print(stats.normaltest(heights))
NormaltestResult(statistic=13.548310785013712, pvalue=0.0011429354242245898)
NormaltestResult(statistic=13.278600632632264, pvalue=0.001307942069480237)