第 7 章 分布检验
7.1 Q-Q图
# Read the sample and keep its single data column (V1) as a plain vector.
x <- read.table("data/ind.txt")
x <- x$V1

# Two panels side by side: left = Q-Q plot against normal quantiles with
# mean 15, right = normal Q-Q plot of the standardized sample.
par(mfrow = c(1, 2))

# Theoretical quantiles at plotting positions (i - 0.5) / n. The original
# divided by a hard-coded 20; length(x) gives identical positions for a
# sample of 20 observations and stays correct for any other sample size.
# NOTE(review): the third argument of qnorm() is a standard deviation.
# 0.04 equals 0.2^2 and the ks.test() call later in this chapter uses 0.2,
# so 0.04 may be a variance passed by mistake -- confirm. It only rescales
# the horizontal axis, so the value is left unchanged here.
n_obs <- length(x)
qqplot(qnorm((seq_len(n_obs) - 0.5) / n_obs, 15, 0.04), x)

# Standardize the sample, then draw its normal Q-Q plot with reference line.
z <- (x - mean(x)) / sd(x)
qqnorm(z)
qqline(z)
7.2 Kolmogorov-Smirnov单样本检验
7.2.1 KS检验
# One-sample Kolmogorov-Smirnov test of x against the normal CDF; the two
# extra arguments are forwarded to pnorm() as its mean and sd.
ks.test(x, "pnorm", mean = 15, sd = 0.2)
##
## One-sample Kolmogorov-Smirnov test
##
## data: x
## D = 0.33943, p-value = 0.0147
## alternative hypothesis: two-sided
7.2.2 正态性检验
# Normality tests applied to the same sample x. Each call prints the name of
# the test, its statistic and its p-value (recorded in the ## lines).

# Shapiro-Wilk test.
shapiro.test(x)
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.97442, p-value = 0.8439
# The remaining tests in this group are called after attaching nortest.
library(nortest)
# Lilliefors (Kolmogorov-Smirnov) test.
lillie.test(x)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: x
## D = 0.11599, p-value = 0.6847
# Anderson-Darling test.
ad.test(x)
##
## Anderson-Darling normality test
##
## data: x
## A = 0.24208, p-value = 0.7364
# Cramer-von Mises test.
cvm.test(x)
##
## Cramer-von Mises normality test
##
## data: x
## W = 0.03417, p-value = 0.7708
# Pearson chi-square test.
pearson.test(x)
##
## Pearson chi-square normality test
##
## data: x
## P = 3.1, p-value = 0.5412
# Shapiro-Francia test.
sf.test(x)
##
## Shapiro-Francia normality test
##
## data: x
## W = 0.9683, p-value = 0.6274
# Normality tests called after attaching fBasics, applied to the same sample x.
library(fBasics)
## Loading required package: timeDate
## Loading required package: timeSeries
# As called here, normalTest() reports a Shapiro-Wilk test: W and the p-value
# match the shapiroTest(x) output further down.
normalTest(x)
##
## Title:
## Shapiro - Wilk Normality Test
##
## Test Results:
## STATISTIC:
## W: 0.9744
## P VALUE:
## 0.8439
##
## Description:
## Wed Aug 3 23:44:24 2022 by user:
# NOTE(review): this is the only test that rejects (D = 1, p < 2.2e-16).
# That pattern looks like x being compared with a standard normal N(0, 1)
# without first being standardized -- confirm against the fBasics
# documentation, and consider passing (x - mean(x)) / sd(x) instead.
ksnormTest(x)
##
## Title:
## One-sample Kolmogorov-Smirnov test
##
## Test Results:
## STATISTIC:
## D: 1
## P VALUE:
## Alternative Two-Sided: < 2.2e-16
## Alternative Less: < 2.2e-16
## Alternative Greater: 1
##
## Description:
## Wed Aug 3 23:44:24 2022 by user:
# Shapiro-Wilk test.
shapiroTest(x)
##
## Title:
## Shapiro - Wilk Normality Test
##
## Test Results:
## STATISTIC:
## W: 0.9744
## P VALUE:
## 0.8439
##
## Description:
## Wed Aug 3 23:44:24 2022 by user:
# Jarque-Bera test; the printed p-value is labelled as asymptotic.
jarqueberaTest(x)
##
## Title:
## Jarque - Bera Normalality Test
##
## Test Results:
## STATISTIC:
## X-squared: 0.4222
## P VALUE:
## Asymptotic p Value: 0.8097
##
## Description:
## Wed Aug 3 23:44:24 2022 by user:
# D'Agostino test; prints omnibus, skewness and kurtosis statistics, each
# with its own p-value.
dagoTest(x)
##
## Title:
## D'Agostino Normality Test
##
## Test Results:
## STATISTIC:
## Chi2 | Omnibus: 0.9747
## Z3 | Skewness: -0.79
## Z4 | Kurtosis: 0.5922
## P VALUE:
## Omnibus Test: 0.6142
## Skewness Test: 0.4295
## Kurtosis Test: 0.5537
##
## Description:
## Wed Aug 3 23:44:24 2022 by user:
7.3 Kolmogorov-Smirnov两样本分布检验
# Two-sample Kolmogorov-Smirnov test.
# Column 1 of the file is used as the measurement and column 2 as the group
# label (1 or 2).
z <- read.table("data/ks2.txt", header = FALSE)

# Split by group; the outer parentheses print each sample as it is assigned.
(x <- z[z[, 2] == 1, 1])
(y <- z[z[, 2] == 2, 1])
## [1] 5.38 4.38 9.33 3.66 3.72 1.66 0.23 0.08 2.36 1.71 2.01 0.90 1.54
## [1] 6.67 16.21 11.93 9.85 10.43 13.54 2.40 12.89 9.30 11.92 5.74 14.45
## [13] 1.99 9.14 2.89

# Compare the two empirical distributions.
ks.test(x, y)
##
## Two-sample Kolmogorov-Smirnov test
##
## data: x and y
## D = 0.72308, p-value = 0.0004714
## alternative hypothesis: two-sided
拟合优度\(\chi^2\)检验
7.4 Pearson \(\chi^2\) 拟合优度检验
# Pearson chi-square goodness-of-fit of a Poisson model to grouped counts.
# Ob[k] is the observed frequency of the value k - 1, for values 0, 1, 2, 3.
Ob <- c(490, 334, 68, 16)
n <- sum(Ob)

# Estimate the Poisson mean by the sample mean of the grouped data.
lambda <- sum((0:3) * Ob) / n

# Poisson probabilities for 0..3 and the matching expected counts.
p <- exp(-lambda) * lambda^(0:3) / factorial(0:3)
E <- p * n

# Pearson statistic. The reference distribution uses 2 degrees of freedom:
# 4 cells, minus 1, minus 1 for the estimated lambda.
Q <- sum((E - Ob)^2 / E)
pvalue <- pchisq(Q, 2, lower.tail = FALSE)
7.4.1 Goodness-of-Fit Tests for a Single Discrete Random Variable
# Suppose we roll a die n = 370 times
# and we observe the frequencies (58, 55, 62, 68, 66, 61).
# Suppose we are interested in testing to see if the die is fair;
# i.e., p(j) ≡ 1/6.
x <- c(58, 55, 62, 68, 66, 61)

# With no `p` argument, chisq.test() tests against equal cell probabilities.
chifit <- chisq.test(x)
chifit
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 1.9027, df = 5, p-value = 0.8624

# Expected count per face under the fair-die hypothesis.
round(chifit$expected, digits = 4)
## [1] 61.6667 61.6667 61.6667 61.6667 61.6667 61.6667

# Squared Pearson residuals: each face's contribution to X-squared.
round((chifit$residuals)^2, digits = 4)
## [1] 0.2180 0.7207 0.0018 0.6505 0.3045 0.0072
# (Birth Rate of Males to Swedish Ministers).
# This data is discussed on page 266 of Daniel (1978).
# It concerns the number of males in the first seven children
# for n = 1334 Swedish ministers of religion.

# oc[k] is the number of families with k - 1 males among seven children.
oc <- c(6, 57, 206, 362, 365, 256, 69, 13)
n <- sum(oc)
range <- 0:7

# Estimate of the probability of a male: total males / total children.
phat <- sum(range * oc) / (n * 7)

# Binomial(7, phat) probabilities for 0..7 males.
pmf <- dbinom(range, 7, phat)
rbind(range, round(pmf, 3))
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## range 0.000 1.000 2.00 3.000 4.00 5.000 6.000 7.000
## 0.006 0.047 0.15 0.265 0.28 0.178 0.063 0.009

# chisq.test() would use df = 7 here; the p-value is recomputed with df = 6
# (8 cells - 1 - 1 estimated parameter).
test.result <- chisq.test(oc, p = pmf)
pchisq(test.result$statistic, df = 6, lower.tail = FALSE)
## X-squared
## 0.4257546
round(test.result$expected, 1)
## [1] 8.5 63.2 200.6 353.7 374.1 237.4 83.7 12.6
7.4.2 Several Discrete Random Variables
# (Type of Crime and Alcoholic Status).
# The contingency table, Table 2.1,
# contains the frequencies of criminals who committed certain crimes and
# whether or not they are alcoholics.
# We are interested in seeing whether or not the distribution of alcoholic
# status is the same for each type of crime.
# The data were obtained from Kendall and Stuart (1979).

# One column per count vector, one row per position; ct is a 6 x 2 table.
c1 <- c(50, 88, 155, 379, 18, 63)
c2 <- c(43, 62, 110, 300, 14, 144)
ct <- cbind(c1, c2)

chifit <- chisq.test(ct)
chifit
##
## Pearson's Chi-squared test
##
## data: ct
## X-squared = 49.731, df = 5, p-value = 1.573e-09

# Cell contributions to X-squared.
# residuals = (observed - expected) / sqrt(expected)
(chifit$residuals)^2
## c1 c2
## [1,] 0.01617684 0.01809979
## [2,] 0.97600214 1.09202023
## [3,] 1.62222220 1.81505693
## [4,] 1.16680759 1.30550686
## [5,] 0.07191850 0.08046750
## [6,] 19.61720859 21.94912045

# Row 6 has by far the largest contributions; drop it and test again.
ct2 <- ct[-6, ]
chisq.test(ct2)
##
## Pearson's Chi-squared test
##
## data: ct2
## X-squared = 1.1219, df = 4, p-value = 0.8908