# Rproject7_5_beeswax_solution.r
# 1.0 Read in data ----
# See Example 10.2.1 A, data from White, Riethof, and Kushnir (1960)
#
file0.beeswax<-"Rice 3e Datasets/ASCII Comma/Chapter 10/beeswax.txt"
data.beeswax<-read.table(file=file0.beeswax,sep=",", header=TRUE)
head(data.beeswax)
## MeltingPoint Hydrocarbon
## 1 63.78 14.27
## 2 63.45 14.80
## 3 63.58 12.28
## 4 63.08 17.09
## 5 63.40 15.10
## 6 64.42 12.92
x.label="Hydrocarbon"
x=data.beeswax[,x.label]
# 2.0 Plots of the data
# 2.1 Index Plot
plot(x, ylab=x.label, main="Beeswax Data (White et. al. 1960)")

# 2.2 Plot of the ECDF
plot(ecdf(x), verticals=TRUE,
col.points='blue', col.hor='red', col.vert='green',
main=paste("ECDF of Beeswax ", x.label,sep=""))

# 2.3 Histogram
hist(x, main="Histogram (Counts)")

hist(x, main="Histogram (Density)", probability=TRUE)
# Add plot of Fitted Normal
grid.x<-seq(.95*min(x), 1.05*max(x), .01)
x.mean=mean(x)
x.var=var(x)
grid.x.normdensity=dnorm(grid.x, mean=x.mean, sd=sqrt(x.var))
lines(grid.x, grid.x.normdensity, col='green')

# 2.4 Normal QQ Plot
qqnorm(x)
#
# Add line with slope=sqrt(x.var) and interecept=x.mean
abline(a=x.mean, b=sqrt(x.var), col='green')

# The data appear close to normal
# The One-Sample Kolmogorov Smirnov test confirms this:
ks.test(scale(x), y="pnorm")
## Warning in ks.test(scale(x), y = "pnorm"): ties should not be present for
## the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: scale(x)
## D = 0.0622, p-value = 0.9766
## alternative hypothesis: two-sided
# 3.0 Compute selected quantiles
list.probs=c(.10,.25,.50,.75,.9)
print(data.frame(cbind(prob=list.probs,
quantile=quantile(x,probs=list.probs))))
## prob quantile
## 10% 0.10 13.676
## 25% 0.25 14.070
## 50% 0.50 14.570
## 75% 0.75 15.115
## 90% 0.90 15.470
# Consider measuring T, the percent hydrocarbon in another
# sample of the same size, and
# testing H0: no dilution vs H1: dilution at level 1%,3%, 5%
#
# A level alpha=.05 test rejects H0 if T>tstar
# Assume that the data are (approximately) normal with mean and variance
# given by those of the sample data of the problem.
alpha=.05
z.alpha=qnorm(1-alpha)
tstar=x.mean + z.alpha*sqrt(x.var)
print(c(alpha,z.alpha, x.mean, sqrt(x.var), tstar))
## [1] 0.0500000 1.6448536 14.5800000 0.7764197 15.8570968
# Evaluate power of test at dilution levels
# The impact of dilution would be to change the mean level of the distribution
# by adding the diluted percentage
# The power is the probability of falling in the rejection region conditioning
# on each case in the alternative hypothesis
list.dilutionLevel=c(1,3,5)
for (dilutionLevel in list.dilutionLevel){
print(dilutionLevel)
x.mean.adjusted=(1-dilutionLevel/100)*x.mean + (dilutionLevel/100)*85
power.tstar=1-pnorm(tstar,mean=x.mean.adjusted, sd=sqrt(x.var))
print(c(dilutionLevel, x.mean.adjusted, sqrt(x.var), tstar, power.tstar))
print((tstar-x.mean.adjusted)/sqrt(x.var))
}
## [1] 1
## [1] 1.0000000 15.2842000 0.7764197 15.8570968 0.2302967
## [1] 0.73787
## [1] 3
## [1] 3.0000000 16.6926000 0.7764197 15.8570968 0.8590581
## [1] -1.076097
## [1] 5
## [1] 5.0000000 18.1010000 0.7764197 15.8570968 0.9980742
## [1] -2.890065
# From these results,
# a 1 percent dilution would be detected with probability .23
# a 3 percent dilution would be detected with probability .859
# a 5 percent dilution would be detected with probability .998