# Rproject7_5_beeswax_solution.r

# 1.0 Read in data ----
#       See Example 10.2.1 A, data from White, Riethof, and Kushnir (1960)
#   
file0.beeswax<-"Rice 3e Datasets/ASCII Comma/Chapter 10/beeswax.txt"
data.beeswax<-read.table(file=file0.beeswax,sep=",", header=TRUE)
head(data.beeswax)
##   MeltingPoint Hydrocarbon
## 1        63.78       14.27
## 2        63.45       14.80
## 3        63.58       12.28
## 4        63.08       17.09
## 5        63.40       15.10
## 6        64.42       12.92
x.label="Hydrocarbon"
x=data.beeswax[,x.label]

# 2.0 Plots of the data 

#   2.1 Index Plot
plot(x, ylab=x.label, main="Beeswax Data (White et. al. 1960)")

#   2.2 Plot of the ECDF

plot(ecdf(x), verticals=TRUE, 
      col.points='blue', col.hor='red', col.vert='green',
     main=paste("ECDF of Beeswax ", x.label,sep=""))

#   2.3 Histogram

hist(x, main="Histogram (Counts)")

hist(x, main="Histogram (Density)", probability=TRUE)

#   Add plot of Fitted Normal 
grid.x<-seq(.95*min(x), 1.05*max(x), .01)
x.mean=mean(x)
x.var=var(x)
grid.x.normdensity=dnorm(grid.x, mean=x.mean, sd=sqrt(x.var))
lines(grid.x, grid.x.normdensity, col='green')

#   2.4 Normal QQ Plot

qqnorm(x)
#
#   Add line with slope=sqrt(x.var) and interecept=x.mean

abline(a=x.mean, b=sqrt(x.var), col='green')

#   The data appear close to normal

# The One-Sample Kolmogorov Smirnov test  confirms this:

ks.test(scale(x), y="pnorm")
## Warning in ks.test(scale(x), y = "pnorm"): ties should not be present for
## the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  scale(x)
## D = 0.0622, p-value = 0.9766
## alternative hypothesis: two-sided
#   3.0 Compute selected quantiles

list.probs=c(.10,.25,.50,.75,.9)
print(data.frame(cbind(prob=list.probs, 
                       quantile=quantile(x,probs=list.probs))))
##     prob quantile
## 10% 0.10   13.676
## 25% 0.25   14.070
## 50% 0.50   14.570
## 75% 0.75   15.115
## 90% 0.90   15.470
#  Consider measuring T, the percent hydrocarbon in another
#   sample  of the same size, and 
#   testing H0: no dilution vs H1: dilution at level 1%,3%, 5%
#
# A level alpha=.05 test rejects H0 if T>tstar

# Assume that the data are (approximately) normal with mean and variance
# given by those of the sample data of the problem.

alpha=.05
z.alpha=qnorm(1-alpha)
tstar=x.mean + z.alpha*sqrt(x.var)

print(c(alpha,z.alpha, x.mean, sqrt(x.var), tstar))
## [1]  0.0500000  1.6448536 14.5800000  0.7764197 15.8570968
# Evaluate power of test at dilution levels
# The impact of dilution would be to change the mean level of the distribution
# by adding the diluted percentage

# The power is the probability of falling in the rejection region conditioning
# on each case in the alternative hypothesis

list.dilutionLevel=c(1,3,5)
for (dilutionLevel in list.dilutionLevel){
  
  print(dilutionLevel)
  x.mean.adjusted=(1-dilutionLevel/100)*x.mean + (dilutionLevel/100)*85
  power.tstar=1-pnorm(tstar,mean=x.mean.adjusted, sd=sqrt(x.var))
  print(c(dilutionLevel, x.mean.adjusted, sqrt(x.var), tstar, power.tstar))
  print((tstar-x.mean.adjusted)/sqrt(x.var))
  }
## [1] 1
## [1]  1.0000000 15.2842000  0.7764197 15.8570968  0.2302967
## [1] 0.73787
## [1] 3
## [1]  3.0000000 16.6926000  0.7764197 15.8570968  0.8590581
## [1] -1.076097
## [1] 5
## [1]  5.0000000 18.1010000  0.7764197 15.8570968  0.9980742
## [1] -2.890065
# From these results, 
#     a 1 percent dilution would be detected with probability .23
#     a 3 percent dilution would be detected with probability .859
#     a 5 percent dilution would be detected with probability .998