# Problem_14_9_40.r

# 1.0 Read in data ----
#       See Problem 14.9.40
#   Data from calibration of a proving ring, a device for measuring force
#   (Hockersmith and Ku 1969).


# Load the proving-ring calibration data (comma-separated, with a header row).
provingring <- read.table(
  file = "Rice 3e Datasets/ASCII Comma/Chapter 14/provingring.txt",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Pull the two columns out as numeric vectors; LoadSq is the squared load,
# used further down as the quadratic regressor.
Load <- as.numeric(provingring$Load)
Deflection <- as.numeric(provingring$Deflection)
LoadSq <- Load^2

# Scatter plot of the raw data: deflection against applied load.
plot(Load, Deflection)
# (a). The plot of load versus deflection looks linear.
# Least-squares straight-line fit of Deflection on Load.
lmfit1 <- lm(Deflection ~ Load)

# Print the coefficient table and overall fit statistics.
summary(lmfit1)
## 
## Call:
## lm(formula = Deflection ~ Load)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7819 -0.3798 -0.1760  0.3800  0.9513 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.468e+00  2.203e-01   -6.665 3.12e-07 ***
## Load         6.892e-03  3.551e-06 1940.923  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5586 on 28 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 3.767e+06 on 1 and 28 DF,  p-value: < 2.2e-16
# Scatter plot of the data with the fitted straight line overlaid.
plot(Load, Deflection)
abline(lmfit1, col = "green")

# Residuals of the linear fit in observation order.
plot(residuals(lmfit1), ylab = "Residual (linear fit)")
abline(h = 0, col = "gray")

# Residuals of the linear fit against Load. The zero reference line makes it
# easy to see where the residuals are positive and where they are negative.
plot(Load, residuals(lmfit1), ylab = "Residual (linear fit)")
abline(h = 0, col = "gray")

# (b).  The residuals from the linear fit show lack of fit:
#       they are positive at the edges and negative in the middle of the Load values.


# (c). Fit deflection as a quadratic function of load.
# Least-squares fit with both a linear and a squared load term.
lmfit2 <- lm(Deflection ~ Load + LoadSq)

# Print the coefficient table and overall fit statistics.
summary(lmfit2)
## 
## Call:
## lm(formula = Deflection ~ Load + LoadSq)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.19832 -0.08509  0.02033  0.07533  0.14339 
## 
## Coefficients:
##              Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) 1.363e-01  7.284e-02    1.871   0.0722 .  
## Load        6.812e-03  3.042e-06 2239.355   <2e-16 ***
## LoadSq      7.294e-10  2.695e-11   27.065   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1073 on 27 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 5.11e+07 on 2 and 27 DF,  p-value: < 2.2e-16
# Data with both fits overlaid: straight line (green) and quadratic (red).
plot(Load, Deflection)
abline(lmfit1, col = "green")
# lines() joins points in the order given, so sort by Load first; otherwise
# the curve is drawn in data order and doubles back wherever Load decreases.
load_order <- order(Load)
lines(Load[load_order], fitted(lmfit2)[load_order], col = "red")

# Residuals of the quadratic fit in observation order.
plot(residuals(lmfit2), ylab = "Residual (quadratic fit)")
abline(h = 0, col = "gray")

# Residuals of the quadratic fit against Load, with a zero reference line.
plot(Load, residuals(lmfit2), ylab = "Residual (quadratic fit)")
abline(h = 0, col = "gray")

# The fit looks better.  It is apparent that the variability across runs at a
# given Load is much lower than the variability across Loads.
# The errors in the measurements apparently have two sources of variability.