Homework 9

1. Use the code that you worked on in Homework #8 (creating fake data sets), and re-organize it following the principles of structured programming. Do all the work in a single chunk in your R markdown file, just as if you were writing a single R script. Start with all of your annotated functions, preliminary calls, and global variables. The program body should be only a few lines of code that call the appropriate functions and run them in the correct order. Make sure that the output from one function serves as the input to the next. You can either daisy-chain the functions or write separate lines of code to hold elements in temporary variables and pass them along.

Precalculations for shape and scale using mean and variance

because: \[ mean=shape*scale \] then: \[ shape = mean/scale \] \[ scale = mean/shape \]

because: \[ variance=shape*scale^2 \] then: \[ scale = sqrt(variance/shape) \] replace scale with above equation: \[ mean/shape = sqrt(variance/shape) \] isolate shape: \[ mean^2/shape^2 = variance/shape \] \[ (mean^2 * shape)/shape^2 = variance \] \[ mean^2 / shape = variance \] \[ shape = mean^2 / variance \]

# Global variables ---------------------------
library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.1.2

## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --

## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.4     v dplyr   1.0.7
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   2.0.1     v forcats 0.5.1

## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

n_group = 4
n_name = c("Control","Fe","Fe, P, N", "P, N")
n_size = c(12,12,12,12)
n_mean = c(1.2,1.3,5.7,3.0)
n_sd = c(0.05,0.05,0.75,0.3)

shape1 = n_mean[1]^2*n_sd[1]
shape2 = n_mean[2]^2*n_sd[2]
shape3 = n_mean[3]^2*n_sd[3]
shape4 = n_mean[4]^2*n_sd[4]

scale1 = n_mean[1]/shape1
scale2 = n_mean[2]/shape2
scale3 = n_mean[3]/shape3
scale4 = n_mean[4]/shape4

n_shape = c(shape1,shape2,shape3,shape4)
n_scale = c(scale1,scale2,scale3,scale4)


# ----------------------------------------
# FUNCTION sim_data
# description: description
# inputs: sample size, means, sdeviations
# outputs: data frame
##########################################
sim_data = function(size=n_size,shape=n_shape,scale=n_scale) {
  ID = 1:sum(n_size)
  
  chla_conc = c(rgamma(n=size[1],shape=shape[1],scale=scale[1]),
                rgamma(n=size[2],shape=shape[2],scale=scale[2]),
                rgamma(n=size[3],shape=shape[3],scale=scale[3]),
                rgamma(n=size[4],shape=shape[4],scale=scale[4]))
  
  treatment = rep(n_name,n_size)
  
  d_frame = data.frame(ID,treatment,chla_conc)
  
  return(d_frame)
}

# ----------------------------------------

my_df = sim_data()
print(my_df)

##    ID treatment    chla_conc
## 1   1   Control 5.032431e-01
## 2   2   Control 8.716588e-02
## 3   3   Control 1.252431e+00
## 4   4   Control 2.247376e-02
## 5   5   Control 1.230652e-08
## 6   6   Control 1.515078e-06
## 7   7   Control 6.320461e-04
## 8   8   Control 5.032060e-04
## 9   9   Control 3.680527e-11
## 10 10   Control 1.071701e-03
## 11 11   Control 2.699798e-04
## 12 12   Control 2.411661e+00
## 13 13        Fe 8.600485e-02
## 14 14        Fe 4.118113e-18
## 15 15        Fe 1.511460e-04
## 16 16        Fe 2.420220e-01
## 17 17        Fe 4.590968e+00
## 18 18        Fe 4.754914e+00
## 19 19        Fe 9.219515e-05
## 20 20        Fe 2.969996e-01
## 21 21        Fe 7.307874e-10
## 22 22        Fe 6.005609e-01
## 23 23        Fe 1.870622e-03
## 24 24        Fe 2.914075e-05
## 25 25  Fe, P, N 5.029754e+00
## 26 26  Fe, P, N 4.453938e+00
## 27 27  Fe, P, N 7.659168e+00
## 28 28  Fe, P, N 7.437445e+00
## 29 29  Fe, P, N 5.825980e+00
## 30 30  Fe, P, N 4.336396e+00
## 31 31  Fe, P, N 4.374081e+00
## 32 32  Fe, P, N 5.047326e+00
## 33 33  Fe, P, N 3.918535e+00
## 34 34  Fe, P, N 5.561596e+00
## 35 35  Fe, P, N 6.481317e+00
## 36 36  Fe, P, N 4.970266e+00
## 37 37      P, N 6.286053e+00
## 38 38      P, N 6.910204e+00
## 39 39      P, N 1.651021e+00
## 40 40      P, N 1.750040e+00
## 41 41      P, N 4.355834e+00
## 42 42      P, N 6.165407e+00
## 43 43      P, N 1.465521e+00
## 44 44      P, N 3.594347e+00
## 45 45      P, N 6.648466e-01
## 46 46      P, N 9.074557e+00
## 47 47      P, N 3.711822e+00
## 48 48      P, N 1.349554e+00

# ----------------------------------------
# FUNCTION anova_calc
# description: description
# inputs: input_description
# outputs: output_description
##########################################
anova_calc = function(df=my_df) {
  
  anova_model = aov(df$chla_conc~df$treatment,data=df)
  z = summary(anova_model)
  flat_out = unlist(z)
  anova_stats = list(f_ratio = unlist(z)[7],
                     f_pval = unlist(z)[9])
  
  return(anova_stats)
}

# ----------------------------------------

my_anova = anova_calc()
print(my_anova)

## $f_ratio
## F value1 
## 22.73488 
## 
## $f_pval
##     Pr(>F)1 
## 4.83672e-09

# ----------------------------------------
# FUNCTION anova_plot
# description: description
# inputs: input_description
# outputs: output_description
##########################################
anova_plot = function(df=my_df) {
  
  plot = ggplot2::ggplot(df) +
    aes(x=treatment,y=chla_conc) +
    geom_boxplot()
  
  suppressMessages(print(plot))
  
  return(plot)
  
}
# ----------------------------------------

my_plot = anova_plot()

print(my_plot)

2. Once your code is up and working, modify your program to do something else: record a new summary variable, code a new statistical analysis, or create a different set of random variables or output graph. Do not rewrite any of your existing functions. Instead, copy them, rename them, and then modify them to do new things. Once your new functions are written, add some more lines of program code, calling a mixture of your previous functions and your new functions to get the job done.

Changing rgamma to rnorm:

# ----------------------------------------
# FUNCTION sim_data
# description: description
# inputs: sample size, means, sdeviations
# outputs: data frame
##########################################
sim_data = function(size=n_size,mean=n_mean,sd=n_sd) {
  ID = 1:sum(n_size)
  
  chla_conc = c(rnorm(n=size[1],mean=mean[1],sd=sd[1]),
                rnorm(n=size[2],mean=mean[2],sd=sd[2]),
                rnorm(n=size[3],mean=mean[3],sd=sd[3]),
                rnorm(n=size[4],mean=mean[4],sd=sd[4]))
  
  treatment = rep(n_name,n_size)
  
  d_frame = data.frame(ID,treatment,chla_conc)
  
  return(d_frame)
}

# ----------------------------------------

my_df = sim_data()
print(my_df)

##    ID treatment chla_conc
## 1   1   Control  1.102324
## 2   2   Control  1.123430
## 3   3   Control  1.166888
## 4   4   Control  1.233632
## 5   5   Control  1.257347
## 6   6   Control  1.144966
## 7   7   Control  1.162795
## 8   8   Control  1.190889
## 9   9   Control  1.149275
## 10 10   Control  1.216137
## 11 11   Control  1.137425
## 12 12   Control  1.210018
## 13 13        Fe  1.272450
## 14 14        Fe  1.240321
## 15 15        Fe  1.355497
## 16 16        Fe  1.233076
## 17 17        Fe  1.248809
## 18 18        Fe  1.314302
## 19 19        Fe  1.289026
## 20 20        Fe  1.297006
## 21 21        Fe  1.404607
## 22 22        Fe  1.263892
## 23 23        Fe  1.336357
## 24 24        Fe  1.369098
## 25 25  Fe, P, N  7.510035
## 26 26  Fe, P, N  5.300366
## 27 27  Fe, P, N  5.293436
## 28 28  Fe, P, N  7.209535
## 29 29  Fe, P, N  6.174506
## 30 30  Fe, P, N  5.197237
## 31 31  Fe, P, N  6.268817
## 32 32  Fe, P, N  5.671865
## 33 33  Fe, P, N  6.100308
## 34 34  Fe, P, N  6.136944
## 35 35  Fe, P, N  4.781756
## 36 36  Fe, P, N  5.258187
## 37 37      P, N  3.385468
## 38 38      P, N  3.251259
## 39 39      P, N  2.959419
## 40 40      P, N  2.736772
## 41 41      P, N  2.772174
## 42 42      P, N  3.365274
## 43 43      P, N  2.974364
## 44 44      P, N  3.075309
## 45 45      P, N  3.070220
## 46 46      P, N  2.894846
## 47 47      P, N  2.738186
## 48 48      P, N  2.805975

# ----------------------------------------
# FUNCTION anova_calc
# description: description
# inputs: input_description
# outputs: output_description
##########################################
anova_calc = function(df=my_df) {
  
  anova_model = aov(df$chla_conc~df$treatment,data=df)
  z = summary(anova_model)
  flat_out = unlist(z)
  anova_stats = list(f_ratio = unlist(z)[7],
                     f_pval = unlist(z)[9])
  
  return(anova_stats)
}

# ----------------------------------------

my_anova = anova_calc()
print(my_anova)

## $f_ratio
## F value1 
## 312.8228 
## 
## $f_pval
##      Pr(>F)1 
## 1.112758e-29

# ----------------------------------------
# FUNCTION anova_plot
# description: description
# inputs: input_description
# outputs: output_description
##########################################
anova_plot = function(df=my_df) {
  
  plot = ggplot2::ggplot(df) +
    aes(x=treatment,y=chla_conc) +
    geom_boxplot()
  
  suppressMessages(print(plot))
  
  return(plot)
  
}
# ----------------------------------------

my_plot = anova_plot()

print(my_plot)