1. Use the code that you worked on in Homework #8 (creating fake data sets), and re-organize it following the principles of structured programming. Do all the work in a single chunk in your R markdown file, just as if you were writing a single R script. Start with all of your annotated functions, preliminary calls, and global variables. The program body should be only a few lines of code that call the appropriate functions and run them in the correct order. Make sure that the output from one function serves as the input to the next. You can either daisy-chain the functions or write separate lines of code to hold elements in temporary variables and pass them along.
Precalculations for shape and scale using mean and variance
because: \[ mean=shape*scale \] then: \[ shape = mean/scale \] \[ scale = mean/shape \]
because: \[ variance=shape*scale^2 \] then: \[ scale = sqrt(variance/shape) \] replace scale with above equation: \[ mean/shape = sqrt(variance/shape) \] isolate shape: \[ mean^2/shape^2 = variance/shape \] \[ (mean^2 * shape)/shape^2 = variance \] \[ mean^2 / shape = variance \] \[ shape = mean^2 / variance \]
# Global variables ---------------------------
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
= 4
n_group = c("Control","Fe","Fe, P, N", "P, N")
n_name = c(12,12,12,12)
n_size = c(1.2,1.3,5.7,3.0)
n_mean = c(0.05,0.05,0.75,0.3)
n_sd
= n_mean[1]^2*n_sd[1]
shape1 = n_mean[2]^2*n_sd[2]
shape2 = n_mean[3]^2*n_sd[3]
shape3 = n_mean[4]^2*n_sd[4]
shape4
= n_mean[1]/shape1
scale1 = n_mean[2]/shape2
scale2 = n_mean[3]/shape3
scale3 = n_mean[4]/shape4
scale4
= c(shape1,shape2,shape3,shape4)
n_shape = c(scale1,scale2,scale3,scale4)
n_scale
# ----------------------------------------
# FUNCTION sim_data
# description: description
# inputs: sample size, means, sdeviations
# outputs: data frame
##########################################
= function(size=n_size,shape=n_shape,scale=n_scale) {
sim_data = 1:sum(n_size)
ID
= c(rgamma(n=size[1],shape=shape[1],scale=scale[1]),
chla_conc rgamma(n=size[2],shape=shape[2],scale=scale[2]),
rgamma(n=size[3],shape=shape[3],scale=scale[3]),
rgamma(n=size[4],shape=shape[4],scale=scale[4]))
= rep(n_name,n_size)
treatment
= data.frame(ID,treatment,chla_conc)
d_frame
return(d_frame)
}
# ----------------------------------------
= sim_data()
my_df print(my_df)
## ID treatment chla_conc
## 1 1 Control 5.032431e-01
## 2 2 Control 8.716588e-02
## 3 3 Control 1.252431e+00
## 4 4 Control 2.247376e-02
## 5 5 Control 1.230652e-08
## 6 6 Control 1.515078e-06
## 7 7 Control 6.320461e-04
## 8 8 Control 5.032060e-04
## 9 9 Control 3.680527e-11
## 10 10 Control 1.071701e-03
## 11 11 Control 2.699798e-04
## 12 12 Control 2.411661e+00
## 13 13 Fe 8.600485e-02
## 14 14 Fe 4.118113e-18
## 15 15 Fe 1.511460e-04
## 16 16 Fe 2.420220e-01
## 17 17 Fe 4.590968e+00
## 18 18 Fe 4.754914e+00
## 19 19 Fe 9.219515e-05
## 20 20 Fe 2.969996e-01
## 21 21 Fe 7.307874e-10
## 22 22 Fe 6.005609e-01
## 23 23 Fe 1.870622e-03
## 24 24 Fe 2.914075e-05
## 25 25 Fe, P, N 5.029754e+00
## 26 26 Fe, P, N 4.453938e+00
## 27 27 Fe, P, N 7.659168e+00
## 28 28 Fe, P, N 7.437445e+00
## 29 29 Fe, P, N 5.825980e+00
## 30 30 Fe, P, N 4.336396e+00
## 31 31 Fe, P, N 4.374081e+00
## 32 32 Fe, P, N 5.047326e+00
## 33 33 Fe, P, N 3.918535e+00
## 34 34 Fe, P, N 5.561596e+00
## 35 35 Fe, P, N 6.481317e+00
## 36 36 Fe, P, N 4.970266e+00
## 37 37 P, N 6.286053e+00
## 38 38 P, N 6.910204e+00
## 39 39 P, N 1.651021e+00
## 40 40 P, N 1.750040e+00
## 41 41 P, N 4.355834e+00
## 42 42 P, N 6.165407e+00
## 43 43 P, N 1.465521e+00
## 44 44 P, N 3.594347e+00
## 45 45 P, N 6.648466e-01
## 46 46 P, N 9.074557e+00
## 47 47 P, N 3.711822e+00
## 48 48 P, N 1.349554e+00
# ----------------------------------------
# FUNCTION anova_calc
# description: description
# inputs: input_description
# outputs: output_description
##########################################
= function(df=my_df) {
anova_calc
= aov(df$chla_conc~df$treatment,data=df)
anova_model = summary(anova_model)
z = unlist(z)
flat_out = list(f_ratio = unlist(z)[7],
anova_stats f_pval = unlist(z)[9])
return(anova_stats)
}
# ----------------------------------------
= anova_calc()
my_anova print(my_anova)
## $f_ratio
## F value1
## 22.73488
##
## $f_pval
## Pr(>F)1
## 4.83672e-09
# ----------------------------------------
# FUNCTION anova_plot
# description: description
# inputs: input_description
# outputs: output_description
##########################################
= function(df=my_df) {
anova_plot
= ggplot2::ggplot(df) +
plot aes(x=treatment,y=chla_conc) +
geom_boxplot()
suppressMessages(print(plot))
return(plot)
}# ----------------------------------------
= anova_plot() my_plot
print(my_plot)
2. Once your code is up and working, modify your program to do something else: record a new summary variable, code a new statistical analysis, or create a different set of random variables or output graph. Do not rewrite any of your existing functions. Instead, copy them, rename them, and then modify them to do new things. Once your new functions are written, add some more lines of program code, calling a mixture of your previous functions and your new functions to get the job done.
Changing rgamma to rnorm:
# ----------------------------------------
# FUNCTION sim_data
# description: description
# inputs: sample size, means, sdeviations
# outputs: data frame
##########################################
= function(size=n_size,mean=n_mean,sd=n_sd) {
sim_data = 1:sum(n_size)
ID
= c(rnorm(n=size[1],mean=mean[1],sd=sd[1]),
chla_conc rnorm(n=size[2],mean=mean[2],sd=sd[2]),
rnorm(n=size[3],mean=mean[3],sd=sd[3]),
rnorm(n=size[4],mean=mean[4],sd=sd[4]))
= rep(n_name,n_size)
treatment
= data.frame(ID,treatment,chla_conc)
d_frame
return(d_frame)
}
# ----------------------------------------
= sim_data()
my_df print(my_df)
## ID treatment chla_conc
## 1 1 Control 1.102324
## 2 2 Control 1.123430
## 3 3 Control 1.166888
## 4 4 Control 1.233632
## 5 5 Control 1.257347
## 6 6 Control 1.144966
## 7 7 Control 1.162795
## 8 8 Control 1.190889
## 9 9 Control 1.149275
## 10 10 Control 1.216137
## 11 11 Control 1.137425
## 12 12 Control 1.210018
## 13 13 Fe 1.272450
## 14 14 Fe 1.240321
## 15 15 Fe 1.355497
## 16 16 Fe 1.233076
## 17 17 Fe 1.248809
## 18 18 Fe 1.314302
## 19 19 Fe 1.289026
## 20 20 Fe 1.297006
## 21 21 Fe 1.404607
## 22 22 Fe 1.263892
## 23 23 Fe 1.336357
## 24 24 Fe 1.369098
## 25 25 Fe, P, N 7.510035
## 26 26 Fe, P, N 5.300366
## 27 27 Fe, P, N 5.293436
## 28 28 Fe, P, N 7.209535
## 29 29 Fe, P, N 6.174506
## 30 30 Fe, P, N 5.197237
## 31 31 Fe, P, N 6.268817
## 32 32 Fe, P, N 5.671865
## 33 33 Fe, P, N 6.100308
## 34 34 Fe, P, N 6.136944
## 35 35 Fe, P, N 4.781756
## 36 36 Fe, P, N 5.258187
## 37 37 P, N 3.385468
## 38 38 P, N 3.251259
## 39 39 P, N 2.959419
## 40 40 P, N 2.736772
## 41 41 P, N 2.772174
## 42 42 P, N 3.365274
## 43 43 P, N 2.974364
## 44 44 P, N 3.075309
## 45 45 P, N 3.070220
## 46 46 P, N 2.894846
## 47 47 P, N 2.738186
## 48 48 P, N 2.805975
# ----------------------------------------
# FUNCTION anova_calc
# description: description
# inputs: input_description
# outputs: output_description
##########################################
= function(df=my_df) {
anova_calc
= aov(df$chla_conc~df$treatment,data=df)
anova_model = summary(anova_model)
z = unlist(z)
flat_out = list(f_ratio = unlist(z)[7],
anova_stats f_pval = unlist(z)[9])
return(anova_stats)
}
# ----------------------------------------
= anova_calc()
my_anova print(my_anova)
## $f_ratio
## F value1
## 312.8228
##
## $f_pval
## Pr(>F)1
## 1.112758e-29
# ----------------------------------------
# FUNCTION anova_plot
# description: description
# inputs: input_description
# outputs: output_description
##########################################
= function(df=my_df) {
anova_plot
= ggplot2::ggplot(df) +
plot aes(x=treatment,y=chla_conc) +
geom_boxplot()
suppressMessages(print(plot))
return(plot)
}# ----------------------------------------
= anova_plot() my_plot
print(my_plot)