Import necessary libraries
# libraries
library(dbplyr)
library(tidyverse)
Import files
# Read the two assignment tables directly from the Biol_7263 GitHub repository.
# "?raw=true" makes GitHub return the CSV file itself instead of its HTML page.
f1 <- read_csv(
  file = "https://github.com/mbtoomey/Biol_7263/blob/main/Data/assignment6part1.csv?raw=true"
)
f2 <- read_csv(
  file = "https://github.com/mbtoomey/Biol_7263/blob/main/Data/assignment6part2.csv?raw=true"
)
Pivot Files
# Reshape f1 so that every sample becomes one row.
# Step 1 (pivot_longer): every column except ID is stacked into rows, and its
#   name is split at "_" into three new variables: Sample, gender, treatment.
#   Cells with missing data are dropped.
# Step 2 (pivot_wider): the measurement names stored in ID are spread back out
#   into one column per measurement.
f1_pivot <- f1 %>%
  pivot_longer(
    cols = !ID, # spelled out in full; `col` only worked via partial matching
    names_to = c("Sample", "gender", "treatment"),
    names_sep = "_",
    values_drop_na = TRUE
  ) %>%
  pivot_wider(names_from = ID, values_from = value)
# Reshape f2 so that every sample becomes one row.
# Step 1 (pivot_longer): every column except ID is stacked into rows, and its
#   name is split at "." into two new variables: Sample and treatment.
#   names_sep is a regular expression, so the period must be escaped as "\\."
#   to match a literal "." rather than any character.
#   Cells with missing data are dropped.
# Step 2 (pivot_wider): the measurement names stored in ID are spread back out
#   into one column per measurement.
f2_pivot <- f2 %>%
  pivot_longer(
    cols = !ID, # spelled out in full; `col` only worked via partial matching
    names_to = c("Sample", "treatment"),
    names_sep = "\\.",
    values_drop_na = TRUE
  ) %>%
  pivot_wider(names_from = ID, values_from = value)
Merge files
# Merge the two reshaped tables into one, keeping every row from both sides.
# The join keys are stated explicitly so the result cannot change silently if
# the tables ever gain another column name in common; these are the same keys
# dplyr previously picked on its own.
f3 <- f1_pivot %>%
  full_join(f2_pivot, by = c("Sample", "treatment"))
## Joining, by = c("Sample", "treatment")
Export Results
# Save the merged table as a CSV file, then show its first six rows
write_csv(
  f3,
  "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_5/results/f3.csv"
)
head(f3, n = 6L)
## # A tibble: 6 × 6
## Sample gender treatment body_length age mass
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Sample1 Male Control 1.68 3 NA
## 2 Sample2 Male Control 4.31 7 NA
## 3 Sample3 Male Control 4.54 11 8.46
## 4 Sample4 Male Control 1.09 8 3.87
## 5 Sample5 Male Control 3.55 10 3.12
## 6 Sample6 Male Treatment 8.19 4 9.38
Link to download final file:
Calculate residual mass by sex and treatment
# Add a per-sample column holding mass divided by body length.
# NOTE: the column keeps its original spelling ("resididual_mass") so that any
# other code referring to it by name keeps working.
f3 <- f3 %>%
  mutate(resididual_mass = mass / body_length)

# Mean and standard deviation of that column for every gender x treatment
# combination; missing values are ignored.
# `.groups = "drop_last"` keeps the result grouped by gender (what summarise()
# does by default for grouped input) and silences the regrouping message for
# this call only, instead of switching it off for the whole session through
# options(dplyr.summarise.inform = FALSE).
f3_mean_sd <- f3 %>%
  group_by(gender, treatment) %>%
  summarise(
    mean = mean(resididual_mass, na.rm = TRUE),
    SD = sd(resididual_mass, na.rm = TRUE),
    .groups = "drop_last"
  )
Export table with mean and standard deviations
# Save the summary table as a CSV file, then show its first rows
write_csv(
  f3_mean_sd,
  "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_5/results/f3_mean_sd.csv"
)
head(f3_mean_sd, n = 6L)
## # A tibble: 4 × 4
## # Groups: gender [2]
## gender treatment mean SD
## <chr> <chr> <dbl> <dbl>
## 1 Female Control 2.83 2.41
## 2 Female Treatment 2.99 2.54
## 3 Male Control 2.10 1.36
## 4 Male Treatment 2.93 3.58
Link to download final file:
Link to R script