Import necessary libraries

# libraries
library(dbplyr)
library(tidyverse)

Part 1

Import files

# Load the two raw assignment tables directly from the course repository.
# The URLs end in `?raw=true`, presumably so GitHub serves the CSV itself
# rather than the HTML page around it.
f1 <- read_csv(
  "https://github.com/mbtoomey/Biol_7263/blob/main/Data/assignment6part1.csv?raw=true"
)
f2 <- read_csv(
  "https://github.com/mbtoomey/Biol_7263/blob/main/Data/assignment6part2.csv?raw=true"
)

Pivot Files

# Reshape f1 so that each row is one sample and each ID value is a column.
# Every non-ID column name in f1 encodes three fields separated by "_"
# (sample, gender, treatment); these are split into their own columns.
f1_pivot <- f1 %>%
  pivot_longer(
    # Pivot every column except ID. The argument is spelled out in full:
    # `col` only worked through partial matching of `cols`.
    cols = !ID,
    names_to = c("Sample", "gender", "treatment"),
    names_sep = "_", # split the old column names at each "_"
    values_drop_na = TRUE # drop rows whose value is missing
  ) %>%
  # Spread the measurements back out: one column per ID value
  pivot_wider(names_from = ID, values_from = value)

# Reshape f2 so that each row is one sample and each ID value is a column.
# Every non-ID column name in f2 encodes two fields separated by "."
# (sample, treatment); these are split into their own columns.
f2_pivot <- f2 %>%
  pivot_longer(
    # Pivot every column except ID. The argument is spelled out in full:
    # `col` only worked through partial matching of `cols`.
    cols = !ID,
    names_to = c("Sample", "treatment"),
    # `names_sep` is a regular expression, so the period is escaped with
    # a double backslash to match a literal "."
    names_sep = "\\.",
    values_drop_na = TRUE # drop rows whose value is missing
  ) %>%
  # Spread the measurements back out: one column per ID value
  pivot_wider(names_from = ID, values_from = value)

Merge files

# Merge the two reshaped tables, keeping every row from both (full join).
# The key columns are named explicitly so the join cannot silently pick up
# a different set of shared columns if either input changes.
f3 <- f1_pivot %>%
  full_join(f2_pivot, by = c("Sample", "treatment"))
## Joining, by = c("Sample", "treatment")

Export Results

# Save the merged table as a CSV file.
# NOTE(review): the destination is a hard-coded path under the home
# directory; confirm the folder exists on the machine running this.
write_csv(
  f3,
  "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_5/results/f3.csv"
)

# Show the first rows of the merged table
head(f3)
## # A tibble: 6 × 6
##   Sample  gender treatment body_length   age  mass
##   <chr>   <chr>  <chr>           <dbl> <dbl> <dbl>
## 1 Sample1 Male   Control          1.68     3 NA   
## 2 Sample2 Male   Control          4.31     7 NA   
## 3 Sample3 Male   Control          4.54    11  8.46
## 4 Sample4 Male   Control          1.09     8  3.87
## 5 Sample5 Male   Control          3.55    10  3.12
## 6 Sample6 Male   Treatment        8.19     4  9.38

Link to download final file:

f3

Part 2

Calculate residual mass (mass divided by body length) and summarize it by gender and treatment

# Add a "residual mass" column, computed here as mass divided by body length
f3 <- f3 %>%
  mutate(residual_mass = mass / body_length)

# Summarise residual mass for each gender x treatment combination.
# Missing values are ignored when computing the mean and standard deviation.
# `.groups = "drop_last"` states the grouping of the result explicitly (it
# stays grouped by gender) and so avoids dplyr's regrouping message without
# changing the session-wide `dplyr.summarise.inform` option.
f3_mean_sd <- f3 %>%
  group_by(gender, treatment) %>%
  summarise(
    mean = mean(residual_mass, na.rm = TRUE),
    SD = sd(residual_mass, na.rm = TRUE),
    .groups = "drop_last"
  )

Export table with mean and standard deviations

# Save the summary table (mean and SD per group) as a CSV file.
# NOTE(review): the destination is a hard-coded path under the home
# directory; confirm the folder exists on the machine running this.
write_csv(
  f3_mean_sd,
  "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_5/results/f3_mean_sd.csv"
)

# Show the first rows of the summary table
head(f3_mean_sd)
## # A tibble: 4 × 4
## # Groups:   gender [2]
##   gender treatment  mean    SD
##   <chr>  <chr>     <dbl> <dbl>
## 1 Female Control    2.83  2.41
## 2 Female Treatment  2.99  2.54
## 3 Male   Control    2.10  1.36
## 4 Male   Treatment  2.93  3.58

Link to download final file:

f3_mean_sd

Link to R script

Assignment 5