Some preliminary code
#load the necessary library packages
library (dplyr)
library(tidyverse)
# Read the eBird export (CSV) into a data frame and preview its first rows.
Toomey_ebird <- read.csv(
  file = "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_4/MBT_ebird.csv"
)
head(Toomey_ebird)
## X list_ID common_name scientific_name date time count duration
## 1 1 S40748758 Snow Goose Anser caerulescens 2017-11-26 10:28 AM 16 20
## 2 2 S33616660 Snow Goose Anser caerulescens 2017-01-12 07:00 AM 1 90
## 3 3 S33809874 Snow Goose Anser caerulescens 2017-01-20 04:26 PM 1 59
## 4 4 S35533959 Snow Goose Anser caerulescens 2017-03-30 07:05 AM 1 100
## 5 5 S35698031 Snow Goose Anser caerulescens 2017-04-04 07:00 AM 1 127
## 6 6 S35861224 Snow Goose Anser caerulescens 2017-04-10 06:06 PM 1 68
## location latitude longitude count_tot month year
## 1 US-MO 38.87193 -90.18439 369 11 2017
## 2 US-MO 38.63891 -90.28538 272 1 2017
## 3 US-MO 38.63891 -90.28538 188 1 2017
## 4 US-MO 38.63891 -90.28538 283 3 2017
## 5 US-MO 38.63891 -90.28538 369 4 2017
## 6 US-MO 38.63891 -90.28538 28 4 2017
Problem 1:
# Sum the `count` column within each year: one row per year, with the
# yearly total stored in `total`.
year_count <- summarize(
  group_by(Toomey_ebird, year),
  total = sum(count)
)
The most birds were observed in 2014, when a total of 9303 birds were counted.
Problem 2:
# Restrict the records to 2014, then tabulate them by common name so that
# each species present that year occupies exactly one row.
Species_2014 <- count(
  filter(Toomey_ebird, year == 2014),
  common_name
)
# The row count of that table is the number of species recorded in 2014.
nrow(Species_2014)
## [1] 210
A total of 210 species were observed in 2014.
Problem 3:
# Keep only the Red-winged Blackbird records, then count them per location,
# sorted so the location with the most records comes first.
RWBL <- count(
  filter(Toomey_ebird, common_name == "Red-winged Blackbird"),
  location,
  sort = TRUE
)
Red-winged Blackbirds were most commonly observed in Missouri.
Problem 4:
# Keep only records whose `duration` is strictly between 5 and 200.
time_filter <- Toomey_ebird %>%
  filter(duration > 5, duration < 200)

# One row per species per (year, date). `year` is implied by `date`, but it
# is carried along because the final summary is computed per year.
species_day <- time_filter %>%
  group_by(year, date) %>%
  count(common_name)

# Silence dplyr's informational message about how summarise() regroups its
# output; this does not change any computed value.
options(dplyr.summarise.inform = FALSE)

# Rows per (year, date) in species_day = number of species seen that day.
species_perday <- species_day %>%
  group_by(year, date) %>%
  summarise(species_per_day = n())

# Mean of `duration` over all filtered records that share a (year, date).
duration_perday <- time_filter %>%
  group_by(year, date) %>%
  summarise(duration = mean(duration))

# Combine the two daily summaries on their shared key columns, then derive
# the daily rate of species per unit of duration.
duration_year <- merge(duration_perday, species_perday, by = c("year", "date"))
duration_year[["species_per_min"]] <-
  duration_year[["species_per_day"]] / duration_year[["duration"]]

# Average the daily rates within each year (printed at top level).
duration_year %>%
  group_by(year) %>%
  summarise(average_species_per_minute = mean(species_per_min))
## # A tibble: 13 × 2
## year average_species_per_minute
## <int> <dbl>
## 1 2003 0.0306
## 2 2004 0.0685
## 3 2009 0.133
## 4 2013 0.187
## 5 2014 0.311
## 6 2015 0.300
## 7 2016 0.297
## 8 2017 0.392
## 9 2018 0.303
## 10 2019 0.279
## 11 2020 0.513
## 12 2021 0.370
## 13 2022 0.461
Problem 5:
# Rank every species by its number of records (rows), most frequent first.
top_10 <- Toomey_ebird %>%
  count(common_name) %>%
  arrange(desc(n))

# Common names of the (up to) ten most frequently recorded species.
# head() is used instead of [1:10] so that fewer than ten species does not
# pad the result with NA.
top_10_list <- as.list(head(top_10$common_name, 10))

# Keep every record belonging to one of those species.
# Membership must be tested with %in%: `common_name == top_10_list` recycles
# the ten names down the column and compares row i only against name
# ((i - 1) %% 10) + 1, which silently drops most matching rows.
top_10_tibble <- Toomey_ebird %>%
  filter(common_name %in% unlist(top_10_list)) %>%
  as_tibble()

# Write the filtered records to a .csv file.
write_csv(top_10_tibble, "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_4/results/Toomey_top_10.csv")

# NOTE(review): any head() output rendered before this fix reflects the
# recycled `==` comparison and must be regenerated.
head(top_10_tibble)
## # A tibble: 6 × 14
## X list_ID common_name scientific_name date time count duration location
## <int> <chr> <chr> <chr> <chr> <chr> <int> <int> <chr>
## 1 27 S226356… Canada Goo… Branta canaden… 2015… 04:3… 2 20 US-VT
## 2 37 S100948… Canada Goo… Branta canaden… 2022… 11:3… 4 223 US-OK
## 3 47 S368404… Canada Goo… Branta canaden… 2017… 08:0… 2 25 US-MO
## 4 57 S173450… Canada Goo… Branta canaden… 2014… 08:2… 38 55 US-MO
## 5 67 S178915… Canada Goo… Branta canaden… 2014… 09:0… 6 90 US-MO
## 6 77 S210959… Canada Goo… Branta canaden… 2014… 08:0… 134 120 US-MO
## # … with 5 more variables: latitude <dbl>, longitude <dbl>, count_tot <int>,
## # month <int>, year <int>
Link to download final file:
Link to script