Some preliminary code

#load the necessary library packages
library (dplyr)
library(tidyverse)

# Read the eBird export into a data frame and preview its first rows
Toomey_ebird <- read.csv(
  file = "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_4/MBT_ebird.csv"
)
head(Toomey_ebird)
##   X   list_ID common_name    scientific_name       date     time count duration
## 1 1 S40748758  Snow Goose Anser caerulescens 2017-11-26 10:28 AM    16       20
## 2 2 S33616660  Snow Goose Anser caerulescens 2017-01-12 07:00 AM     1       90
## 3 3 S33809874  Snow Goose Anser caerulescens 2017-01-20 04:26 PM     1       59
## 4 4 S35533959  Snow Goose Anser caerulescens 2017-03-30 07:05 AM     1      100
## 5 5 S35698031  Snow Goose Anser caerulescens 2017-04-04 07:00 AM     1      127
## 6 6 S35861224  Snow Goose Anser caerulescens 2017-04-10 06:06 PM     1       68
##   location latitude longitude count_tot month year
## 1    US-MO 38.87193 -90.18439       369    11 2017
## 2    US-MO 38.63891 -90.28538       272     1 2017
## 3    US-MO 38.63891 -90.28538       188     1 2017
## 4    US-MO 38.63891 -90.28538       283     3 2017
## 5    US-MO 38.63891 -90.28538       369     4 2017
## 6    US-MO 38.63891 -90.28538        28     4 2017

Problem 1:

# Total number of birds counted per year: one row per year, with `total`
# holding the sum of the `count` column over that year's records.
year_count <- Toomey_ebird %>%
  group_by(year) %>%
  summarise(total = sum(count))

The most birds were observed in 2014, when a total of 9303 individuals were counted.

Problem 2:

# Tally the 2014 records by common name: each row of the result is one species
# recorded that year, and `n` is its number of records.
Species_2014 <- Toomey_ebird %>%
  filter(year == 2014) %>%
  count(common_name)

# One row per species, so the row count is the number of species seen in 2014
nrow(Species_2014)
## [1] 210

A total of 210 species were observed in 2014

Problem 3:

# Keep only the Red-winged Blackbird records, then tally them by location,
# sorted so the location with the most records comes first.
RWBL <- Toomey_ebird %>%
  filter(common_name == "Red-winged Blackbird") %>%
  count(location, sort = TRUE)

Red-winged Blackbirds were most commonly observed in Missouri.

Problem 4:

# Restrict to records whose duration is strictly greater than 5 and strictly
# less than 200 (both conditions must hold for a row to be kept).
time_filter <- Toomey_ebird %>%
  filter(duration > 5, duration < 200)
 
# One row per species per day, built from the duration-filtered records.
# `year` is grouped alongside `date` (although redundant) so that it is still
# available for the per-year summary later on.
species_day <- time_filter %>%
  group_by(year, date) %>%
  count(common_name)

# Silence dplyr's informational message about how summarise() regroups its
# output. This setting only affects messaging, not any computed values.
options(dplyr.summarise.inform = FALSE)

# Every row of species_day is a distinct species on a given day, so counting
# rows per (year, date) gives the number of species recorded that day.
species_perday <- species_day %>%
  group_by(year, date) %>%
  summarise(species_per_day = n())

# Mean duration per day, averaged over every filtered record from that date.
# NOTE: if a date has more than one checklist, this mean runs across the
# records of all of them -- open question whether that ever happens here.
duration_perday <- time_filter %>%
  group_by(year, date) %>%
  summarise(duration = mean(duration))

# Combine the per-day duration and per-day species tables on their shared
# key columns (year and date).
duration_year <- merge(duration_perday, species_perday, by = c("year", "date"))

# Daily rate: species recorded that day divided by that day's mean duration
duration_year$species_per_min <- with(duration_year, species_per_day / duration)

# Average the daily rates within each year and print the result
duration_year %>%
  group_by(year) %>%
  summarise(average_species_per_minute = mean(species_per_min))
## # A tibble: 13 × 2
##     year average_species_per_minute
##    <int>                      <dbl>
##  1  2003                     0.0306
##  2  2004                     0.0685
##  3  2009                     0.133 
##  4  2013                     0.187 
##  5  2014                     0.311 
##  6  2015                     0.300 
##  7  2016                     0.297 
##  8  2017                     0.392 
##  9  2018                     0.303 
## 10  2019                     0.279 
## 11  2020                     0.513 
## 12  2021                     0.370 
## 13  2022                     0.461

Problem 5:

# Rank every species by its number of records, most frequently recorded first.
top_10 <- Toomey_ebird %>%
  count(common_name) %>%
  arrange(desc(n))

# Names of the ten most frequently recorded species. head() is used instead of
# [1:10] so that no NA entries appear if fewer than ten species are present.
top_10_list <- as.list(head(top_10$common_name, 10))

# Keep every record that belongs to one of those species. Membership has to be
# tested with %in%: comparing with == recycles the ten names down the column,
# so a row is kept only when its name happens to line up with the recycled
# position, which silently drops most of the matching records.
top_10_tibble <- Toomey_ebird %>%
  filter(common_name %in% unlist(top_10_list)) %>%
  as_tibble()

# Save the top-10 records to disk as a .csv file
write_csv(
  top_10_tibble,
  "~/Documents/School/Biol7263/WestBIOL7263/assignments/assn_4/results/Toomey_top_10.csv"
)

# Show the first six rows of the table that was written
head(top_10_tibble, n = 6L)
## # A tibble: 6 × 14
##       X list_ID  common_name scientific_name date  time  count duration location
##   <int> <chr>    <chr>       <chr>           <chr> <chr> <int>    <int> <chr>   
## 1    27 S226356… Canada Goo… Branta canaden… 2015… 04:3…     2       20 US-VT   
## 2    37 S100948… Canada Goo… Branta canaden… 2022… 11:3…     4      223 US-OK   
## 3    47 S368404… Canada Goo… Branta canaden… 2017… 08:0…     2       25 US-MO   
## 4    57 S173450… Canada Goo… Branta canaden… 2014… 08:2…    38       55 US-MO   
## 5    67 S178915… Canada Goo… Branta canaden… 2014… 09:0…     6       90 US-MO   
## 6    77 S210959… Canada Goo… Branta canaden… 2014… 08:0…   134      120 US-MO   
## # … with 5 more variables: latitude <dbl>, longitude <dbl>, count_tot <int>,
## #   month <int>, year <int>

Link to download final file:

Toomey_top_10

Link to script

Assignment 4