So the Weecology folks have published a large dataset on mammal communities in a data paper in Ecology. I know nothing about mammal communities, but that doesn't mean one can't play with the data...
Their dataset consists of five csv files: communities, references, sites, species, and trapping data.
Where are these sites, and by the way, do they vary much in altitude?
Let's zoom in on just 'the states'?

What phylogenies can we get for the species in this dataset?
We can use the rOpenSci package treebase to search the online phylogeny repository TreeBASE. Limiting to returning a max of 1 tree (to save time), we can see that X species are in at least 1 tree on the TreeBASE database. Nice.
So there are 321 species in the database with at least 1 tree in the TreeBASE database. Of course there could be many more, but we limited results from TreeBASE to just 1 tree per query.
Here's the code:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# setwd("/Mac/R_stuff/Blog_etc/Mammal_Dataset") | |
# URLs for datasets | |
comm <- "http://esapubs.org/archive/ecol/E092/201/data/MCDB_communities.csv" | |
refs <- "http://esapubs.org/archive/ecol/E092/201/data/MCDB_references.csv" | |
sites <- "http://esapubs.org/archive/ecol/E092/201/data/MCDB_sites.csv" | |
spp <- "http://esapubs.org/archive/ecol/E092/201/data/MCDB_species.csv" | |
trap <- "http://esapubs.org/archive/ecol/E092/201/data/MCDB_trapping.csv" | |
# read them | |
require(plyr) | |
datasets <- llply(list(comm, refs, sites, spp, trap), read.csv, .progress='text') | |
str(datasets[[1]]); head(datasets[[1]]) # cool, worked | |
llply(datasets, head) # see head of all data.frame's | |
# Map the communities | |
require(ggplot2) | |
require(maps) | |
sitesdata <- datasets[[3]] | |
sitesdata <- sitesdata[!sitesdata$Latitude == 'NULL',] | |
sitesdata$Latitude <- as.numeric(as.character(sitesdata$Latitude)) | |
sitesdata$Longitude <- as.numeric(as.character(sitesdata$Longitude)) | |
sitesdata$Elevation_high <- as.numeric(as.character(sitesdata$Elevation_high)) | |
sitesdata <- sitesdata[sitesdata$Longitude > -140,] | |
world_map <- map_data("world") | |
us <- world_map[world_map$region=='USA',] | |
# World map | |
ggplot(world_map, aes(long, lat)) + | |
theme_bw(base_size=16) + | |
geom_polygon(aes(group = group), colour="grey60", fill = 'white', size = .3) + | |
ylim(-55, 85) + | |
geom_jitter(data = sitesdata, aes(Longitude, Latitude, size=Elevation_high), alpha=0.3) | |
ggsave("worldmap.png") | |
# US only | |
ggplot(us, aes(long, lat)) + | |
theme_bw(base_size=16) + | |
geom_polygon(aes(group = group), colour="grey60", fill = 'white', size = .3) + | |
ylim(25, 50) + | |
xlim(-130, -60) + | |
geom_jitter(data = sitesdata, aes(Longitude, Latitude, size=Elevation_high), alpha=0.3) | |
ggsave("usmap.png") | |
# What phylogenies can we get for these species? | |
install.packages("treebase") | |
require(treebase); require(RCurl) | |
datasets[[4]]$gensp <- as.factor(paste(datasets[[4]]$Genus, datasets[[4]]$Species)) | |
trees <- llply(datasets[[4]]$gensp, function(x) search_treebase(paste("\"", x, '\"', sep=''), | |
by="taxon", max_trees=1, exact_match=TRUE, curl=getCurlHandle())) | |
spwithtrees <- data.frame(datasets[[4]]$gensp, | |
laply(trees, function(x) if(length(unlist(x)) > 0){1} else{0})) | |
names(spwithtrees) <- c('species', 'trees') | |
length(spwithtrees[spwithtrees$trees == 1, 2]) |
No comments:
Post a Comment