Acoustic space scatter plot

Someone asked me for the code to make figure 3 in the Methods in Ecology and Evolution paper describing warbleR. So, here it is. The figure was made in part by my collaborator Grace Smith-Vidaurre, so thanks to Grace for sharing.

The figure shows the grouping of long-billed hermit songs in the acoustic space based on the similarity of their dominant frequency contours. Similarity was assessed using dynamic time warping. The scatterplot is based on the first two axes of a classical multidimensional scaling. The figure also shows spectrograms for each of the song types. The figure is built from ggplot graphs and spectrograms, which are put together in a multipanel graph using the grid package. Note that you’ll need to download recordings from Xeno-Canto (so an internet connection is required).

Load/install packages:

# Packages needed to rebuild the figure
x <- c("ggplot2", "gtable", "grid", "warbleR")

# Install any package that is missing, then attach it.
# - requireNamespace() is used for the "is it installed?" check instead of
#   scanning the whole library with installed.packages().
# - library() is used instead of require() so that a package that still cannot
#   be loaded stops the script here rather than failing later.
# `out` holds one TRUE per package that was attached successfully.
out <- lapply(x, function(y) {
  if (!requireNamespace(y, quietly = TRUE)) {
    install.packages(y)
  }
  library(y, character.only = TRUE)
  TRUE
})

The following is the same code found in the paper. Download recordings and run detection and acoustic analysis:

# Search Xeno-Canto by genus and species; only metadata is fetched here
Phae.lon <- querxc(qword = "Phaethornis longirostris", download = FALSE)

# Keep recordings whose vocalization type mentions "song" (case-insensitive)
Phae.lon.song <- Phae.lon[
  grepl("song", Phae.lon$Vocalization_type, ignore.case = TRUE),
]

# Keep recordings from the target locality (case-sensitive match)
Phae.lon.song <- Phae.lon.song[
  grepl("Sarapiqui, Heredia", Phae.lon.song$Locality),
]

# Keep only recordings with quality score "A"
Phae.lon.song <- Phae.lon.song[Phae.lon.song$Quality == "A", ]

# Switch to the session's temporary directory, then download the sound files
# listed in the filtered data frame
setwd(tempdir())
querxc(X = Phae.lon.song, download = TRUE)

# Turn the downloaded mp3 files into wav files, lowering the sampling rate
# at the same time to speed up the analyses that follow
mp32wav(samp.rate = 22.05)

# Detect signals automatically within the recordings using amplitude,
# duration and frequency thresholds
Phae.ad <- autodetec(
  bp = c(2, 9),
  threshold = 20,
  mindur = 0.09,
  maxdur = 0.22,
  ssmooth = 900,
  ls = TRUE,
  res = 100,
  flim = c(1, 12),
  wl = 300,
  set = TRUE,
  sxrow = 6,
  rows = 15,
  img = FALSE
)

# Measure signal-to-noise ratio on every other detected selection
Phae.snr <- sig2noise(
  X = Phae.ad[seq(1, nrow(Phae.ad), by = 2), ],
  mar = 0.04
)

# Within each sound file, keep the selections ranked 5 or better by SNR
# (highest SNR gets rank 1)
Phae.hisnr <- Phae.snr[
  ave(-Phae.snr$SNR, Phae.snr$sound.files, FUN = rank) <= 5,
]
  
# Extract frequency contours and compute pairwise acoustic dissimilarity in a
# single step with warbleR's dfDTW
tsLBH <- dfDTW(Phae.hisnr, length.out = 30, bp = c(2, 9), img = FALSE)

# Reduce the dissimilarity matrix to 2 dimensions with classical
# multidimensional scaling (k = 2 is cmdscale's default, spelled out here)
lbhMDS <- cmdscale(tsLBH, k = 2)

Extract recording IDs and select colors for each song type (note that this step requires visual classification of songs beforehand):

# Extract recording IDs from the sound file names: each name is split on "-"
# and the third piece, minus a trailing ".wav", is used as the ID
# (presumably names look like "Genus-species-ID.wav"; confirm against the
# downloaded files).
lbhMDS <- as.data.frame(lbhMDS)
lbhMDS$rid <- sub(
  "\\.wav$", "",
  vapply(
    strsplit(as.character(Phae.hisnr$sound.files), "-", fixed = TRUE),
    "[", character(1), 3
  )
)

# Categorize song types.
# Both columns start as a copy of the recording ID and are overwritten below
# for every recording that has been classified (visually, beforehand).
lbhMDS$cols <- lbhMDS$song.type <- lbhMDS$rid

lbhMDS$song.type[grep("154070|154072", lbhMDS$rid)] <- "A"
lbhMDS$cols[grep("154070|154072", lbhMDS$rid)] <- topo.colors(10)[3]

lbhMDS$song.type[grep("154123", lbhMDS$rid)] <- "B"
lbhMDS$cols[grep("154123", lbhMDS$rid)] <- heat.colors(10)[1]

lbhMDS$song.type[grep("154129|154161", lbhMDS$rid)] <- "C"
lbhMDS$cols[grep("154129|154161", lbhMDS$rid)] <- terrain.colors(10)[2]

lbhMDS$song.type[grep("154138", lbhMDS$rid)] <- "D"
lbhMDS$cols[grep("154138", lbhMDS$rid)] <- heat.colors(10)[6]

# One plotting symbol per recording (six recordings are classified above)
shps <- c(21:25, 4)

# One colour per song type, taken from the first row of each type.
# The vector is named by song type so that ggplot2's manual scales match
# colours to song types by name instead of relying on the row order happening
# to agree with the alphabetical order of the scale levels.
cols <- lbhMDS$cols[!duplicated(lbhMDS$song.type)]
names(cols) <- unique(lbhMDS$song.type)

Create first scatterplot:

# Scatterplot of the two MDS dimensions.
# Points are coloured and filled by song type and shaped by recording ID; a
# 95% ellipse is drawn as a translucent polygon for each song type.
# All legends are switched off here with "none" (the older `FALSE` form is
# deprecated in ggplot2); they are added back separately later on.
p.mds <- ggplot(lbhMDS) +
  geom_point(
    aes(x = V1, y = V2, color = song.type, fill = song.type, shape = rid),
    size = 7
  ) +
  scale_colour_manual(values = cols) +
  scale_fill_manual(values = cols) +
  scale_shape_manual(values = shps) +
  stat_ellipse(
    aes(x = V1, y = V2, fill = song.type),
    geom = "polygon", level = 0.95, alpha = 0.2
  ) +
  guides(color = "none", shape = "none", fill = "none") +
  xlab("Dimension 1") +
  ylab("Dimension 2") +
  # NOTE: ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines; `size`
  # is kept so the code still runs on older ggplot2 versions.
  theme(
    panel.background = element_rect(fill = "white"),
    plot.background = element_rect(fill = "white"),
    panel.grid.major = element_line(size = 1, colour = "grey"),
    panel.grid.minor = element_line(size = 0.75, colour = "grey"),
    axis.line = element_line(size = 2.5, colour = "black"),
    axis.title = element_text(size = 27),
    axis.text = element_text(size = 27)
  )
p.mds

plot of chunk create CMDS plot

Add color legend:

# Version of the scatterplot that shows only the colour (song type) legend,
# laid out in a single row above the plot. Guides are disabled with "none"
# rather than the deprecated `FALSE`.
col.leg <- p.mds +
  guides(
    color = guide_legend("Song Type", nrow = 1, byrow = TRUE),
    shape = "none",
    size = "none"
  ) +
  theme(
    legend.box = "horizontal",
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 30),
    legend.background = element_rect(fill = alpha("white", 0.4)),
    legend.key = element_rect(fill = alpha("white", 0.4))
  )

# Version of the scatterplot that shows only the shape (recording) legend,
# with slightly smaller legend text than the colour legend
shape.leg <- p.mds +
  guides(
    color = "none",
    shape = guide_legend("Recordings", nrow = 1, byrow = TRUE),
    size = "none"
  ) +
  theme(
    legend.box = "horizontal",
    legend.position = "top",
    legend.key.size = unit(1, "cm"),
    legend.title = element_text(size = 27),
    legend.text = element_text(size = 27),
    legend.background = element_rect(fill = alpha("white", 0.4)),
    legend.key = element_rect(fill = alpha("white", 0.4))
  )


col.leg

plot of chunk extract shape and color legends from CMDS plot

Create song type spectrograms:

## [[1]]

plot of chunk create song type spectrograms 1