################################################################################
# R scripts to create figures on archaeozoological data from mainland Southeast
# Asian prehistoric sites.
#
# written by Cyler Conrad, Department of Anthropology, University of New Mexico
# cylerc@unm.edu
#
# Results described in:
# Conrad, C. (in press). Archaeozoology in Mainland Southeast Asia: Changing
# Methodology and Pleistocene to Holocene Forager Subsistence Patterns in
# Thailand and Peninsular Malaysia. Open Quaternary.
#
# NOTES
# All data required to perform the analyses can be found at
# http://hdl.handle.net/1928/25699 (Conrad 2015). The script was run using R
# version 3.1.1 on Mac OS X 10.8.5
#
# Reference:
# Conrad, C. 2015. Faunal Data for Pleistocene-Holocene Archaeological Sites in
# Thailand and Peninsular Malaysia [dataset]. University of New Mexico.
# http://hdl.handle.net/1928/25699
#
# LICENSE
#
# The MIT License (MIT)
#
# Copyright (c) 2015 Cyler Conrad
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
################################################################################
# set the base directory for knitr to the directory above this one
library(knitr)
opts_knit$set(root.dir = '../')
################################################################################
# CHUNK 1: load libraries
################################################################################
library(ggplot2) #Version 1.0.0
library(reshape2)
library(gridExtra)
## Loading required package: grid
# define a custom plot theme to avoid repeating it...
theme_new <- theme_set(theme_bw())
theme_new <- theme_update(
axis.text.x = element_text(vjust=0.5, color="black", size=20, angle=90),
axis.text.y = element_text(vjust=0.5, color="black", size=20),
axis.title.y = element_text(vjust=1.0, color="black", size=20, angle=90),
axis.title.x = element_text(vjust=0.5, color="black", size=20),
strip.text.x = element_text(size=25),
legend.position = "bottom",
legend.text = element_text(size=20),
legend.title = element_text(size=20))
################################################################################
# CHUNK 1-Figure 2: Decadal division of faunal publications using differential
# methodological recording techniques during the 1930s–2010s. Download data file
# "data/figure2.csv" from Conrad (2015). This file includes counts per decade for
# each recording technique used in archaeozoological publications. This figure
# is constructed by reshaping the data into long format, then using
# facet_wrap() to construct four plots, one per recording technique.
################################################################################
# Set absolute path to csv file on your computer
fn <- "data/figure2.csv"
sea <- read.csv(fn, stringsAsFactors = FALSE, check.names = FALSE)
sea
## Faunal Publications NISP MNI Presence/Absence Descriptive
## 1 1930-1940 0 0 0 1
## 2 1960-1970 1 0 1 0
## 3 1970-1980 0 0 1 0
## 4 1980-1990 0 2 1 2
## 5 1990-2000 3 5 1 0
## 6 2000-2010 3 0 0 3
## 7 2010-Present 4 0 0 0
str(sea)
## 'data.frame': 7 obs. of 5 variables:
## $ Faunal Publications: chr "1930-1940" "1960-1970" "1970-1980" "1980-1990" ...
## $ NISP : int 0 1 0 0 3 3 4
## $ MNI : int 0 0 0 2 5 0 0
## $ Presence/Absence : int 0 1 1 1 1 0 0
## $ Descriptive : int 1 0 0 2 0 3 0
sea.long <- melt(sea, value.name="Count")
## Using Faunal Publications as id variables
sea.long
## Faunal Publications variable Count
## 1 1930-1940 NISP 0
## 2 1960-1970 NISP 1
## 3 1970-1980 NISP 0
## 4 1980-1990 NISP 0
## 5 1990-2000 NISP 3
## 6 2000-2010 NISP 3
## 7 2010-Present NISP 4
## 8 1930-1940 MNI 0
## 9 1960-1970 MNI 0
## 10 1970-1980 MNI 0
## 11 1980-1990 MNI 2
## 12 1990-2000 MNI 5
## 13 2000-2010 MNI 0
## 14 2010-Present MNI 0
## 15 1930-1940 Presence/Absence 0
## 16 1960-1970 Presence/Absence 1
## 17 1970-1980 Presence/Absence 1
## 18 1980-1990 Presence/Absence 1
## 19 1990-2000 Presence/Absence 1
## 20 2000-2010 Presence/Absence 0
## 21 2010-Present Presence/Absence 0
## 22 1930-1940 Descriptive 1
## 23 1960-1970 Descriptive 0
## 24 1970-1980 Descriptive 0
## 25 1980-1990 Descriptive 2
## 26 1990-2000 Descriptive 0
## 27 2000-2010 Descriptive 3
## 28 2010-Present Descriptive 0
ggplot(sea.long, aes(`Faunal Publications`, Count, group = 1)) +
geom_point(size = 4) +
geom_line() +
facet_wrap(~ variable)

# there are no dimensions specified at http://www.openquaternary.com/about/submissions/#Figures & Tables
# so we'll just do what looks good...
fig_width <- 300 # play with this number
ggsave(filename = "figures/fig_2.png",
dpi = 300, units = "mm",
height = fig_width/1.6, width = fig_width)
################################################################################
# CHUNK 2-Figure 3: Grouped classifications from Thailand sites expressing total NISP
# counts per site (left column), and total NISP counts for all sites (right column).
# Download data file "data/figure3.csv" from Conrad (2015). This file
# includes total NISP counts for all Thai sites in this dataset, grouped by
# highest taxon specific values. To create this figure the data is reshaped
# to a long format and wrapped to express NISP counts per taxonomic classification
# in each site, and total for all sites. The plot gives total counts per site on
# the left half, with total aggregated NISPs appearing on the right half.
# Colored points denote sites.
################################################################################
fn <- "data/figure3.csv"
thai <- read.csv(fn, stringsAsFactors = FALSE, skip = 1, check.names=FALSE)
thai
## Taxonomic Classification Occurences NISP Tham Lod Lang Kamnan
## 1 Testudines 14 14454 746 121
## 2 Varanidae 7 3966 1 2
## 3 Cervidae 42 3000 2234 11
## 4 Primate 29 2275 23 NA
## 5 Suidae 13 897 356 NA
## 6 Bovidae 26 662 561 9
## 7 Pisces 7 527 27 NA
## Lang Rongrien Khao Toh Chong Moh Khiew II Thung Nong Nien
## 1 1221 215 11951 200
## 2 NA 39 3582 342
## 3 118 6 344 287
## 4 NA 25 1960 267
## 5 55 NA 367 119
## 6 11 1 75 5
## 7 9 7 100 384
str(thai)
## 'data.frame': 7 obs. of 9 variables:
## $ Taxonomic Classification: chr "Testudines " "Varanidae" "Cervidae " "Primate" ...
## $ Occurences : int 14 7 42 29 13 26 7
## $ NISP : int 14454 3966 3000 2275 897 662 527
## $ Tham Lod : int 746 1 2234 23 356 561 27
## $ Lang Kamnan : int 121 2 11 NA NA 9 NA
## $ Lang Rongrien : int 1221 NA 118 NA 55 11 9
## $ Khao Toh Chong : int 215 39 6 25 NA 1 7
## $ Moh Khiew II : int 11951 3582 344 1960 367 75 100
## $ Thung Nong Nien : int 200 342 287 267 119 5 384
thai$`Taxonomic Classification order` <- thai$`Taxonomic Classification`
thai$`Taxonomic Classification` <- factor(thai$`Taxonomic Classification`,
levels = rev(thai$`Taxonomic Classification order`))
thai.long <- melt(thai
, measure.vars = c("NISP", "Tham Lod", "Lang Kamnan",
"Lang Rongrien", "Khao Toh Chong",
"Moh Khiew II", "Thung Nong Nien")
, variable.name = "Site"
, value.name = "NISP"
, na.rm = TRUE)
thai.long$Total.ind <- factor(ifelse((thai.long$Site == "NISP"), "Total", "Sites"))
p1 <- ggplot(thai.long, aes(x = NISP, y = `Taxonomic Classification`)) +
guides(colour=guide_legend(nrow=3,byrow=TRUE)) +
geom_point(aes(colour = Site), alpha = 0.75, size=5) +
facet_grid(. ~ Total.ind)
p1

# save plot
fig_width <- 300 # play with this number
ggsave(filename = "figures/fig_3.png",
dpi = 300, units = "mm",
height = fig_width/1.6, width = fig_width)
################################################################################
# CHUNK 3-Figure 4: Grouped classifications from Thailand sites expressing total NISP
# counts per site (left column), and total NISP counts for all sites (right column)
# without Moh Khiew Cave II. Download data file "data/figure4.csv" from Conrad (2015).
# This file includes total NISP counts for all Thai sites in this dataset,
# excluding Moh Khiew Cave II. This csv file is also grouped by highest taxon
# specific values.To create this figure the data is reshaped like prior. The plot
# gives total counts per site on the left half, with total aggregated NISPs
# appearing on the right half. Colored points denote sites.
################################################################################
fn <- "data/figure4.csv"
thai2 <- read.csv(fn, stringsAsFactors = FALSE, skip = 1, check.names=FALSE)
thai2
## Taxonomic Classification Occurences NISP Tham Lod Lang Kamnan
## 1 Cervidae 42 2656 2234 11
## 2 Testudines 14 2503 746 121
## 3 Bovidae 26 587 561 9
## 4 Suidae 13 530 356 NA
## 5 Pisces 7 427 27 NA
## 6 Varanidae 7 384 1 2
## 7 Primates 29 315 23 NA
## Lang Rongrien Khao Toh Chong Thung Nong Nien
## 1 118 6 287
## 2 1221 215 200
## 3 11 1 5
## 4 55 NA 119
## 5 9 7 384
## 6 NA 39 342
## 7 NA 25 267
str(thai2)
## 'data.frame': 7 obs. of 8 variables:
## $ Taxonomic Classification: chr "Cervidae " "Testudines " "Bovidae " "Suidae " ...
## $ Occurences : int 42 14 26 13 7 7 29
## $ NISP : int 2656 2503 587 530 427 384 315
## $ Tham Lod : int 2234 746 561 356 27 1 23
## $ Lang Kamnan : int 11 121 9 NA NA 2 NA
## $ Lang Rongrien : int 118 1221 11 55 9 NA NA
## $ Khao Toh Chong : int 6 215 1 NA 7 39 25
## $ Thung Nong Nien : int 287 200 5 119 384 342 267
thai2$`Taxonomic Classification order` <- thai2$`Taxonomic Classification`
thai2$`Taxonomic Classification` <- factor(thai2$`Taxonomic Classification`,
levels = rev(thai2$`Taxonomic Classification order`))
thai2.long <- melt(thai2
, measure.vars = c("NISP", "Tham Lod", "Lang Kamnan",
"Lang Rongrien", "Khao Toh Chong",
"Thung Nong Nien")
, variable.name = "Site"
, value.name = "NISP"
, na.rm = TRUE)
thai2.long$Total.ind <- factor(ifelse((thai2.long$Site == "NISP"), "Total", "Sites"))
p2 <- ggplot(thai2.long, aes(x = NISP, y = `Taxonomic Classification`))
p2 <- p2 + geom_point(aes(colour = Site), alpha = 0.75, size=5)
p2 <- p2 + facet_grid(. ~ Total.ind) + guides(colour=guide_legend(nrow=3,byrow=TRUE))
p2

# save plot
fig_width <- 300 # play with this number
ggsave(filename = "figures/fig_4.png",
dpi = 300, units = "mm",
height = fig_width/1.6, width = fig_width)
################################################################################
# CHUNK 4-Figure 5: Grouped classifications from Thailand sites expressing total NISP
# counts per site (left column), and total NISP counts for all sites
# (right column) without Testudines. Download data file "data/figure5.csv" from
# Conrad (2015). This file includes total NISP counts for all Thai sites in
# this dataset, excluding Testudines. This csv file is also grouped by highest
# taxon specific values. To create this figure the data is reshaped like prior.
# The plot gives total counts per site on the left half, with total aggregated
# NISPs appearing on the right half. Colored points denote sites.
################################################################################
fn <- "data/figure5.csv"
thai3 <- read.csv(fn, stringsAsFactors = FALSE, skip = 1, check.names=FALSE)
thai3
## Taxonomic Classification Occurences NISP Tham Lod Lang Kamnan
## 1 Varanidae 7 3966 1 2
## 2 Cervidae 42 3000 2234 11
## 3 Primate 29 2275 23 NA
## 4 Suidae 13 897 356 NA
## 5 Bovidae 26 662 561 9
## 6 Pisces 7 527 27 NA
## Lang Rongrien Khao Toh Chong Moh Khiew II Thung Nong Nien
## 1 NA 39 3582 342
## 2 118 6 344 287
## 3 NA 25 1960 267
## 4 55 NA 367 119
## 5 11 1 75 5
## 6 9 7 100 384
str(thai3)
## 'data.frame': 6 obs. of 9 variables:
## $ Taxonomic Classification: chr "Varanidae" "Cervidae " "Primate" "Suidae " ...
## $ Occurences : int 7 42 29 13 26 7
## $ NISP : int 3966 3000 2275 897 662 527
## $ Tham Lod : int 1 2234 23 356 561 27
## $ Lang Kamnan : int 2 11 NA NA 9 NA
## $ Lang Rongrien : int NA 118 NA 55 11 9
## $ Khao Toh Chong : int 39 6 25 NA 1 7
## $ Moh Khiew II : int 3582 344 1960 367 75 100
## $ Thung Nong Nien : int 342 287 267 119 5 384
thai3$`Taxonomic Classification order` <- thai3$`Taxonomic Classification`
thai3$`Taxonomic Classification` <- factor(thai3$`Taxonomic Classification`,
levels = rev(thai3$`Taxonomic Classification order`))
thai3.long <- melt(thai3
, measure.vars = c("NISP", "Tham Lod", "Lang Kamnan",
"Lang Rongrien", "Khao Toh Chong",
"Moh Khiew II", "Thung Nong Nien")
, variable.name = "Site"
, value.name = "NISP"
, na.rm = TRUE)
thai3.long$Total.ind <- factor(ifelse((thai3.long$Site == "NISP"), "Total", "Sites"))
p3 <- ggplot(thai3.long, aes(x = NISP, y = `Taxonomic Classification`))
p3 <- p3 + geom_point(aes(colour = Site), alpha = 0.75, size=5)
p3 <- p3 + facet_grid(. ~ Total.ind) + guides(colour=guide_legend(nrow=3,byrow=TRUE))
p3

# save plot
fig_width <- 300 # play with this number
ggsave(filename = "figures/fig_5.png",
dpi = 300, units = "mm",
height = fig_width/1.6, width = fig_width)
################################################################################
# CHUNK 5-Figure 6: Grouped classifications from Peninsular Malaysian sites expressing
# total NISP counts per site (left column), and total NISP counts for all sites
# (right column). Download data file "data/figure6.csv" from Conrad (2015). This file
# includes total NISP counts for all Peninsula Malaysia sites in this dataset.
# This csv file is also grouped by highest taxon specific values.
# To create this figure the data is reshaped like prior. The plot gives total
# counts per site on the left half, with total aggregated NISPs appearing
# on the right half. Colored points denote sites.
################################################################################
fn <- "data/figure6.csv"
ma <- read.csv(fn, stringsAsFactors = FALSE, skip = 1, check.names=FALSE)
ma
## Taxonomic Classification Occurences Total NISP Gua Gunung Runtuh
## 1 Suidae 9 212 26
## 2 Testudines 14 212 4
## 3 Primate 18 76 24
## 4 Pisces 7 60 NA
## 5 Cervidae 10 54 13
## 6 Varanus sp. 4 24 1
## 7 Bovidae 6 6 1
## Gua Peraling Gua Kechil Gua Tenggek Gua Sagu
## 1 152 28 NA 6
## 2 23 22 11 152
## 3 50 NA NA 2
## 4 43 9 NA 8
## 5 16 7 NA 18
## 6 14 NA NA 9
## 7 NA NA 1 4
str(ma)
## 'data.frame': 7 obs. of 8 variables:
## $ Taxonomic Classification: chr "Suidae" "Testudines" "Primate" "Pisces " ...
## $ Occurences : int 9 14 18 7 10 4 6
## $ Total NISP : int 212 212 76 60 54 24 6
## $ Gua Gunung Runtuh : int 26 4 24 NA 13 1 1
## $ Gua Peraling : int 152 23 50 43 16 14 NA
## $ Gua Kechil : int 28 22 NA 9 7 NA NA
## $ Gua Tenggek : int NA 11 NA NA NA NA 1
## $ Gua Sagu : int 6 152 2 8 18 9 4
ma$`Taxonomic Classification order` <- ma$`Taxonomic Classification`
ma$`Taxonomic Classification` <- factor(ma$`Taxonomic Classification`,
levels = rev(ma$`Taxonomic Classification order`))
ma.long <- melt(ma
, measure.vars = c("Total NISP", "Gua Gunung Runtuh",
"Gua Peraling", "Gua Kechil", "Gua Tenggek",
"Gua Sagu")
, variable.name = "Site"
, value.name = "NISP"
, na.rm = TRUE)
ma.long$Total.ind <- factor(ifelse((ma.long$Site == "Total NISP"), "Total", "Sites"))
p4 <- ggplot(ma.long, aes(x = NISP, y = `Taxonomic Classification`))
p4 <- p4 + geom_point(aes(colour = Site), alpha = 0.75, size=5)
p4 <- p4 + facet_grid(. ~ Total.ind) + guides(colour=guide_legend(nrow=3,byrow=TRUE))
p4

# save plot
fig_width <- 300 # play with this number
ggsave(filename = "figures/fig_6.png",
dpi = 300, units = "mm",
height = fig_width/1.6, width = fig_width)
################################################################################
# CHUNK 6-Figure 7: Squared chord distance values for Thailand and Peninsular
# Malaysian sites. S=Surface context. This group of data is arranged in its final
# form to include eight different plots into one. For these plots, download all
# site specific csv files "data/figure7..." in Conrad (2015), then use the function
# list.files () to upload into R. These files include squared chord distance
# values, quantified between contexts, for each site. The code is constructed to
# input the values, plot each site and loop all sites together into one plot.
################################################################################
fig7_file_names <- list.files(path = "data", pattern = "figure7", full.names = TRUE)
fig7_data <- lapply(fig7_file_names, read.csv)
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_KT.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_LK.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_LR.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_MC.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_MKI.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_SK.csv'
## Warning in read.table(file = file, header = header, sep = sep, quote
## = quote, : incomplete final line found by readTableHeader on 'data/
## figure7_TL.csv'
pattern_to_remove <- c("figure7_", "data/", ".csv")
names(fig7_data) <- gsub(paste0(pattern_to_remove, collapse = "|"), "", fig7_file_names)
my_plots <- vector("list", length = length(fig7_data))
for(i in seq_along(fig7_data)){
p <- ggplot(fig7_data[[i]], aes(Context, SCD, group = 1)) +
geom_point(position = "identity", na.rm=FALSE, size=4) +
geom_line() +
labs(title = names(fig7_data)[i]) + xlab("") +ylab("") + ylim(0,2) +
coord_flip() +
theme_bw() +
theme(axis.text.x = element_text(vjust=0.5, color="black", size=30),
axis.text.y = element_text(vjust=0.5, color="black", size=30),
axis.title.y = element_text(vjust=2.0, color="black", size=30),
axis.title.x = element_text(vjust=0.5, color="black", size=30),
strip.text.x = element_text(size=30),
plot.title = element_text(vjust=0.5, color="black", size=30))
my_plots[[i]] <- p
}
# to Save it
g1 <- do.call("arrangeGrob", c(my_plots, ncol=4))
# view it
g1

# save plot
fig_width <- 600 # play with this number
ggsave(filename = "figures/fig_7.png", g1,
dpi = 300, units = "mm",
height = fig_width, width = fig_width)