library(tidyverse)
Zebra Teeth Simulation Outcomes
Load packages
Birth and mortality profiles
Birth
<-read_csv("birthDeath.csv")
birthDeath$born<-month.abb[birthDeath$born]
birthDeath
ggplot(birthDeath,aes(x=fct_relevel(as_factor(born),"Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"))) +
geom_bar(fill="darkblue") +
theme_classic() +
labs(x="Birth month",y="n") +
theme(
axis.text=element_text(size=12),
axis.title=element_text(size=14),
axis.text.x = element_text(angle = 45,hjust=1))
Death
ggplot(birthDeath,aes(x=died)) +
geom_histogram(fill="darkred") +
theme_classic() +
labs(x="Age at death (years)",y="n") +
theme(
axis.text=element_text(size=12),
axis.title=element_text(size=14))
Tooth growth models
<-read_csv("growthRates.csv") %>%
toothGrowthpivot_longer(cols=constant:nonLinear,names_to="Model",values_to="crownHeight")
ggplot(toothGrowth,aes(x=day,y=crownHeight,color=fct_recode(Model,Constant="constant","Non-linear"="nonLinear"))) +
geom_line(lwd=1.1) +
scale_color_manual(values=c("black","darkorange")) +
theme_classic() +
labs(x="Days since start of mineralization",y="Crown Height (\u{03BC}m)",color="Model") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Model 𝝳18O signals
Amplitudes of 1, 2, 3, and 4‰ were simulated; most results below are based on the 3‰ signal.
<-read_csv("sim_d18O.csv") %>%
d18Opivot_longer(cols=AMP1:AMP4,names_to="Amplitude",values_to="d18O")
ggplot(d18O,aes(x=as_date(DAY,origin="2000-12-21"),y=d18O,color=case_match(Amplitude,"AMP1"~"1","AMP2"~"2","AMP3"~"3","AMP4"~"4"))) +
geom_hline(yintercept=-4,linetype=2)+
geom_line(lwd=1.1) +
scale_color_manual(values=palette.colors(4,palette="R4")) +
scale_x_date(date_labels = "%b", date_breaks = '3 months') +
theme_classic() +
labs(x="",y=expression(paste(delta^"18","O")),color="Amplitude (\u{2030})") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Constant vs non-linear growth rate (no wear; Amplitude = 3)
These teeth come from animals that are all born on the same day (Jan 1) and die after 55 months.
Just the constant model:
<-read_csv("simZebraDataEX12.csv") %>% pivot_longer(cols=d18O_1:d18O_4,names_to="Amplitude",values_to="d18O",names_prefix="d18O_")
Ex1<-filter(Ex1,Amplitude==3,specimen==1)
Ex1_Amp3<-filter(Ex1_Amp3,model=="Constant")
Ex1_Amp3_con
ggplot(Ex1_Amp3_con,aes(x=mm_band,y=d18O)) +
geom_line() +
geom_hline(yintercept=-4,linetype=2)+
geom_line(lwd=1.1) +
theme_classic() +
labs(x="Distance from Occlusal Surface (mm)",y=expression(paste(delta^"18","O")))+
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Constant and non-linear
ggplot(Ex1_Amp3,aes(x=mm_band,y=d18O,color=fct_recode(model,"Non-Linear" = "High"))) + geom_line() +
geom_hline(yintercept=-4,linetype=2)+
geom_line(lwd=1.1) +
scale_color_manual(values=c("black","darkorange")) +
theme_classic() +
labs(x="Distance from Occlusal Surface (mm)",y=expression(paste(delta^"18","O")),color="Model")+
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Non-linear growth with variable birth season ( Amplitude = 3)
<-read_csv("simZebraDataEX3.csv") %>% pivot_longer(cols=d18O_1:d18O_4,names_to="Amplitude",values_to="d18O",names_prefix="d18O_") Ex3
Rows: 55000 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): model
dbl (16): specimen, age_at_death, birth_year, birth_month, birth_day, crown_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
<-unique(Ex3$specimen)
uv<-sample(uv,50,replace=FALSE)
samp<-filter(Ex3,Amplitude==3,model=="High", specimen %in% samp)
Ex3_Amp3
ggplot(Ex3_Amp3,aes(x=mm_band,y=d18O,group=specimen,color=specimen)) +
geom_hline(yintercept=-4,linetype=2)+
geom_line(alpha=0.8,color="grey") +
theme_classic() +
labs(x="Distance from Occlusal Surface (mm)",y=expression(paste(delta^"18","O"))) +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Non-linear growth with variable birth season and death age (Amplitude = 3)
Note: samples chosen random
set.seed(303) #to keep the sampling of teeth consistent
<-read_csv("simZebraDataEX4.csv") Ex4
Rows: 33671 Columns: 17
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): model
dbl (16): specimen, age_at_death, birth_year, birth_month, birth_day, crown_...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
<-Ex4 %>% pivot_longer(cols=d18O_1:d18O_4,names_to="Amplitude",values_to="d18O",names_prefix="d18O_")
Ex4_pivot<-unique(Ex4$specimen)
uv<-sample(uv,5,replace=FALSE)
samp<-filter(Ex4_pivot,Amplitude==3,model=="High", specimen %in% samp)
Ex4_Amp3_samp
ggplot(Ex4_Amp3_samp,aes(x=mm_band,y=d18O,group=as_factor(specimen),color=as_factor(specimen))) +
geom_hline(yintercept=-4,linetype=2)+
geom_line(lwd=1.1) +
theme_classic() +
scale_color_manual(values=palette.colors(5,palette="R4")) +
labs(x="Distance from Occlusal Surface (mm)",y=expression(paste(delta^"18","O")),color="Specimen #")+
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Boxplots of intratooth variation
ggplot(Ex4_Amp3_samp,aes(x=as_factor(specimen),y=d18O,group=as_factor(specimen),color=as_factor(specimen))) +
geom_hline(yintercept=-4,linetype=2)+
geom_boxplot(show.legend = FALSE) +
geom_hline(yintercept=-4,linetype=2)+
theme_classic() +
scale_color_manual(values=palette.colors(5,palette="R4")) +
labs(x="Specimen #",y=expression(paste(delta^"18","O")),color="Specimen #")+
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
BULK SAMPLING METHOD A: Estimating amplitude from random 1mm bands (Non-linear growth, Amplitude = 3)
<-Ex4 %>% filter(model=="High") %>% drop_na()
Ex4_high
<-c(10,20,50,100)
sampleSizes<-c()
size<-c()
bootstrapAmpfor (i in sampleSizes) {
for (j in c(1:100)) {
<-Ex4_high %>% group_by(specimen) %>% slice_sample(n=1) %>% ungroup() %>% slice_sample(n=i)
samp<-append(size,i)
size<-append(bootstrapAmp,abs(diff(range(samp$d18O_3)))/2)
bootstrapAmp
}
}<-data.frame(size,bootstrapAmp)
dataggplot(data,aes(x=as_factor(size),y=bootstrapAmp)) +
geom_hline(yintercept=3,linetype=2)+
geom_boxplot() +
theme_classic() +
ylim(c(1,4)) +
labs(x="Sample Size (# of teeth)",y="Estimated Amplitude (\u{2030})") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
BULK SAMPLING METHOD A: Estimating amplitude from sampling a random 1mm band with 20mm size cutoff (Non-linear growth, Amplitude = 3)
<-20
cutoff<-Ex4 %>% filter(model=="High") %>% drop_na() %>% group_by(specimen) %>% filter( n() >=cutoff) %>% ungroup()
Ex4_highCut
<-c(10,20,50,100)
sampleSizes<-c()
size<-c()
bootstrapAmpfor (i in sampleSizes) {
for (j in c(1:100)) {
<-Ex4_highCut %>% group_by(specimen) %>% slice_sample(n=1) %>% ungroup() %>% slice_sample(n=i)
samp<-append(size,i)
size<-append(bootstrapAmp,abs(diff(range(samp$d18O_3)))/2)
bootstrapAmp
}
}<-data.frame(size,bootstrapAmp)
dataggplot(data,aes(x=as_factor(size),y=bootstrapAmp)) +
geom_hline(yintercept=3,linetype=2)+
geom_boxplot() +
theme_classic() +
ylim(c(1,4)) +
labs(x="Sample Size (# of teeth)",y="Estimated Amplitude (\u{2030})") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
BULK SAMPLING METHOD B: Estimating amplitude from averaged sample taken along growth axis (Non-linear growth, Amplitude = 3)
<-Ex4 %>% filter(model=="High") %>% drop_na()
Ex4_high
<-c(10,20,50,100)
sampleSizes<-c()
size<-c()
bootstrapAmpfor (i in sampleSizes) {
for (j in c(1:100)) {
<-Ex4_high %>% group_by(specimen) %>% slice_sample(n=i) %>% summarize(mean(d18O_3))
samp<-append(size,i)
size<-append(bootstrapAmp,abs(diff(range(samp$`mean(d18O_3)`)))/2)
bootstrapAmp
}
}<-data.frame(size,bootstrapAmp)
dataggplot(data,aes(x=as_factor(size),y=bootstrapAmp)) +
geom_hline(yintercept=3,linetype=2)+
geom_boxplot() +
theme_classic() +
ylim(c(1,4)) +
labs(x="Sample Size (# of teeth)",y="Estimated Amplitude (\u{2030})") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
BULK SAMPLING METHOD B: Estimating amplitude from averaged sample taken along growth axis with 20 mm cutoff (Non-linear growth, Amplitude = 3)
<-20
cutoff<-Ex4 %>% filter(model=="High") %>% drop_na() %>% group_by(specimen) %>% filter( n() >=cutoff) %>% ungroup()
Ex4_highCut
<-c(10,20,50,100)
sampleSizes<-c()
size<-c()
bootstrapAmpfor (i in sampleSizes) {
for (j in c(1:100)) {
<-Ex4_highCut %>% group_by(specimen) %>% slice_sample(n=i) %>% summarize(mean(d18O_3))
samp<-append(size,i)
size<-append(bootstrapAmp,abs(diff(range(samp$`mean(d18O_3)`)))/2)
bootstrapAmp
}
}<-data.frame(size,bootstrapAmp)
dataggplot(data,aes(x=as_factor(size),y=bootstrapAmp)) +
geom_hline(yintercept=3,linetype=2)+
geom_boxplot() +
theme_classic() +
ylim(c(1,4)) +
labs(x="Sample Size (# of teeth)",y="Estimated Amplitude (\u{2030})") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
Assessing amplitude
set.seed(303)
<-Ex4 %>%
ampMethodCheckfilter(model=="High") %>%
drop_na(d18O_3) %>%
group_by(as_factor(specimen)) %>%
slice_sample(n=200) %>%
arrange(specimen,mm_band) %>%
summarize(stdev=sd(d18O_3),min=min(d18O_3),max=max(d18O_3)) %>%
mutate(minmax=max-min) %>%
rename(specimen=`as_factor(specimen)`)%>%
select(specimen,stdev,minmax) %>%
pivot_longer(
cols=stdev:minmax,
names_to="method",
values_to="value"
%>%
) mutate(samp_freq="1:1 mm")
<-rep(c(2.124,6),nrow(ampMethodCheck)/2)
mids<-bind_cols(ampMethodCheck,mids=mids)
ampMethodCheck
<-Ex4 %>%
ampMethodCheck2filter(model=="High") %>%
drop_na(d18O_3) %>%
group_by(as_factor(specimen)) %>%
slice_sample(n=200) %>%
filter(mm_band %% 5 == 0) %>%
arrange(specimen,mm_band) %>%
summarize(stdev=sd(d18O_3),min=min(d18O_3),max=max(d18O_3)) %>%
mutate(minmax=max-min) %>%
rename(specimen=`as_factor(specimen)`)%>%
select(specimen,stdev,minmax) %>%
pivot_longer(
cols=stdev:minmax,
names_to="method",
values_to="value"
%>%
)mutate(samp_freq="1:5 mm")
<-rep(c(2.124,6),nrow(ampMethodCheck2)/2)
mids<-bind_cols(ampMethodCheck2,mids=mids)
ampMethodCheck2
<-bind_rows(ampMethodCheck,ampMethodCheck2) %>%
ampMethodCheckAllmutate(method=recode(method,"minmax"="Min-Max","stdev"="Standard Deviation"))
ggplot(ampMethodCheckAll,aes(x=value)) +
geom_histogram() +
geom_vline(aes(xintercept = mids),data=ampMethodCheckAll,lty=2,lwd=1,col="darkred") +
facet_grid(vars(samp_freq),vars(method),scales="free_x") +
theme_bw() +
labs(x="Simulated estimate (\u{2030})",y="Frequency") +
theme(axis.text=element_text(size=12),axis.title=element_text(size=14))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 5 rows containing non-finite outside the scale range
(`stat_bin()`).