#KM 06/24/2021 #Statistics for "The mtDNA mutation spectrum in the PolG mutator mouse reveals germline and somatic selection" #Need to run figure file first ######FIG 2 Stats###### #count_pct_change_tissue fig2_data.error %>% ungroup%>% filter(som_germ=="Total")%>% select(count_no_brn_germ,tissue)%>% pivot_wider(names_from=tissue,values_from=count_no_brn_germ)%>% mutate(percent_diff = (Liver-Brain)/abs(Brain)*100) #freq_pct_change_tissue fig2_data.error %>% ungroup%>% filter(som_germ=="Total")%>% select(freq,tissue)%>% pivot_wider(names_from=tissue,values_from=freq)%>% mutate(percent_diff = (Liver-Brain)/abs(Brain)*100) #tissue_t_tests_dataframe tissue_stats <-PolGData %>% group_by(Animal, tissue) %>% summarise(freq = sum(mut_freq), count = n())%>% mutate(count = count/mt_genome_length,freq = freq/mt_genome_length,som_germ = "Total")%>% mutate(count_cube_root = count^(1/3), freq_cube_root= freq^(1/3)) ##count_t_test #significant shapiro_test(tissue_stats$count) #not significant shapiro_test(tissue_stats$count_cube_root) #equal variance not significant var.test(count_cube_root~tissue,tissue_stats) #t_test_count_tissue tissue_stats %>% ungroup%>% t_test(.,count_cube_root~tissue,paired=TRUE,ref.group = "Liver") ##frequency_t_test #barely not significant shapiro_test(tissue_stats$freq) #not significant shapiro_test(tissue_stats$freq_cube_root) #equal variance not significant var.test(freq_cube_root~tissue,tissue_stats) #t_test_frequency_tissue tissue_stats %>% ungroup%>% t_test(.,freq_cube_root~tissue,paired=TRUE,ref.group = "Liver") #############Here forward, looking at liver only fig_2_data_exclude_total_liver <- fig_2_data_exclude_total%>%filter(tissue=="Liver") #som_germ_count_pct_change_liver <- fig_2_data_exclude_total_liver %>% ungroup%>% group_by(som_germ)%>% summarise(mean_count = mean(count))%>% select(mean_count,som_germ)%>% pivot_wider(names_from=som_germ,values_from=mean_count)%>% mutate(percent_diff = (Somatic-Germline)/abs(Germline)*100) #som_germ_freq_pct_change_liver <- fig_2_data_exclude_total_liver%>% ungroup%>% group_by(som_germ)%>% summarise(mean_freq = mean(freq))%>% select(mean_freq,som_germ)%>% pivot_wider(names_from=som_germ,values_from=mean_freq)%>% mutate(percent_diff = (Somatic-Germline)/abs(Germline)*100) ##2A## #lmer for somatic germline status with animal as random effect for count fig2A.lmer<- lmer(count~som_germ+(1|Animal),fig_2_data_exclude_total_liver) summary(fig2A.lmer) #homogeniety test for lmer leveneTest(residuals(fig2A.lmer) ~ fig_2_data_exclude_total_liver$som_germ) #normality test qqmath(fig2A.lmer, id=0.05) #cube root transform. This is on manuscript fig2A.lmer.cube_root<- lmer(count_cube_root~som_germ+(1|Animal),fig_2_data_exclude_total_liver) summary(fig2A.lmer.cube_root) #homogeniety test for cube root lmer leveneTest(residuals(fig2A.lmer.cube_root) ~ fig_2_data_exclude_total_liver$som_germ) #normality test qqmath(fig2A.lmer.cube_root, id=0.05) ##2B## #lmer for somatic germline status with animal as random effect for count fig2B.lmer<- lmer(freq~som_germ+(1|Animal),fig_2_data_exclude_total_liver) summary(fig2B.lmer) #homogeniety test for lmer leveneTest(residuals(fig2B.lmer) ~ fig_2_data_exclude_total_liver$som_germ) #normality test qqmath(fig2B.lmer, id=0.05) #cube root transform. This is on manuscript fig2B.lmer.cube_root<- lmer(freq_cube_root~som_germ+(1|Animal),fig_2_data_exclude_total_liver) summary(fig2B.lmer.cube_root) #homogeniety test for cube root lmer leveneTest(residuals(fig2B.lmer.cube_root) ~ fig_2_data_exclude_total_liver$som_germ) #normality test qqmath(fig2B.lmer.cube_root, id=0.05) ######FIG 3 Stats###### #need NAs to show up as white on graph, but zeroes for stats fig3_data_stats <- fig3ce_data %>% replace_na(list(count=0,freq=0))%>% mutate(count_cube_root = count^(1/3),freq_cube_root = freq^(1/3)) fig3_data_stats$coding_status <- factor(fig3_data_stats$coding_status, levels=c("CDS","D-loop","tRNA","rRNA")) #count percent change d loop coding region fig3bd_data.error %>% ungroup%>% select(count,coding_status)%>% pivot_wider(names_from=coding_status,values_from=count)%>% mutate(percent_diff_dloopcds = (`D-loop`-CDS)/abs(CDS)*100,percent_diff_trnacds=(tRNA-CDS)/abs(CDS)*100) #freq percent change fig3bd_data.error %>% ungroup%>% select(freq,coding_status)%>% pivot_wider(names_from=coding_status,values_from=freq)%>% mutate(percent_diff_dloopcds = (`D-loop`-CDS)/abs(CDS)*100,percent_diff_trnacds=(tRNA-CDS)/abs(CDS)*100,percent_diff_rrnacds=(rRNA-CDS)/abs(CDS)*100) #BC fig3bd_data_stats.lmer<- lmer(count~som_germ*coding_status+(1|Animal),fig3_data_stats) summary(fig3bd_data_stats.lmer) qqmath(fig3bd_data_stats.lmer, id=0.05) #BC on manuscript fig3bd_data_stats.lmer.cube_root<- lmer(count_cube_root~som_germ*coding_status+(1|Animal),fig3_data_stats) qqmath(fig3bd_data_stats.lmer.cube_root, id=0.05) summary(fig3bd_data_stats.lmer.cube_root) #3CD fig3ce_data_stats.lmer<- lmer(freq~som_germ+coding_status+(1|Animal),fig3_data_stats) summary(fig3ce_data_stats.lmer) qqmath(fig3ce_data_stats.lmer, id=0.05) #3CE on manuscript fig3ce_data_stats.lmer.cube_root<- lmer(freq_cube_root~som_germ*coding_status+(1|Animal),fig3_data_stats) qqmath(fig3ce_data_stats.lmer.cube_root, id=0.05) summary(fig3ce_data_stats.lmer.cube_root) ######FIG 4 Stats###### #need NA for graph, zeroes for stats fig4_data_stats <- PolGData %>% filter(tissue == "Liver"&!is.na(mut_type)) %>% group_by(Animal,som_germ,mut_type) %>% summarise(freq = sum(mut_freq), count = n()) %>% mutate(freq = freq/mt_genome_length,count=count/mt_genome_length)%>% ungroup%>% complete(mut_type,nesting(som_germ,Animal),fill=list(freq=0,count=0))%>% mutate(count_cube_root = count^(1/3),freq_cube_root = freq^(1/3)) #count_percent_diff fig4ad_data.error %>% ungroup%>% select(count,mut_type)%>% pivot_wider(names_from=mut_type,values_from=count)%>% mutate(percent_diff_missilet = (missense-silent)/abs(silent)*100) #freq_percent_diff fig4ad_data.error %>% ungroup%>% select(freq,mut_type)%>% pivot_wider(names_from=mut_type,values_from=freq)%>% mutate(percent_diff_missilet = (missense-silent)/abs(silent)*100) #4B #lmer for somatic germline status and mutation type(silent,missense,nonsense) with animal as random effect for count fig4b_data_stats.lmer<- lmer(count~som_germ*mut_type+(1|Animal),fig4_data_stats) summary(fig4b_data_stats.lmer) #homogeniety test for lmer leveneTest(residuals(fig4b_data_stats.lmer) ~ factor(fig4_data_stats$som_germ)*fig4_data_stats$mut_type) #normality test qqmath(fig4b_data_stats.lmer, id=0.05) #cube root transform. This is on manuscript fig4b_data_stats.lmer.cube_root<- lmer(count_cube_root~som_germ*mut_type+(1|Animal),fig4_data_stats%>%mutate(mut_type=fct_relevel(mut_type,"silent","missense"))) summary(fig4b_data_stats.lmer.cube_root) #homogeniety test for cube root lmer leveneTest(residuals(fig4b_data_stats.lmer.cube_root) ~ factor(fig4_data_stats$som_germ)*fig4_data_stats$mut_type) #normality test qqmath(fig4b_data_stats.lmer.cube_root, id=0.05) #4E #lmer for somatic germline status and mutation type(silent,missense,nonsense) with animal as random effect for freq fig4e_data_stats.lmer<- lmer(freq~som_germ*mut_type+(1|Animal),fig4_data_stats) summary(fig4e_data_stats.lmer) #homogeniety test for lmer leveneTest(residuals(fig4e_data_stats.lmer) ~ factor(fig4_data_stats$som_germ)*fig4_data_stats$mut_type) #normality test qqmath(fig4e_data_stats.lmer, id=0.05) #cube root transform. This is on manuscript fig4e_data_stats.lmer.cube_root<- lmer(freq_cube_root~som_germ*mut_type+(1|Animal),fig4_data_stats%>%mutate(mut_type=fct_relevel(mut_type,"silent","missense"))) summary(fig4e_data_stats.lmer.cube_root) #homogeniety test for cube root lmer leveneTest(residuals(fig4e_data_stats.lmer.cube_root) ~ factor(fig4_data_stats$som_germ)*fig4_data_stats$mut_type) #normality test qqmath(fig4e_data_stats.lmer.cube_root, id=0.05) #4C ##codon position anova tukey for count fig4C.aov<-aov(count~factor(position),data=fig4cf_position_data) summary(fig4C.aov) TukeyHSD(fig4C.aov) #Normality qqnorm(fig4cf_position_data$count) qqline(fig4cf_position_data$count) ##codon position anova tukey cube root for count fig4C.aov.cuberoot<-aov(count_cube_root~factor(position),data=fig4cf_position_data) summary(fig4C.aov.cuberoot) TukeyHSD(fig4C.aov.cuberoot) #Normality qqnorm(fig4cf_position_data$count_cube_root) qqline(fig4cf_position_data$count_cube_root) #4F ##codon position anova tukey for freq fig4F.aov<-aov(freq~factor(position),data=fig4cf_position_data) summary(fig4F.aov) TukeyHSD(fig4F.aov) #Normality qqnorm(fig4cf_position_data$freq) qqline(fig4cf_position_data$freq) ##codon position anova tukey cube root for freq fig4F.aov.cuberoot<-aov(freq~factor(position),data=fig4cf_position_data) summary(fig4F.aov.cuberoot) TukeyHSD(fig4F.aov.cuberoot) #Normality qqnorm(fig4cf_position_data$freq_cube_root) qqline(fig4cf_position_data$freq_cube_root) #percent diffs fig4cf_position.error %>% ungroup%>% select(freq,position)%>% pivot_wider(names_from=position,values_from=freq)%>% mutate(percent_diff_position23 = (`3`-`2`)/abs(`2`)*100,percent_diff_position13 = (`3`-`1`)/abs(`1`)*100) ######FIG 5 Stats###### #5A anova count fig5a.aov<-aov(count~factor(type),data=fig5ab_data) summary(fig5a.aov) #Normality qqnorm(fig5ab_data$count) qqline(fig5ab_data$count) #5A anova with cube root transformation manuscript fig5a.aov_cube<-aov(count_cube_root~factor(type),data=fig5ab_data) summary(fig5a.aov_cube) #Tukey tests TukeyHSD(fig5a.aov_cube) #Normality qqnorm(fig5ab_data$count_cube_root) qqline(fig5ab_data$count_cube_root) #5B anova frequency fig5b.aov<-aov(freq~factor(type),data=fig5ab_data) summary(fig5b.aov) #Normality qqnorm(fig5ab_data$freq) qqline(fig5ab_data$freq) #5B anova with cube root transformation manuscript fig5b.aov_cube<-aov(freq_cube_root~factor(type),data=fig5ab_data) summary(fig5b.aov_cube) #Tukey Tests TukeyHSD(fig5b.aov_cube) #Normality qqnorm(fig5ab_data$freq_cube_root) qqline(fig5ab_data$freq_cube_root) #5C #need to make dataframe that only has hydrophobic and hydrophillic and has zeroes #old way, remember to delete fig5c_data.stats_<- fig5c_data %>% filter(type=="CT"&Animal!="b"&Animal!="d"&Animal!="g")%>% filter((amino_start_group == "Hydrophobic" | amino_start_group =="Hydrophilic") & (amino_mut_group == "Hydrophilic" | amino_mut_group =="Hydrophobic")) %>% replace_na(list(freq=0,count=0))%>% mutate(amino_start_group=fct_relevel(amino_start_group,"Hydrophobic","Hydrophilic"))%>% mutate(amino_mut_group=fct_relevel(amino_mut_group,"Hydrophobic","Hydrophilic"))%>% unite("both",amino_start_group,amino_mut_group) fig5c_data.stats<- fig5c_data %>% filter(type=="CT"&Animal!="b"&Animal!="d"&Animal!="g")%>% filter((amino_start_group == "Hydrophobic" | amino_start_group =="Hydrophilic") & (amino_mut_group == "Hydrophilic" | amino_mut_group =="Hydrophobic")) %>% replace_na(list(freq=0,count=0))%>% mutate(amino_start_group=fct_relevel(amino_start_group,"Hydrophobic","Hydrophilic"))%>% mutate(amino_mut_group=fct_relevel(amino_mut_group,"Hydrophilic","Hydrophobic"))%>% mutate(count_cube_root = count^(1/3),freq_cube_root = freq^(1/3)) #fig5c_data.error <- fig5c_data.stats %>% ungroup() %>% group_by(amino_start_group,amino_mut_group) %>% summarise(ci_count=ci95(count),ci_freq=ci95(freq),count=mean(count),freq=mean(freq)) fig5c_data.stats %>% ungroup() %>% group_by(amino_start_group) %>% summarise(ci_count=ci95(count),ci_freq=ci95(freq),count=mean(count),freq=mean(freq)) fig5c_data.stats %>% ungroup() %>% group_by(amino_mut_group) %>% summarise(ci_count=ci95(count),ci_freq=ci95(freq),count=mean(count),freq=mean(freq)) #untransformed data was better #linear mixed model for count for mutation type as fixed effect and animal as random effect fig_5c.lmer<- lmer(count~amino_start_group+amino_mut_group+(1|Animal),fig5c_data.stats) summary(fig_5c.lmer) #not significant homogeniety test for lmer leveneTest(residuals(fig_5c.lmer) ~ fig5c_data.stats$amino_start_group*fig5c_data.stats$amino_mut_group) #Normality qqmath(fig_5c.lmer, id=0.05) #linear mixed model for count for mutation type as fixed effect and animal as random effect fig_5c.lmer.cuberoot<- lmer(count_cube_root~amino_start_group*amino_mut_group+(1|Animal),fig5c_data.stats) summary(fig_5c.lmer.cuberoot) #Signifcant homogeniety test for lmer leveneTest(residuals(fig_5c.lmer.cuberoot) ~ fig5c_data.stats$amino_start_group*fig5c_data.stats$amino_mut_group) #Normality qqmath(fig_5c.lmer.cuberoot, id=0.05) #linear mixed model for count for mutation type as fixed effect and animal as random effect fig_5d.lmer<- lmer(freq~amino_start_group*amino_mut_group+(1|Animal),fig5c_data.stats) summary(fig_5d.lmer) #Signifcant homogeniety test for lmer leveneTest(residuals(fig_5d.lmer) ~ fig5c_data.stats$amino_start_group*fig5c_data.stats$amino_mut_group) #Normality qqmath(fig_5d.lmer, id=0.05) #transformed data was better #linear mixed model for count for mutation type as fixed effect and animal as random effect fig_5d.lmer.cuberoot<- lmer(freq_cube_root~amino_start_group+amino_mut_group+(1|Animal),fig5c_data.stats) summary(fig_5d.lmer.cuberoot) #Signifcant homogeniety test for lmer leveneTest(residuals(fig_5d.lmer.cuberoot) ~ fig5c_data.stats$amino_start_group*fig5c_data.stats$amino_mut_group) #Normality qqmath(fig_5d.lmer.cuberoot, id=0.05) ######FIG 6 Stats###### #count_pct_change_somgerm fig6_data.error %>% ungroup%>% filter(som_germ!="Total")%>% select(count,som_germ)%>% pivot_wider(names_from=som_germ,values_from=count)%>% mutate(percent_diff = (Somatic-Germline)/abs(Germline)*100) #freq_pct_change_som_germ fig6_data.error %>% ungroup%>% filter(som_germ!="Total")%>% select(freq,som_germ)%>% pivot_wider(names_from=som_germ,values_from=freq)%>% mutate(percent_diff = (Somatic-Germline)/abs(Germline)*100) #6B #linear mixed model for count somgerm status indels fig6b.lmer<- lmer(count~som_germ+(1|Animal),fig_6_data_exclude_total) summary(fig6b.lmer) #homogeniety test for lmer leveneTest(residuals(fig6b.lmer) ~ fig_6_data_exclude_total$som_germ) #normality test qqmath(fig6b.lmer, id=0.05) #linear mixed model for count cube root somgerm status indels fig6b.lmer.cuberoot<- lmer(count_cube_root~som_germ+(1|Animal),fig_6_data_exclude_total) summary(fig6b.lmer.cuberoot) #homogeniety test for lmer leveneTest(residuals(fig6b.lmer) ~ fig_6_data_exclude_total$som_germ) #normality test qqmath(fig6b.lmer, id=0.05) #6C #linear mixed model for freq somgerm status indels fig6c.lmer<- lmer(freq~som_germ+(1|Animal),fig_6_data_exclude_total) summary(fig6c.lmer) #homogeniety test for lmer leveneTest(residuals(fig6c.lmer) ~ fig_6_data_exclude_total$som_germ) #normality test qqmath(fig6c.lmer, id=0.05) #linear mixed model for freq cube root somgerm status indels fig6c.lmer.cuberoot<- lmer(freq_cube_root~som_germ+(1|Animal),fig_6_data_exclude_total) summary(fig6c.lmer.cuberoot) #homogeniety test for lmer leveneTest(residuals(fig6c.lmer) ~ fig_6_data_exclude_total$som_germ) #normality test qqmath(fig6c.lmer, id=0.05) #6D PolGData_indels_L%>% group_by(frame,i_d,coding_non)%>% summarise(sum=n()) PolGData_indels_L%>% group_by(i_d)%>% summarise(sum=n()) PolGData_indels_L%>% group_by(frame,coding_non)%>% summarise(sum=n())