Beta Diversity Help¶
A quick summary to better understand beta diveristy. To make this work, you need to download and import the dummy dataset described in chapter ”Phyloseq Help”.
Compositional Beat-Diversity¶
Help with Distance¶
distanceMethodList betadiver(help = TRUE) help(designdist) # designdist {vegan}
We use different OTU-abundance-distribution scenarios to better understand Beta-diversity indices.
### Example [A]: Frequent OTU overlap - rare ones are unique ## Explore data t(otu_table(d.B)) ## Wide-to-long format - ideal for ggplots! x <- data.frame(otu_table(d.B)) x$OTU <- row.names(x) x.new <- gather(x, Sample, Counts, SB1, SB2) ## Barplot # Sample order - oneway or another # position <- gtools::mixedsort(x$OTU) position <- c("OTU1","OTU2","OTU3","OTU4","OTU5","OTU6","OTU7","OTU8","OTU9","OTU10","OTU11","OTU12","OTU13","OTU14","OTU15") p <- ggplot(x.new, aes(x = factor(OTU, levels = position ), y = Counts, fill = factor(Sample))) + geom_bar(stat = "identity", position = "dodge") + xlab("OTU") + ylab("Counts") p
We create a few more example cases and record the distances in a data frame:
## Create empty data frame df <- setNames(data.frame(matrix(ncol = 4, nrow = 5)), c("Bray-Curtis (b)", "Jaccard (b)","Bray-Curtis (q)", "Jaccard (q)")) row.names(df) <- c("Frequent-Overlap", "Frequent-Overlap(equal)", "Rare-Overlap", "Identical", "Different") ## Binary dist.bc.b <- phyloseq::distance(d.B, method = "(A+B-2*J)/(A+B)") ## "minimum" Bray-Curtis dist.jc.b <- phyloseq::distance(d.B, method = "cc") ## Jaccard index of dissimilarity dist.jc.b <- phyloseq::distance(d.B, method = "jaccard", binary = TRUE) ## vegdist jaccard ## Quantitative dist.bc.q <- phyloseq::distance(d.B, method = "bray") ## Bray-Curtis dist.jc.q <- phyloseq::distance(d.B, method = "jaccard") ## Jaccard index of dissimilarity df[1,1] <- dist.bc.b df[1,2] <- dist.jc.b df[1,3] <- dist.bc.q df[1,4] <- dist.jc.q ### Example [B]: Frequent Overlap ## Change counts by hand otu_table(d.B)[,1] <- c(20,10,10,5,5,1,1,1,1,1,0,0,0,0,0) otu_table(d.B)[,2] <- c(20,10,10,5,5,0,0,0,0,0,1,1,1,1,1) t(otu_table(d.B)) ## Binary dist.bc.b <- phyloseq::distance(d.B, method = "(A+B-2*J)/(A+B)") ## "minimum" Bray-Curtis dist.jc.b <- phyloseq::distance(d.B, method = "cc") ## Jaccard index of dissimilarity ## Quantitative dist.bc.q <- phyloseq::distance(d.B, method = "bray") ## Bray-Curtis dist.jc.q <- phyloseq::distance(d.B, method = "jaccard") ## Jaccard index of dissimilarity df[2,1] <- dist.bc.b df[2,2] <- dist.jc.b df[2,3] <- dist.bc.q df[2,4] <- dist.jc.q ### Example [C]: Rare Overlap ## Change counts by hand otu_table(d.B)[,1] <- c(20,10,10,5,5,1,1,1,1,1,0,0,0,0,0) otu_table(d.B)[,2] <- c(0,0,0,0,0,1,1,1,1,1,5,5,10,10,20) t(otu_table(d.B)) ## Binary dist.bc.b <- phyloseq::distance(d.B, method = "(A+B-2*J)/(A+B)") ## "minimum" Bray-Curtis dist.jc.b <- phyloseq::distance(d.B, method = "cc") ## Jaccard index of dissimilarity ## Quantitative dist.bc.q <- phyloseq::distance(d.B, method = "bray") ## Bray-Curtis dist.jc.q <- phyloseq::distance(d.B, method = "jaccard") ## Jaccard index of dissimilarity df[3,1] <- dist.bc.b df[3,2] <- dist.jc.b df[3,3] <- dist.bc.q df[3,4] <- dist.jc.q ### Example [D]: Both identical ## Change counts by hand otu_table(d.B)[,1] <- c(20,10,10,5,5,1,1,1,1,1,0,0,0,0,0) otu_table(d.B)[,2] <- c(20,10,10,5,5,1,1,1,1,1,0,0,0,0,0) t(otu_table(d.B)) ## Binary dist.bc.b <- phyloseq::distance(d.B, method = "(A+B-2*J)/(A+B)") ## "minimum" Bray-Curtis dist.jc.b <- phyloseq::distance(d.B, method = "cc") ## Jaccard index of dissimilarity ## Quantitative dist.bc.q <- phyloseq::distance(d.B, method = "bray") ## Bray-Curtis dist.jc.q <- phyloseq::distance(d.B, method = "jaccard") ## Jaccard index of dissimilarity df[4,1] <- dist.bc.b df[4,2] <- dist.jc.b df[4,3] <- dist.bc.q df[4,4] <- dist.jc.q ### Example [E]: Nothing in common ## Change counts by hand otu_table(d.B)[,1] <- c(20,10,10,5,5,1,1,1,0,0,0,0,0,0,0) otu_table(d.B)[,2] <- c(0,0,0,0,0,0,0,0,1,1,5,5,10,10,20) t(otu_table(d.B)) ## Binary dist.bc.b <- phyloseq::distance(d.B, method = "(A+B-2*J)/(A+B)") ## "minimum" Bray-Curtis dist.jc.b <- phyloseq::distance(d.B, method = "cc") ## Jaccard index of dissimilarity ## Quantitative dist.bc.q <- phyloseq::distance(d.B, method = "bray") ## Bray-Curtis dist.jc.q <- phyloseq::distance(d.B, method = "jaccard") ## Jaccard index of dissimilarity df[5,1] <- dist.bc.b df[5,2] <- dist.jc.b df[5,3] <- dist.bc.q df[5,4] <- dist.jc.q round(df, 3)
Bray-Curtis(b) | Jaccard(b) | Bray-Curtis(q) | Jaccard(q) | |
---|---|---|---|---|
Frequent-Overlap | 0.5 | 0.667 | 0.299 | 0.461 |
Frequent-Overlap(equal) | 0.5 | 0.667 | 0.091 | 0.167 |
Rare-Overlap | 0.5 | 0.667 | 0.909 | 0.952 |
Identical | 0.0 | 0.000 | 0.000 | 0.000 |
Different | 1.0 | 1.000 | 1.000 | 1.000 |
Phylogenetic Beta Diversity¶
Note
Careful with data manipulations. It is possible that even simple manipulations change the structure of the object or parts of it. For example, subsetting the imported data resulted in a change of the phylognetic tree information of the phyloseq object.
phy_tree(dB) # tree is rooted phy_tree(d.B) # tree is unrooted distance(dB, method = "unifrac") # rooted tree distance(d.B, method = "unifrac") # unrooted tree
To avoid that a random root is selected we have to re-root the tree by hand.
## Re-root the tree old.tree <- phy_tree(d.B) new.tree <- ape::root(old.tree, outgroup = "OTU13", resolve.root = TRUE) phy_tree(d.B) <- new.tree phy_tree(d.B)
To better understand beta diversity based on phylogenetic information, we create different dummy datasets and compare distances calculated with Unifrac and Weighted Unifrac.
# (A) All Equal otu_table(d.B)[,1] <- c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1) otu_table(d.B)[,2] <- c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance") # (B) All Different otu_table(d.B)[,1] <- c(1,1,1,1,1,0,0,0,0,0,0,0,0,0,1) otu_table(d.B)[,2] <- c(0,0,0,0,0,1,1,1,1,1,1,1,1,1,0) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance") # (C) Mixed taxa with difference abundance otu_table(d.B)[,1] <- c(10,1,10,1,10,1,10,1,10,1,10,1,10,1,1) otu_table(d.B)[,2] <- c(1,10,1,10,1,10,1,10,1,10,1,10,1,10,10) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance") # (D) Mixed taxa at low abundance otu_table(d.B)[,1] <- c(1,0,1,0,1,0,1,0,1,0,1,1,1,1,1) otu_table(d.B)[,2] <- c(0,1,0,1,0,1,0,1,0,1,0,1,1,1,1) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance") # (E) Mixed taxa at higher abundance otu_table(d.B)[,1] <- c(10,0,10,0,10,0,10,0,10,0,10,10,10,10,10) otu_table(d.B)[,2] <- c(0,10,0,10,0,10,0,10,0,10,0,10,10,10,10) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID") # (F) A few distinct taxa at higher abundance otu_table(d.B)[,1] <- c(1,1,1,1,1,1,1,1,1,1,1,10,10,10,1) otu_table(d.B)[,2] <- c(1,1,1,1,1,1,1,1,1,1,1,0,0,0,1) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance") # (G) A few distinct taxa at lower abundance otu_table(d.B)[,1] <- c(100,1,1,1,1,1,1,1,1,1,1,10,10,10,1) otu_table(d.B)[,2] <- c(100,1,1,1,1,1,1,1,1,1,1,0,0,0,1) distance(d.B, method = "unifrac") ## Unifrac distance(d.B, method = "wunifrac") ## Weighted Unifrac plot_tree(d.B, label.tips = "taxa_names", color = "SampleID", size = "Abundance")