Get Started with Phyloseq¶
We are using the Bioconductor package phyloseq for most parts of the data analysis. It is a convenient package to hand and analyse high-throughput microbiome census data. It comes with a detail manual, a compact vignette, and even an interactive web application. Besides the clear advantages of storing your sequencing experiment as a phyloseq class object, it might be dismissed as being difficult to get used to. Believe us, it is worth the effort. Following a few basic steps for you to get familiar with phyloseq.
Clean/Reset Environment¶
Make it a habit and start a new R session with the broom.
rm(list = ls())
Bioconductor Packages¶
library("phyloseq"); packageVersion("phyloseq") library("microbiome"); packageVersion("microbiome") library("descr"); packageVersion("descr")
Get Data¶
## Where am I? getwd() ## My document path # setwd("my/folder/") # adjust accordingly ## Working directory for MDA workshop # dir.create("MDA"); setwd("MDA") # Careful: you might have already created one in the previous section! ## Create Working Directory dir.create("Dummy"); setwd("Dummy") ## Download data (R data image file) dummy.url <- "https://www.gdc-docs.ethz.ch/MDA/data/DummyData.zip" utils::download.file(dummy.url, destfile = "DummyData.zip") unzip("DummyData.zip") ## View Content list.files() # [1] "all.rda" "Dummy_MapFile.tsv" "Dummy_MapFile.txt" "Dummy_OTU_ref.fa" # [5] "Dummy_OTU_rooted_tree.nwk" "Dummy_OTU_table.tsv" "Dummy_OTU_table.txt" "Dummy_OTU_taxa.tsv" # [9] "Dummy_OTU_tree.tre" "Dummy_OTU_unrooted_tree.nwk" "DummyData.zip" "test.rda" ## View File Content descr::file.head("Dummy_MapFile.tsv", n = 14) descr::file.head("Dummy_OTU_table.txt", n = 3) descr::file.head("Dummy_OTU_rooted_tree.nwk", n = 1)
Import Help¶
?import ?read_tree
Import from tab separated files¶
map.A <- read.table("Dummy_MapFile.tsv") otu.A <- as.matrix(read.table("Dummy_OTU_table.tsv")) tax.A <- as.matrix(read.table("Dummy_OTU_taxa.tsv")) tre.A <- read_tree("Dummy_OTU_unrooted_tree.nwk") #tre.A <- read_tree("Dummy_OTU_rooted_tree.nwk") dA <- phyloseq( otu_table(otu.A, taxa_are_rows = TRUE), sample_data(map.A), tax_table(tax.A), phy_tree(tre.A) ) dA otu_table(dA) tax_table(dA)
Alternative import from text files¶
map.B <- "Dummy_MapFile.txt" otu.B <- "Dummy_OTU_table.txt" #tre.B <- "Dummy_OTU_unrooted_tree.nwk" tre.B <- "Dummy_OTU_rooted_tree.nwk" ref.B <- "Dummy_OTU_ref.fa" dB <- import_qiime( mapfilename = map.B, otufilename = otu.B, treefilename = tre.B, refseqfile = ref.B, verbose = TRUE ) dB otu_table(dB) summary(dB) mode(dB) tax_table(dB)
Phyloseq object¶
# Summarize phyloseq object microbiome::summarize_phyloseq(dA) microbiome::summarize_phyloseq(dB) refseq(dA) # missing reference sequences > empty slot refseq(dB) # reference are present # Dataset dA and dB are more or less indetical. The biggest difference is that # dataset dB contains the reference (OTU) sequences. # I would not import the reference sequences by default. It increases the object # size and might not be needed at the end.
Phyloseq Components - Extraction¶
## Extract the OTU table plus taxonomic information otu.tax.table <- data.frame(OTU = rownames(phyloseq::otu_table(dB)@.Data), phyloseq::otu_table(dB)@.Data, phyloseq::tax_table(dB)@.Data, check.names = FALSE ) otu.tax.table ## Extract and transposed OTU table otu.tab <- t(data.frame(phyloseq::otu_table(dB), check.names = FALSE) ) otu.tab ## Extract meta information meta <- data.frame(phyloseq::sample_data(dB), check.names = FALSE ) meta ## Combine count table and meta information otu.meta.tab <- as.data.frame(cbind(otu.tab, meta)) otu.meta.tab ## Import tree (Newark format) u.tree <- ape::read.tree("Dummy_OTU_unrooted_tree.nwk") r.tree <- ape::read.tree("Dummy_OTU_rooted_tree.nwk") u.tree r.tree par(mfrow = c(1,2)) plot(u.tree, edge.width = 3, cex = 1, edge.color = "tomato", label.offset = 0.01, tip.color = "blue", main = "Unrooted Tree") plot(r.tree, edge.width = 3, cex = 1, edge.color = "tomato", label.offset = 0.01, tip.color = "blue", main = "Rooted Tree (OTU15)") par(mfrow = c(1,1)) ## Re-root tree tree_rerooted <- root(u.tree, outgroup = "OTU15", resolve.root = TRUE) par(mfrow = c(1,3)) plot(u.tree, edge.width = 3, cex = 1.5, edge.color = "tomato", label.offset = 0.01, tip.color = "blue", main = "Unrooted Tree") plot(r.tree, edge.width = 3, cex = 1.5, edge.color = "tomato", label.offset = 0.01, tip.color = "blue", main = "Rooted Tree (OTU15)") plot(tree_rerooted, edge.width = 3, cex = 1.5, edge.color = "tomato", label.offset = 0.01, tip.color = "blue", main = "Re-Rooted Tree (OTU15)") par(mfrow = c(1,1)) ## Circular tree (plot_tree {phyloseq}) plot_tree(dB, color = "GID", label.tips = "taxa_names", shape = "GID", ladderize = "left") + coord_polar(theta = "y")
Phyloseq Accessors¶
## We simplify d <- dB ## Overview ntaxa(d) # number of OTUs nsamples(d) # number of samples taxa_names(d) # OTU labels sample_names(d) # sample id taxa_sums(d) # Count per OTU taxa_sums(d)[c("OTU4","OTU12")] sample_sums(d) # Counts per sample - sequencing depth sample_sums(d)[c("SA1","SA2")] rank_names(d) # Taxonomic rank names sample_variables(d) # Variable defined in mapfile get_taxa(d) # Count table get_sample(d) # Count table get_variable(d) # Meta info
Write phyloseq¶
# write_phyloseq(d, "OTU") # write_phyloseq(d, "TAXONOMY") # write_phyloseq(d, "METADATA")