1. Import libraries
# Import libraries
library("phyloseq")
library("ape")
library("ggplot2")
theme_set(theme_bw()) # set black-white theme for all plots
2. Create phyloseq object
# Load OTUs
in_otu = as.matrix(
read.table('../dadaist_0.9.0/MicrobiomeAnalyst/table.csv',
header=TRUE,
sep=",",
row.names = 1,
comment.char="")
)
# Load taxonomy
in_tax = read.table('../dadaist_0.9.0/MicrobiomeAnalyst/taxonomy.csv',
header=TRUE, sep=",",
row.names = 1,
fill=TRUE,
na.strings=c("","NA","k__", "d__","p__","c__","o__","f__","g__","s__"),
comment.char="")
in_tax$sequenceID<-row.names(in_tax)
in_tax <- as.matrix(in_tax)
# Load tree if available
in_tree <- read.tree('../dadaist_0.9.0/MicrobiomeAnalyst/rep-seqs.tree')
# Import metadata
metaIn = read.table('../dadaist_0.9.0/metadata.tsv',
header=TRUE,
sep="\t",
comment.char="")
rownames(metaIn) <- metaIn[,1]
colnames(metaIn)[1] <- 'sampleID'
in_metad = sample_data(metaIn)
# Generate phyloseq object
my_OTU = otu_table(in_otu, taxa_are_rows = TRUE)
my_TAX = tax_table(in_tax)
my_physeq = phyloseq(
my_OTU,
my_TAX,
in_metad,
in_tree)
# Print phyloseq object
my_physeq
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 218 taxa and 20 samples ]
## sample_data() Sample Data: [ 20 samples by 2 sample variables ]
## tax_table() Taxonomy Table: [ 218 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 218 tips and 216 internal nodes ]
# Compare phyloseq object to automatically created one from dadaist
my_physeq_auto<-readRDS('../dadaist_0.9.0/R/phyloseq.rds')
print(my_physeq_auto)
## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 218 taxa and 20 samples ]
## sample_data() Sample Data: [ 20 samples by 2 sample variables ]
## tax_table() Taxonomy Table: [ 218 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 218 tips and 216 internal nodes ]
3. Create bar charts
# Family colored but OTU level separation
my_physeq_P <- tax_glom(my_physeq,taxrank = "Phylum")
my_barPhyl <- plot_bar(my_physeq_P, fill="Phylum") +
scale_fill_discrete(na.value="grey90") +
ggtitle("Relative abundances at Phylum level") # color by Phylum
my_physeq_C <- tax_glom(my_physeq,taxrank = "Class")
my_barClass <- plot_bar(my_physeq_C, fill="Class") +
scale_fill_discrete(na.value="grey90") +
ggtitle("Relative abundances at Class level") # color by Class
# Show bar plots
my_barPhyl
my_barClass
4. Create bubble plots
## Create bubble plots
# Class level
my_physeq_Class = tax_glom(my_physeq, "Class")
my_barClass <- plot_bar(my_physeq_Class)
my_barClass$layers <- my_barClass$layers[-1]
maxC<-max(otu_table(my_physeq_Class))
my_bubbleClass <- my_barClass +
geom_point(aes_string(x="Sample",
y="Class",
size="Abundance",
color = "Sample" ),
alpha = 0.7 ) +
scale_size_continuous(limits = c(0.001,maxC)) +
xlab("Sample") +
ylab("Class") +
ggtitle("Relative abundances at Class level") +
labs(caption = "Abundances below 0.001 were considered absent")
# Order level
my_physeq_Order = tax_glom(my_physeq, "Order")
my_barOrder <- plot_bar(my_physeq_Order)
my_barOrder$layers <- my_barOrder$layers[-1]
maxC<-max(otu_table(my_physeq_Order))
my_bubbleOrder <- my_barOrder +
geom_point(aes_string(x="Sample",
y="Order",
size="Abundance",
color = "Sample" ),
alpha = 0.7 ) +
scale_size_continuous(limits = c(0.001,maxC)) +
xlab("Sample") +
ylab("Order") +
ggtitle("Relative abundances at Order level") +
labs(caption = "Abundances below 0.001 were considered absent")
# Family level
my_physeq_Fam = tax_glom(my_physeq, "Family")
my_barFam <- plot_bar(my_physeq_Fam)
my_barFam$layers <- my_barFam$layers[-1]
maxC<-max(otu_table(my_physeq_Fam))
my_bubbleFam <- my_barFam +
geom_point(aes_string(x="Sample",
y="Family",
size="Abundance",
color = "Sample" ),
alpha = 0.7) +
scale_size_continuous(limits = c(0.001,maxC)) +
xlab("Sample") +
ylab("Family") +
ggtitle("Relative abundances at Family level") +
labs(caption = "Abundances below 0.001 were considered absent")
# Genus level
my_physeq_Gen = tax_glom(my_physeq, "Genus")
my_barGen <- plot_bar(my_physeq_Gen)
my_barGen$layers <- my_barGen$layers[-1]
maxC<-max(otu_table(my_physeq_Gen))
my_bubbleGen <- my_barGen +
geom_point(aes_string(x="Sample",
y="Genus",
size="Abundance",
color = "Sample" ),
alpha = 0.7) +
scale_size_continuous(limits = c(0.001,maxC)) +
xlab("Sample") +
ylab("Genus") +
ggtitle("Relative abundances at Genus level") +
labs(caption = "Abundances below 0.001 were considered absent")
my_bubbleClass + theme(legend.position = "none")
## Warning: Removed 117 rows containing missing values (geom_point).
my_bubbleOrder + theme(legend.position = "none")
## Warning: Removed 298 rows containing missing values (geom_point).
my_bubbleFam + theme(legend.position = "none")
## Warning: Removed 421 rows containing missing values (geom_point).
my_bubbleGen + theme(legend.position = "none")
## Warning: Removed 598 rows containing missing values (geom_point).