[iBio] http://www.ibio.cl/2019/08/06/workshop-ii-ibio-bioinformatica iBio
if (!requireNamespace(“BiocManager”, quietly = TRUE))
install.packages(“BiocManager”)
BiocManager::install(“DESeq2”)
library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter,
## Find, get, grep, grepl, intersect, is.unsorted, lapply, Map,
## mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, Position, rank, rbind, Reduce, rownames, sapply,
## setdiff, sort, table, tapply, union, unique, unsplit, which,
## which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:grDevices':
##
## windows
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
## Warning: package 'matrixStats' was built under R version 3.6.1
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## Loading required package: BiocParallel
## Warning: package 'BiocParallel' was built under R version 3.6.1
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
setwd("~/iBio/workshop2019/Sesion5_RNAseq/cyverse/At")
The DESeq2 model internally corrects for library size, so transformed or normalized values such as counts scaled by library size should not be used as input.
counts<-data.frame(read.table("At.1M.separated.quants.txt",row.names = 1,header = T,sep = "\t"))
colnames(counts)<-c("C1","C2","C3","T1","T2","T3")
head(counts,2)
## C1 C2 C3 T1 T2 T3
## AT1G51370.2 0 0 1 3 5 5
## AT1G50920.1 134 124 117 104 111 99
condition <- factor(rep(c("control", "treated"), each = 3))
condition
## [1] control control control treated treated treated
## Levels: control treated
En el siguiente link encontrarán más información al respecto (http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#multi-factor-designs)
genotype <- factor(c(rep("col0", each = 3),rep("col1", each = 3)))
genotype
## [1] col0 col0 col0 col1 col1 col1
## Levels: col0 col1
targets <- data.frame(condition,genotype)
targets
## condition genotype
## 1 control col0
## 2 control col0
## 3 control col0
## 4 treated col1
## 5 treated col1
## 6 treated col1
dds=DESeqDataSetFromMatrix(counts,targets,design=~condition)
dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
head(counts(dds))
## C1 C2 C3 T1 T2 T3
## AT1G51370.2 0 0 1 3 5 5
## AT1G50920.1 134 124 117 104 111 99
## AT1G36960.1 0 0 0 0 0 0
## AT1G44020.1 2 2 0 3 2 1
## AT1G15970.1 13 9 10 17 12 13
## AT1G73440.1 8 8 7 11 5 3
head(counts(dds,normalized=T))
## C1 C2 C3 T1 T2
## AT1G51370.2 0.000000 0.000000 1.015264 2.852986 5.059133
## AT1G50920.1 126.428561 118.104858 118.785920 98.903506 112.312745
## AT1G36960.1 0.000000 0.000000 0.000000 0.000000 0.000000
## AT1G44020.1 1.886993 1.904917 0.000000 2.852986 2.023653
## AT1G15970.1 12.265457 8.572127 10.152643 16.166919 12.141918
## AT1G73440.1 7.547974 7.619668 7.106850 10.460948 5.059133
## T3
## AT1G51370.2 5.110874
## AT1G50920.1 101.195301
## AT1G36960.1 0.000000
## AT1G44020.1 1.022175
## AT1G15970.1 13.288272
## AT1G73440.1 3.066524
write.csv(counts(dds), file = "quants_normalized.csv")
write.csv(counts(dds,normalized=T), file = "quants_normalized.csv")
par(mfrow=c(1,2))
boxplot(log2(counts(dds, normalized=FALSE)+1), main="Raw counts", col=rep(c("lightblue","lightgreen"), each=3), ylim = c(0, 15),las=2)
boxplot(log2(counts(dds, normalized=TRUE)+1), main="Normalized counts", col=rep(c("lightblue","lightgreen"), each=3), ylim = c(0, 15),las=2)
pendiente
pendiente
resultsNames(dds)
## [1] "Intercept" "condition_treated_vs_control"
res <- results(dds, name="condition_treated_vs_control")
summary(res)
##
## out of 24811 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up) : 684, 2.8%
## LFC < 0 (down) : 483, 1.9%
## outliers [1] : 58, 0.23%
## low counts [2] : 7092, 29%
## (mean count < 3)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
head(res)
## log2 fold change (MLE): condition treated vs control
## Wald test p-value: condition treated vs control
## DataFrame with 6 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## AT1G51370.2 2.33970941669419 3.64019320886484 1.57207668466735
## AT1G50920.1 112.621815203343 -0.219027281001241 0.146200722103832
## AT1G36960.1 0 NA NA
## AT1G44020.1 1.61512068170417 0.627809053853954 1.4267505610525
## AT1G15970.1 12.0978894067029 0.426384099590713 0.471639948169337
## AT1G73440.1 6.81018277238528 -0.252147570834862 0.716498225603596
## stat pvalue padj
## <numeric> <numeric> <numeric>
## AT1G51370.2 2.31553157957757 0.0205838583757382 NA
## AT1G50920.1 -1.49812721749546 0.134100200854798 0.6404390609239
## AT1G36960.1 NA NA NA
## AT1G44020.1 0.440027199562357 0.65991740761844 NA
## AT1G15970.1 0.904045768908498 0.365971132749654 0.89704787042123
## AT1G73440.1 -0.351916532134392 0.724900860457775 0.997056853062986
alpha=0.05
FC=4
padjlim=NULL
logFClim=7
res$padj[which(res$padj==0)] <- .Machine$double.xmin
log10pval <- -log10(res$padj)
if (is.null(padjlim)){
ylim <- c(0,1) * quantile(log10pval, probs=0.99, na.rm=TRUE)
} else{
ylim <- c(0,1) * -log10(padjlim)
}
if (is.null(logFClim)){
xlim <- NULL
#xlim <- c(-1,1) * quantile(res$log2FoldChange, probs=0.999, na.rm=TRUE)
}else{
xlim <- c(-1,1)*logFClim
}
plot(res$log2FoldChange, pmin(ylim[2], log10pval), ylim=ylim, xlim=xlim, las=1, cex=0.45,
xlab=expression(log[2]~fold~change), ylab=expression(-log[10]~adjusted~P~value),
col=ifelse(res$padj <= alpha & (res$log2FoldChange <= -log2(FC) | res$log2FoldChange >= log2(FC)), "red", "black"), pch=ifelse(log10pval >= ylim[2], 2, 20),
main=paste0("Volcano plot - ",gsub(".*: ","",res@elementMetadata@listData[["description"]][2])))
abline(h=-log10(alpha), col="black", lty=4, lwd=2.0)
abline(v=0, col="black", lty=3, lwd=1.0)
abline(v=-log2(FC), col="black", lty=4, lwd=2.0)
abline(v=log2(FC), col="black", lty=4, lwd=2.0)
sum(res$padj<0.05, na.rm = T)
## [1] 897
sum(res$padj<0.05 & res$log2FoldChange>0, na.rm = T)
## [1] 542
sum(res$padj<0.05 & res$log2FoldChange<0, na.rm = T)
## [1] 355
sum(res$padj<0.05 & abs(res$log2FoldChange)>=0, na.rm = T)
## [1] 897
sum(res$padj<0.05 & res$log2FoldChange>=1, na.rm = T)
## [1] 409
sum(res$padj<0.05 & res$log2FoldChange<=-1, na.rm = T)
## [1] 228
up_reg <- row.names(subset(res, res$padj < 0.05 & res$log2FoldChange >= 1))
head(up_reg)
## [1] "AT1G29020.1" "AT1G65610.1" "AT1G17380.1" "AT1G65690.1" "AT1G73220.1"
## [6] "AT1G17420.1"
down_reg <- row.names(subset(res, res$padj < 0.05 & res$log2FoldChange <=- 1))
head(down_reg)
## [1] "AT1G74770.1" "AT1G70850.1" "AT1G74940.1" "AT1G73600.2" "AT1G70890.1"
## [6] "AT1G69160.1"
write.table( up_reg, file = "up_reg.txt", col.names = FALSE, row.names = FALSE, quote = FALSE)
write.table( down_reg, file = "down_reg.txt", col.names = FALSE, row.names = FALSE, quote = FALSE)
FIN… por ahora