Source code for pymethylprocess.meffil_functions

"""
meffil_functions.py
===================
Contains a few R functions that interact with meffil and minfi.
"""

import rpy2.robjects as robjects

[docs]def load_detection_p_values_beadnum(qc_list, n_cores): """Return list of detection p-value matrix and bead number matrix. Parameters ---------- qc_list R list containing qc objects. n_cores Number of cores to use in computation. """ pval_beadnum = robjects.r("""function(qc.list,mc.cores=1, max.bytes=2^30-1, verbose=F, ...) { qc.objects <- qc.list$qc.objects options(mc.cores=mc.cores) stopifnot(all(sapply(qc.objects, meffil:::is.qc.object))) featuresets <- sapply(qc.objects, function(qc.object) qc.object$featureset) featureset <- featuresets[1] if (is.list(featuresets)) ## backwards compatibility featureset <- featuresets <- "450k" if (any(featuresets != featureset)) stop("Multiple feature sets were used to create these QC objects:", paste(unique(featuresets), collapse=", ")) feature.names <- meffil.get.features(featureset)$name if (!all(sapply(qc.objects, function(qc.object) meffil:::exists.rg(qc.object$basename)))) stop("IDAT files are not accessible for all QC objects") ret.pvalue <- meffil:::mcsapply.safe(qc.objects, function(qc.object) { if (is.null(qc.object$featureset)) ## backwards compatibility qc.object$chip <- "450k" rg <- meffil:::read.rg(qc.object$basename, verbose=verbose) probes <- meffil.probe.info(qc.object$chip) pvalues <- meffil:::extract.detection.pvalues(rg, probes, verbose=verbose) unname(pvalues[feature.names]) }, ..., max.bytes=max.bytes) ret.beadnum <- meffil:::mcsapply.safe(qc.objects, function(qc.object) { if (is.null(qc.object$featureset)) ## backwards compatibility qc.object$chip <- "450k" rg <- meffil:::read.rg(qc.object$basename, verbose=verbose) probes <- meffil.probe.info(qc.object$chip) beadnum <- meffil:::extract.beadnum(rg, probes, verbose=verbose) unname(beadnum[feature.names]) }, ..., max.bytes=max.bytes) dimnames(ret.pvalue) <- list(feature.names, names(qc.objects)) dimnames(ret.beadnum) <- list(feature.names, names(qc.objects)) return(list(p.values=ret.pvalue, beadnum=ret.beadnum)) }""")(qc_list, n_cores) return pval_beadnum
[docs]def set_missing(beta, pval_beadnum, detection_val=1e-6): """Set missing beta values to NA, taking into account detection values and bead number thesholds. Parameters ---------- pval_beadnum Detection pvalues and number of beads per cpg/samples detection_val If threshold to set site to missingness based on p-value detection. """ beta = robjects.r("""function (beta, pval.beadnum, detection.p=1e-6){ p.values <- pval.beadnum$p.values[rownames(beta),colnames(beta)] beadnum <- pval.beadnum$beadnum[rownames(beta),colnames(beta)] beta[((p.values >= detection.p)+(beadnum<3))>0]<-NA return(beta) }""")(beta, pval_beadnum, detection_val) return beta
[docs]def remove_sex(beta, array_type='450k'): """Remove non-autosomal cpgs from beta matrix. Parameters ---------- array_type 450k/850k array? """ beta = robjects.r("""function (beta,array.type){ featureset<-array.type autosomal.sites <- meffil.get.autosomal.sites(featureset) autosomal.sites <- intersect(autosomal.sites, rownames(norm.beta)) norm.beta <- norm.beta[autosomal.sites,] return(beta) }""")(beta,array_type) return beta
[docs]def r_autosomal_cpgs(array_type='450k'): """Return list of autosomal cpg probes per platform. Parameters ---------- array_type 450k/850k array? """ robjects.r('library(meffil)') cpgs = robjects.r("""meffil.get.autosomal.sites('{}')""".format(array_type)) return cpgs
[docs]def r_snp_cpgs(array_type='450k'): """Return list of SNP cpg probes per platform. Parameters ---------- array_type 450k/850k array? """ robjects.r('library(meffil)') cpgs = robjects.r("""meffil.snp.names('{}')""".format(array_type)) return cpgs
[docs]def est_cell_counts_meffil(qc_list, cell_type_reference): """Given QCObject list R object, estimate cell counts using reference approach via meffil. Parameters ---------- qc_list R list containing qc objects. cell_type_reference Reference blood/tissue set.""" cell_count_estimates = robjects.r("""function (qc.list, cell.type.reference) { qc.objects <- qc.list$qc.objects cc<-t(sapply(qc.objects, function(obj) meffil.estimate.cell.counts(obj,cell.type.reference))) cc<-data.frame(IID=row.names(cc),cc) return(cc) }""")(qc_list,cell_type_reference) return cell_count_estimates
[docs]def est_cell_counts_minfi(rgset): """Given RGSet object, estimate cell counts using reference approach via minfi. Parameters ---------- rgset RGSet object stored in python via rpy2""" robjects.r('library(FlowSorted.Blood.450k)') cell_count_estimates = robjects.r("""function (RGset) { cellCounts <- as.table(estimateCellCounts(RGset)) return(cellCounts) }""")(rgset) return cell_count_estimates
[docs]def est_cell_counts_IDOL(rgset,library): """Given RGSet object, estimate cell counts for 450k/850k using reference approach via IDOL library. Parameters ---------- rgset RGSet object stored in python via rpy2 library What type of CpG library to use.""" robjects.r('library(FlowSorted.Blood.EPIC)') cell_count_estimates = robjects.r("""function (RGset) as.table(estimateCellCounts2(RGset,IDOLOptimizedCpGs={})$counts)""".format(library))(rgset) return cell_count_estimates