Name Mode Size
R 040000
data 040000
inst 040000
man 040000
tests 040000
vignettes 040000
.Rbuildignore 100644 0 kb
.gitignore 100644 0 kb
.gitlab-ci.yml 100644 1 kb
DESCRIPTION 100644 2 kb
LICENSE.md 100644 34 kb
MethReg.Rproj 100644 0 kb
NAMESPACE 100644 3 kb
NEWS.md 100644 0 kb
README.Rmd 100644 3 kb
README.md 100644 11 kb
_pkgdown.yml 100644 1 kb
README.md
<!-- README.md is generated from README.Rmd. Please edit that file --> # MethReg <!-- badges: start --> [![codecov](https://codecov.io/gl/tiagochst/methtf/branch/master/graph/badge.svg?token=NESBYPVF64)](https://codecov.io/gl/tiagochst/methtf) [![license](https://img.shields.io/badge/license-GPL%20\(%3E%3D%202\)-blue)]() <!-- badges: end --> `MethReg` can be used to generate testable hypotheses on the synergistic interaction of DMRs and TFs in gene regulation. `MethReg` can be used either to evaluate regulatory potentials of candidate regions or to search for methylation-coupled TF regulatory processes in the entire genome. ## Installation You can install MethReg from Bioconductor with: ``` r BiocManager::install("MethReg") ``` ## Example This is a basic example which shows you how to use the package: ``` r library(MethReg) #--------------------------------------- # Data input #--------------------------------------- # 1) Gene expression matrix # 2) DNA methylation # With same column names data("dna.met.chr21") data("gene.exp.chr21.log2") all(colnames(dna.met.chr21) == colnames(gene.exp.chr21.log2)) #> [1] TRUE # Since we are working with regions we need to map our 450k array to regions dna.met.chr21 <- make_dnam_se(dna.met.chr21) ``` ``` r #--------------------------------------- # Mapping regions #--------------------------------------- # For each region get target gene and predicted TF binding to the regions # get_triplet incorporates two other functions: # 1) get_region_target_gene # 2) get_tf_in_region triplet <- create_triplet_distance_based( region = rownames(dna.met.chr21), motif.search.window.size = 50, motif.search.p.cutoff = 10^-3, target.method = "genes.promoter.overlap", genome = "hg19", cores = 1 ) #> Finding target genes #> Mapping regions to the closest gene #> Looking for TFBS #> #> #> Attaching package: 'S4Vectors' #> The following object is masked from 'package:base': #> #> expand.grid #> #> Attaching package: 'Biostrings' #> The 
following object is masked from 'package:base': #> #> strsplit #> Joining, by = "regionID" ``` ``` r #--------------------------------------- # Evaluate two models: #--------------------------------------- # 1) target gene ~ TF + DNAm + TF * DNAm # 2) target gene ~ TF + DNAm_group + TF * DNAm_group # where DNAm_group is a binary indicator if the sample belongs to: Q4 or Q1 results <- interaction_model( triplet = triplet, dnam = dna.met.chr21, exp = gene.exp.chr21.log2 ) ``` ``` r head(results) #> regionID target_gene_name target #> 1 chr21:30372219-30372220 RPL23P2 ENSG00000176054 #> 2 chr21:30430511-30430512 AF129075.5 ENSG00000231125 #> 3 chr21:33109780-33109781 AP000255.6 ENSG00000273091 #> 4 chr21:40692859-40692860 BRWD1 ENSG00000185658 #> 5 chr21:43982646-43982647 AP001625.6 ENSG00000235772 #> 6 chr21:43983587-43983588 AP001625.6 ENSG00000235772 #> TF_external_gene_name TF TF_symbol target_symbol met.IQR #> 1 ETS2 ENSG00000157557 ETS2 RPL23P2 0.182568764 #> 2 BACH1 ENSG00000156273 BACH1 AF129075.2 0.310379208 #> 3 GABPA ENSG00000154727 GABPA AP000255.1 0.080160919 #> 4 GABPA ENSG00000154727 GABPA BRWD1 0.040638333 #> 5 GABPA ENSG00000154727 GABPA AP001625.2 0.008670064 #> 6 GABPA ENSG00000154727 GABPA AP001625.2 0.014175786 #> quant_pval_metGrp quant_fdr_metGrp quant_pval_rna.tf quant_fdr_rna.tf #> 1 3.953828e-05 0.0001186148 5.958024e-03 1.787407e-02 #> 2 7.437666e-01 0.7437665914 6.570509e-04 6.570509e-04 #> 3 2.208774e-03 0.0022087741 5.803942e-01 5.803942e-01 #> 4 3.823862e-01 0.3823861582 7.142112e-08 7.142112e-08 #> 5 4.434057e-01 0.4434057288 4.977765e-02 4.977765e-02 #> 6 5.619040e-01 0.9778889092 1.305771e-03 2.611541e-03 #> quant_pval_metGrp:rna.tf quant_fdr_metGrp:rna.tf quant_estimate_metGrp #> 1 3.768097e-05 0.0001130429 -83.041219 #> 2 7.945988e-01 0.7945988318 -3.533136 #> 3 2.305241e-03 0.0023052406 -30.858474 #> 4 4.999473e-01 0.4999473203 -3.386762 #> 5 4.663539e-01 0.4663538741 -10.643704 #> 6 5.234533e-01 0.9782246238 -5.525037 #> 
quant_estimate_rna.tf quant_estimate_metGrp:rna.tf Model.quantile #> 1 -2.0395999 3.8764266 Robust Linear Model #> 2 1.3437155 0.1642009 Robust Linear Model #> 3 0.2627746 1.8528803 Robust Linear Model #> 4 1.3876278 0.1522536 Robust Linear Model #> 5 1.1156152 0.5884195 Robust Linear Model #> 6 1.2125286 0.3594288 Robust Linear Model #> Wilcoxon_pval_target_q4met_vs_q1met Wilcoxon_pval_tf_q4met_vs_q1met #> 1 0.42735531 0.5707504 #> 2 0.47267559 0.4726756 #> 3 0.49466484 0.9097219 #> 4 0.03763531 0.7913368 #> 5 0.44952133 0.2730363 #> 6 1.00000000 0.2730363 #> % of 0 target genes (Q1 and Q4) #> 1 0 % #> 2 5 % #> 3 20 % #> 4 0 % #> 5 10 % #> 6 5 % ``` # Session information ``` r sessionInfo() #> R version 4.0.2 (2020-06-22) #> Platform: x86_64-apple-darwin17.0 (64-bit) #> Running under: macOS Catalina 10.15.6 #> #> Matrix products: default #> BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib #> LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib #> #> locale: #> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 #> #> attached base packages: #> [1] stats4 parallel stats graphics grDevices utils datasets #> [8] methods base #> #> other attached packages: #> [1] BSgenome.Hsapiens.UCSC.hg19_1.4.3 BSgenome_1.56.0 #> [3] rtracklayer_1.48.0 Biostrings_2.56.0 #> [5] XVector_0.28.0 GenomicRanges_1.40.0 #> [7] GenomeInfoDb_1.24.2 IRanges_2.22.2 #> [9] S4Vectors_0.26.1 sesameData_1.6.0 #> [11] ExperimentHub_1.15.3 AnnotationHub_2.20.2 #> [13] BiocFileCache_1.12.1 dbplyr_1.4.4 #> [15] BiocGenerics_0.34.0 MethReg_0.99.11 #> #> loaded via a namespace (and not attached): #> [1] readxl_1.3.1 backports_1.1.10 #> [3] plyr_1.8.6 BiocParallel_1.22.0 #> [5] ggplot2_3.3.2 TFBSTools_1.26.0 #> [7] digest_0.6.25 foreach_1.5.0 #> [9] htmltools_0.5.0 GO.db_3.11.4 #> [11] magrittr_1.5 memoise_1.1.0 #> [13] doParallel_1.0.15 sfsmisc_1.1-7 #> [15] openxlsx_4.1.5 readr_1.3.1 #> [17] annotate_1.66.0 matrixStats_0.56.0 #> 
[19] R.utils_2.10.1 JASPAR2020_0.99.10 #> [21] prettyunits_1.1.1 colorspace_1.4-1 #> [23] blob_1.2.1 rappdirs_0.3.1 #> [25] haven_2.3.1 xfun_0.17 #> [27] dplyr_1.0.2 crayon_1.3.4 #> [29] RCurl_1.98-1.2 TFMPvalue_0.0.8 #> [31] iterators_1.0.12 glue_1.4.2 #> [33] gtable_0.3.0 sesame_1.6.0 #> [35] zlibbioc_1.34.0 DelayedArray_0.14.1 #> [37] car_3.0-9 wheatmap_0.1.0 #> [39] Rhdf5lib_1.10.1 HDF5Array_1.16.1 #> [41] abind_1.4-5 scales_1.1.1 #> [43] pscl_1.5.5 DBI_1.1.0 #> [45] rstatix_0.6.0 Rcpp_1.0.5 #> [47] xtable_1.8-4 progress_1.2.2 #> [49] foreign_0.8-80 bit_4.0.4 #> [51] preprocessCore_1.50.0 httr_1.4.2 #> [53] RColorBrewer_1.1-2 ellipsis_0.3.1 #> [55] pkgconfig_2.0.3 XML_3.99-0.5 #> [57] R.methodsS3_1.8.1 DNAcopy_1.62.0 #> [59] tidyselect_1.1.0 rlang_0.4.7 #> [61] reshape2_1.4.4 later_1.1.0.1 #> [63] AnnotationDbi_1.50.3 munsell_0.5.0 #> [65] BiocVersion_3.11.1 cellranger_1.1.0 #> [67] tools_4.0.2 DirichletMultinomial_1.30.0 #> [69] generics_0.0.2 RSQLite_2.2.0 #> [71] broom_0.7.0 evaluate_0.14 #> [73] stringr_1.4.0 fastmap_1.0.1 #> [75] yaml_2.2.1 knitr_1.29 #> [77] bit64_4.0.5 zip_2.1.1 #> [79] caTools_1.18.0 purrr_0.3.4 #> [81] randomForest_4.6-14 KEGGREST_1.28.0 #> [83] mime_0.9 R.oo_1.24.0 #> [85] poweRlaw_0.70.6 pracma_2.2.9 #> [87] compiler_4.0.2 curl_4.3 #> [89] png_0.1-7 interactiveDisplayBase_1.26.3 #> [91] ggsignif_0.6.0 tibble_3.0.3 #> [93] stringi_1.5.3 forcats_0.5.0 #> [95] lattice_0.20-41 CNEr_1.24.0 #> [97] Matrix_1.2-18 vctrs_0.3.4 #> [99] pillar_1.4.6 lifecycle_0.2.0 #> [101] BiocManager_1.30.10 data.table_1.13.0 #> [103] bitops_1.0-6 httpuv_1.5.4 #> [105] R6_2.4.1 promises_1.1.1 #> [107] rio_0.5.16 codetools_0.2-16 #> [109] MASS_7.3-53 gtools_3.8.2 #> [111] assertthat_0.2.1 seqLogo_1.54.3 #> [113] rhdf5_2.32.2 SummarizedExperiment_1.18.2 #> [115] GenomicAlignments_1.24.0 Rsamtools_2.4.0 #> [117] GenomeInfoDbData_1.2.3 hms_0.5.3 #> [119] motifmatchr_1.10.0 grid_4.0.2 #> [121] tidyr_1.1.2 rmarkdown_2.3 #> [123] carData_3.0-4 ggpubr_0.4.0 #> [125] 
Biobase_2.48.0 shiny_1.5.0 ```