Browse code

update data recipes in inst/extdata

Qian Liu authored on 28/07/2023 16:11:35
Showing 16 changed files

... ...
@@ -1,6 +1,6 @@
1 1
 Package: ReUseData
2 2
 Title: Reusable and reproducible Data Management
3
-Version: 0.99.37
3
+Version: 0.99.38
4 4
 Authors@R: 
5 5
     person(given = "Qian",
6 6
            family = "Liu",
... ...
@@ -48,12 +48,17 @@ STAR_index <- addMeta(
48 48
         date = Sys.Date(),
49 49
         url = "https://github.com/alexdobin/STAR",
50 50
         example = paste(
51
-            "recipeLoad('STAR_index.R', return = TRUE)",
51
+            "STAR_index <- recipeLoad('STAR_index.R')",
52 52
             "STAR_index$ref <- 'gcpData/reference_genome/GRCh38.primary_assembly.genome.fa'",
53 53
             "STAR_index$gtf <- 'gcpData/gencode_annotation/gencode.v42.annotation.gtf'",
54 54
             "STAR_index$sjdb <- 100",
55 55
             "STAR_index$genomeDir <- 'GRCh38.GENCODE.v42_100'",
56 56
             "STAR_index$threads <- 16",
57 57
             "getData(STAR_index, outdir = 'gcpData', notes =c('STAR_index', 'GRCh38.primary_assembly', 'gencode.v42', 'star_2.7.9a'))",
58
+            "",
59
+            "## Get data from local cache",
60
+            "dataUpdate('data/folder')", 
61
+            "dataSearch(c('STAR_index', 'GRCh38.primary_assembly', 'gencode.v42', 'star_2.7.9a'))",
62
+            "", 
58 63
             sep="\n"))
59 64
 )
... ...
@@ -25,8 +25,13 @@ bowtie2_index <- addMeta(
25 25
         date = Sys.Date(),
26 26
         url = "https://bowtie-bio.sourceforge.net/bowtie2/index.shtml",
27 27
         example = paste(
28
-            "recipeLoad('bowtie2_index.R', return = TRUE)",
29
-            "bowtie2_index$genome <- 'GRCh38.primary_assembly.genome.fa'",
28
+            "bowtie2_index <- recipeLoad('bowtie2_index')",
29
+            "bowtie2_index$genome <- 'GRCh38.primary_assembly.genome.fa' ## need to be a valid file path", 
30 30
             "getData(bowtie2_index, outdir = 'data/folder', notes = c('bowtie2_index', 'GRCh38.primary_assembly'))",
31
+            "",
32
+            "## Get data from local cache",
33
+            "dataUpdate('data/folder')", 
34
+            "dataSearch(c('bowtie2_index', 'GRCh38'))",
35
+            "", 
31 36
             sep="\n"))
32 37
 )
... ...
@@ -25,11 +25,15 @@ echo_out <- addMeta(
25 25
         date = Sys.Date(),
26 26
         example = paste(
27 27
             "## Get data from evaluating recipe",
28
-            "recipeLoad('echo_out', return = TRUE)",
28
+            "echo_out <- recipeLoad('echo_out')",
29 29
             "echo_out$input <- 'Hello World'",
30 30
             "echo_out$outfile <- 'echoHelloWorld'",
31 31
             "getData(echo_out, outdir = 'data/folder', notes = c('echo', 'hello', 'world'))",
32
+            "",
33
+            "## Get data from local cache",
34
+            "dataUpdate('data/folder')", 
35
+            "dataSearch(c('echo', 'hello'))",
36
+            "", 
32 37
             sep="\n"))
33
-
34 38
 )
35 39
 
... ...
@@ -38,12 +38,16 @@ ensembl_liftover <- addMeta(
38 38
         date = Sys.Date(),
39 39
         example = paste(
40 40
             "## Get data from evaluating recipe",
41
-            "recipeLoad('ensembl_liftover', return = TRUE)",
41
+            "ensembl_liftover <- recipeLoad('ensembl_liftover')",
42 42
             "ensembl_liftover$species <- 'human'",
43 43
             "ensembl_liftover$from <- 'GRCh37'",
44 44
             "ensembl_liftover$to <- 'GRCh38'",
45 45
             "getData(ensembl_liftover, outdir = 'data/folder', notes = c('grch37', 'grch38'))",
46 46
             "",
47
+            "## Get data from local cache",
48
+            "dataUpdate('data/folder')", 
49
+            "dataSearch(c('ensembl', 'GRCh37'))",
50
+            "", 
47 51
             "## Get data from Google bucket directly",
48 52
             "dataUpdate('data/folder', cloud=TRUE)",
49 53
             "dh <- dataSearch(c('ensembl', 'GRCh37'))", 
... ...
@@ -27,8 +27,13 @@ gcp_broad_gatk_hg19 <- addMeta(
27 27
         date = Sys.Date(),
28 28
         url = "https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg19/v0",
29 29
         example = paste(
30
-            "recipeLoad('gcp_broad_gatk_hg19', return = TRUE)",
30
+            "gcp_broad_gatk_hg19 <- recipeLoad('gcp_broad_gatk_hg19')",
31 31
             "gcp_broad_gatk_hg19$filename <- '1000G_omni2.5.b37.vcf.gz'",
32 32
             "getData(gcp_broad_gatk_hg19, outdir = 'data/folder', notes = c('gcp', 'broad', 'reference', 'hg19', 'v0', '1000G', 'omni2.5'))",
33
+            "",
34
+            "## Get data from local cache",
35
+            "dataUpdate('data/folder')", 
36
+            "dataSearch(c('gcp', 'broad', 'hg19'))",
37
+            "", 
33 38
             sep="\n"))
34 39
 )
... ...
@@ -26,8 +26,14 @@ gcp_broad_gatk_hg38 <- addMeta(
26 26
         date = Sys.Date(),
27 27
         url = "https://console.cloud.google.com/storage/browser/gcp-public-data--broad-references/hg38/v0",
28 28
         example = paste(
29
-            "recipeLoad('gcp_broad_gatk_hg38', return = TRUE)",
29
+            "gcp_broad_gatk_hg38 <- recipeLoad('gcp_broad_gatk_hg38')",
30 30
             "gcp_broad_gatk_hg38$filename <- '1000G_omni2.5.hg38.vcf.gz'",
31 31
             "getData(gcp_broad_gatk_hg38, outdir = 'data/folder', notes = c('gcp', 'broad', 'reference', 'hg38', 'v0', '1000G', 'omni2.5'))",
32
+            "",
33
+            "## Get data from local cache",
34
+            "dataUpdate('data/folder')", 
35
+            "dataSearch(c('gcp', 'broad', 'hg38'))",
36
+            "", 
37
+
32 38
             sep="\n"))
33 39
 )
... ...
@@ -27,9 +27,14 @@ gcp_gatk_mutect2_b37 <- addMeta(
27 27
         date = Sys.Date(),
28 28
         url = "https://console.cloud.google.com/storage/browser/gatk-best-practices/somatic-b37",
29 29
         example = paste(
30
-            "recipeLoad('gcp_gatk_mutect2_b37', return = TRUE)",
30
+            "gcp_gatk_mutect2_b37 <- recipeLoad('gcp_gatk_mutect2_b37')",
31 31
             "gcp_gatk_mutect2_b37$filename <- 'small_exac_common_3.vcf'",
32 32
             "gcp_gatk_mutect2_b37$idx <- 'idx'",
33
-            "getData(gcp_gatk_mutect2_b37, outdir = 'data/folder', notes = c('gcp', 'broad', 'mutect2', 'small_exac_common'))",
33
+            "getData(gcp_gatk_mutect2_b37, outdir = 'data/folder', notes = c('gcp', 'gatk', 'mutect2', 'b37', 'small_exac_common'))",
34
+            "",
35
+            "## Get data from local cache",
36
+            "dataUpdate('data/folder')", 
37
+            "dataSearch(c('gcp', 'gatk', 'b37'))",
38
+            "", 
34 39
             sep="\n"))
35 40
 )
... ...
@@ -26,9 +26,14 @@ gcp_gatk_mutect2_hg38 <- addMeta(
26 26
         date = Sys.Date(),
27 27
         url = "https://console.cloud.google.com/storage/browser/gatk-best-practices/somatic-hg38",
28 28
         example = paste(
29
-            "recipeLoad('gcp_gatk_mutect2_hg38', return = TRUE)",
29
+            "gcp_gatk_mutect2_hg38 <- recipeLoad('gcp_gatk_mutect2_hg38')",
30 30
             "gcp_gatk_mutect2_hg38$filename <- 'small_exac_common_3.hg38.vcf.gz'",
31 31
             "gcp_gatk_mutect2_hg38$idx <- 'tbi'",
32
-            "getData(gcp_gatk_mutect2_hg38, outdir = 'data/folder', notes = c('gcp', 'broad', 'mutect2', 'small_exac_common'))",
32
+            "getData(gcp_gatk_mutect2_hg38, outdir = 'data/folder', notes = c('gcp', 'gatk', 'mutect2', 'hg38', 'small_exac_common'))",
33
+            "",
34
+            "## Get data from local cache",
35
+            "dataUpdate('data/folder')", 
36
+            "dataSearch(c('gcp', 'gatk', 'hg38'))",
37
+            "", 
33 38
             sep="\n"))
34 39
 )
... ...
@@ -31,11 +31,15 @@ gencode_annotation <- addMeta(
31 31
         date = Sys.Date(),
32 32
         example = paste(
33 33
             "## Get data from evaluating recipe",
34
-            "recipeLoad('gencode_annotation', return = TRUE)",
34
+            "gencode_annotation <- recipeLoad('gencode_annotation')",
35 35
             "gencode_annotation$species <- 'human'",
36 36
             "gencode_annotation$version <- '42'",
37 37
             "getData(gencode_annotation, outdir = 'data/folder', notes = c('gencode', 'annotation', 'human', '42'))",
38 38
             "",
39
+            "## Get data from local cache",
40
+            "dataUpdate('data/folder')", 
41
+            "dataSearch(c('gencode', 'annotation', 'human', '42'))",
42
+            "", 
39 43
             "## Get data from Google bucket directly",
40 44
             "dataUpdate('data/folder', cloud=TRUE)",
41 45
             "dh <- dataSearch(c('gencode', 'annotation'))", 
... ...
@@ -17,9 +17,13 @@ gencode_genome_grch38 <- addMeta(
17 17
         date = Sys.Date(),
18 18
         example = paste(
19 19
             "## Get data from evaluating recipe",
20
-            "recipeLoad('gencode_genome_grch38', return = TRUE)",
20
+            "gencode_genome_grch38 <- recipeLoad('gencode_genome_grch38')",
21 21
             "getData(gencode_genome_grch38, outdir = 'data/folder', notes = c('gencode', 'genome', 'grch38', 'release 42'))",
22 22
             "",
23
+            "## Get data from local cache",
24
+            "dataUpdate('data/folder')", 
25
+            "dataSearch(c('gencode', 'genome', 'grch38', '42'))",
26
+            "", 
23 27
             "## Get data from Google bucket directly",
24 28
             "dataUpdate('data/folder', cloud = TRUE)",
25 29
             "dh <- dataSearch(c('gencode', 'genome', 'grch38'))",
... ...
@@ -37,11 +37,15 @@ gencode_transcripts <- addMeta(
37 37
         date = Sys.Date(),
38 38
         example = paste(
39 39
             "## Get data from evaluating recipe",
40
-            "recipeLoad('gencode_transcripts', return = TRUE)",
40
+            "gencode_transcripts <- recipeLoad('gencode_transcripts')",
41 41
             "gencode_transcripts$species <- 'mouse'",
42 42
             "gencode_transcripts$version <- 'M31'",
43 43
             "getData(gencode_transcripts, outdir = 'data/folder', notes = c('gencode', 'transcripts', 'mouse', 'M31'))",
44 44
             "",
45
+            "## Get data from local cache",
46
+            "dataUpdate('data/folder')", 
47
+            "dataSearch(c('gencode', 'transcripts', 'mouse', 'M31'))",
48
+            "", 
45 49
             "## Get data from Google bucket directly",
46 50
             "dataUpdate('data/folder', cloud=TRUE)",
47 51
             "dh <- dataSearch(c('gencode', 'transcripts'))", 
... ...
@@ -25,8 +25,13 @@ hisat2_index <- addMeta(
25 25
         date = Sys.Date(),
26 26
         url = "http://daehwankimlab.github.io/hisat2/",
27 27
         example = paste(
28
-            "recipeLoad('hisat2_index.R', return = TRUE)",
28
+            "hisat2_index <- recipeLoad('hisat2_index.R')",
29 29
             "hisat2_index$genome <- 'GRCh38.primary_assembly.genome.fa'",
30 30
             "getData(hisat2_index, outdir = 'data/folder', notes = c('hisat2_index', 'GRCh38.primary_assembly'))",
31
+            "",
32
+            "## Get data from local cache",
33
+            "dataUpdate('data/folder')", 
34
+            "dataSearch(c('hisat2_index', 'GRCh38.primary_assembly'))",
35
+            "", 
31 36
             sep="\n"))
32 37
 )
... ...
@@ -66,10 +66,14 @@ reference_genome <- addMeta(
66 66
         date = Sys.Date(),
67 67
         example = paste(
68 68
             "## Get data from evaluating recipe",
69
-            "recipeLoad(reference_genome, return=TRUE)",
69
+            "reference_genome <- recipeLoad('reference_genome')",
70 70
             "reference_genome$fasta = 'http://ftp.ensembl.org/pub/release-104/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.chromosome.MT.fa.gz'",
71 71
             "getData(reference_genome, outdir = 'data/folder', notes = c('homo sapiens', 'grch38', 'ensembl'), conda = TRUE, docker = FALSE)",
72 72
             "",
73
+            "## Get data from local cache",
74
+            "dataUpdate('data/folder')", 
75
+            "dataSearch(c('homo sapiens', 'grch38', 'ensembl'))",
76
+            "", 
73 77
             "## Get data from Google bucket directly",
74 78
             "dataUpdate('data/folder', cloud=TRUE)",
75 79
             "dh <- dataSearch(c('homo sapiens', 'grch38', '1000 genomes'))", 
... ...
@@ -31,9 +31,14 @@ salmon_index <- addMeta(
31 31
         date = Sys.Date(),
32 32
         url = "https://salmon.readthedocs.io/en/latest/salmon.html",
33 33
         example = paste(
34
-            "recipeLoad('salmon_index.R', return = TRUE)",
34
+            "salmon_index <- recipeLoad('salmon_index.R')",
35 35
             "salmon_index$genome <- 'GRCh38.primary_assembly.genome.fa'",
36 36
             "salmon_index$transcript <- 'gencode.v42.transcripts.fa'",
37 37
             "getData(salmon_index, outdir = 'data/folder', notes = c('salmon_index', 'GRCh38.primary_assembly', 'gencode.v42'))",
38
+            "",
39
+            "## Get data from local cache",
40
+            "dataUpdate('data/folder')", 
41
+            "dataSearch(c('salmon_index', 'GRCh38.primary_assembly', 'gencode.v42'))",
42
+            "", 
38 43
             sep="\n"))
39 44
 )
... ...
@@ -28,9 +28,14 @@ ucsc_database <- addMeta(
28 28
         date = Sys.Date(),
29 29
         example = paste(
30 30
             "## Get data from evaluating recipe",
31
-            "recipeLoad('ucsc_database', return = TRUE)",
31
+            "ucsc_database <- recipeLoad('ucsc_database')",
32 32
             "ucsc_database$build <- 'hg38'",
33 33
             "ucsc_database$dbname <- 'refGene'",
34 34
             "getData(ucsc_database, outdir = 'data/folder', notes = c('ucsc', 'annotation', 'database', 'hg38', 'refGene'))",
35
+            "",
36
+            "## Get data from local cache",
37
+            "dataUpdate('data/folder')", 
38
+            "dataSearch(c('ucsc', 'annotation', 'database', 'hg38', 'refGene'))",
39
+            "", 
35 40
             sep="\n"))
36 41
 )