Browse code

- Initial progress creating website with articles for package.

Dario Strbenac authored on 08/11/2022 06:00:07
Showing 104 changed files

... ...
@@ -3,5 +3,8 @@
3 3
 ^_pkgdown\.yml$
4 4
 ^docs$
5 5
 ^pkgdown$
6
-vignettes/test.Rmd
6
+vignettes/introduction.Rmd
7
+vignettes/performanceEvaluation.Rmd
8
+vignettes/multiViewMethods.Rmd
9
+vignettes/incorporateNew.Rmd
7 10
 ^\.github$
8 11
deleted file mode 100644
... ...
@@ -1 +0,0 @@
1
-*.html
2 0
deleted file mode 100644
... ...
@@ -1,46 +0,0 @@
1
-# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2
-# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3
-on:
4
-  push:
5
-    branches: [main, master]
6
-  pull_request:
7
-    branches: [main, master]
8
-  release:
9
-    types: [published]
10
-  workflow_dispatch:
11
-
12
-name: pkgdown
13
-
14
-jobs:
15
-  pkgdown:
16
-    runs-on: ubuntu-latest
17
-    # Only restrict concurrency for non-PR jobs
18
-    concurrency:
19
-      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20
-    env:
21
-      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22
-    steps:
23
-      - uses: actions/checkout@v3
24
-
25
-      - uses: r-lib/actions/setup-pandoc@v2
26
-
27
-      - uses: r-lib/actions/setup-r@v2
28
-        with:
29
-          use-public-rspm: true
30
-
31
-      - uses: r-lib/actions/setup-r-dependencies@v2
32
-        with:
33
-          extra-packages: any::pkgdown, local::.
34
-          needs: website
35
-
36
-      - name: Build site
37
-        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
38
-        shell: Rscript {0}
39
-
40
-      - name: Deploy to GitHub pages 🚀
41
-        if: github.event_name != 'pull_request'
42
-        uses: JamesIves/github-pages-deploy-action@v4.4.1
43
-        with:
44
-          clean: false
45
-          branch: gh-pages
46
-          folder: docs
... ...
@@ -2,4 +2,3 @@
2 2
 .Rhistory
3 3
 .RData
4 4
 .Ruserdata
5
-docs
6 5
new file mode 100644
... ...
@@ -0,0 +1,93 @@
1
+<!DOCTYPE html>
2
+<!-- Generated by pkgdown: do not edit by hand --><html lang="en">
3
+<head>
4
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+<meta charset="utf-8">
6
+<meta http-equiv="X-UA-Compatible" content="IE=edge">
7
+<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8
+<title>Page not found (404) • ClassifyR</title>
9
+<script src="https://sydneybiox.github.io/ClassifyR/deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
10
+<link href="https://sydneybiox.github.io/ClassifyR/deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet">
11
+<script src="https://sydneybiox.github.io/ClassifyR/deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
12
+<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
13
+<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="https://sydneybiox.github.io/ClassifyR/pkgdown.js"></script><meta property="og:title" content="Page not found (404)">
14
+<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
15
+<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
16
+<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
17
+<![endif]-->
18
+</head>
19
+<body>
20
+    <a href="https://sydneybiox.github.io/ClassifyR/#main" class="visually-hidden-focusable">Skip to contents</a>
21
+    
22
+
23
+    <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light"><div class="container">
24
+    
25
+    <a class="navbar-brand me-2" href="https://sydneybiox.github.io/ClassifyR/index.html">ClassifyR</a>
26
+
27
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">3.3.1</small>
28
+
29
+    
30
+    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
31
+      <span class="navbar-toggler-icon"></span>
32
+    </button>
33
+
34
+    <div id="navbar" class="collapse navbar-collapse ms-3">
35
+      <ul class="navbar-nav me-auto">
36
+<li class="nav-item">
37
+  <a class="nav-link" href="https://sydneybiox.github.io/ClassifyR/articles/ClassifyR.html">Get started</a>
38
+</li>
39
+<li class="nav-item">
40
+  <a class="nav-link" href="https://sydneybiox.github.io/ClassifyR/reference/index.html">Reference</a>
41
+</li>
42
+<li class="nav-item dropdown">
43
+  <a href="https://sydneybiox.github.io/ClassifyR/#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
44
+  <div class="dropdown-menu" aria-labelledby="dropdown-articles">
45
+    <a class="dropdown-item" href="https://sydneybiox.github.io/ClassifyR/articles/DevelopersGuide.html"><strong>ClassifyR</strong> Developer's Guide</a>
46
+    <a class="dropdown-item" href="https://sydneybiox.github.io/ClassifyR/articles/incorporateNew.html">Creating a Wrapper for New Functionality and Registering It</a>
47
+    <a class="dropdown-item" href="https://sydneybiox.github.io/ClassifyR/articles/introduction.html">Introduction to the Concepts of ClassifyR</a>
48
+    <a class="dropdown-item" href="https://sydneybiox.github.io/ClassifyR/articles/multiViewMethods.html">Multi-view Methods for Modelling of Multiple Data Views</a>
49
+    <a class="dropdown-item" href="https://sydneybiox.github.io/ClassifyR/articles/performanceEvaluation.html">Performance Evaluation of Fitted Models</a>
50
+  </div>
51
+</li>
52
+      </ul>
53
+<form class="form-inline my-2 my-lg-0" role="search">
54
+        <input type="search" class="form-control me-sm-2" aria-label="Search for" name="search-input" data-search-index="search.json" id="search-input" placeholder="Search for" autocomplete="off">
55
+</form>
56
+
57
+      <ul class="navbar-nav"></ul>
58
+</div>
59
+
60
+    
61
+  </div>
62
+</nav><div class="container template-title-body">
63
+<div class="row">
64
+  <main id="main" class="col-md-9"><div class="page-header">
65
+      <img src="https://sydneybiox.github.io/ClassifyR/" class="logo" alt=""><h1>Page not found (404)</h1>
66
+      
67
+    </div>
68
+
69
+Content not found. Please use links in the navbar.
70
+
71
+  </main>
72
+</div>
73
+
74
+
75
+    <footer><div class="pkgdown-footer-left">
76
+  <p></p>
77
+<p>Developed by Dario Strbenac.</p>
78
+</div>
79
+
80
+<div class="pkgdown-footer-right">
81
+  <p></p>
82
+<p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.6.</p>
83
+</div>
84
+
85
+    </footer>
86
+</div>
87
+
88
+  
89
+
90
+  
91
+
92
+  </body>
93
+</html>
0 94
new file mode 100644
... ...
@@ -0,0 +1,1292 @@
1
+<!DOCTYPE html>
2
+<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="ClassifyR"><title>An Introduction to **ClassifyR** • ClassifyR</title><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" 
integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to **ClassifyR**"><meta property="og:description" content="ClassifyR"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
3
+<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
4
+<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
5
+<![endif]--></head><body>
6
+    <a href="#main" class="visually-hidden-focusable">Skip to contents</a>
7
+    
8
+
9
+    <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light"><div class="container">
10
+    
11
+    <a class="navbar-brand me-2" href="../index.html">ClassifyR</a>
12
+
13
+    <small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">3.3.1</small>
14
+
15
+    
16
+    <button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
17
+      <span class="navbar-toggler-icon"></span>
18
+    </button>
19
+
20
+    <div id="navbar" class="collapse navbar-collapse ms-3">
21
+      <ul class="navbar-nav me-auto"><li class="active nav-item">
22
+  <a class="nav-link" href="../articles/ClassifyR.html">Get started</a>
23
+</li>
24
+<li class="nav-item">
25
+  <a class="nav-link" href="../reference/index.html">Reference</a>
26
+</li>
27
+<li class="nav-item dropdown">
28
+  <a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
29
+  <div class="dropdown-menu" aria-labelledby="dropdown-articles">
30
+    <a class="dropdown-item" href="../articles/DevelopersGuide.html"><strong>ClassifyR</strong> Developer's Guide</a>
31
+    <a class="dropdown-item" href="../articles/incorporateNew.html">Creating a Wrapper for New Functionality and Registering It</a>
32
+    <a class="dropdown-item" href="../articles/introduction.html">Introduction to the Concepts of ClassifyR</a>
33
+    <a class="dropdown-item" href="../articles/multiViewMethods.html">Multi-view Methods for Modelling of Multiple Data Views</a>
34
+    <a class="dropdown-item" href="../articles/performanceEvaluation.html">Performance Evaluation of Fitted Models</a>
35
+  </div>
36
+</li>
37
+      </ul><form class="form-inline my-2 my-lg-0" role="search">
38
+        <input type="search" class="form-control me-sm-2" aria-label="Search for" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
39
+
40
+      <ul class="navbar-nav"></ul></div>
41
+
42
+    
43
+  </div>
44
+</nav><div class="container template-article">
45
+
46
+
47
+
48
+
49
+<div class="row">
50
+  <main id="main" class="col-md-9"><div class="page-header">
51
+      <img src="" class="logo" alt=""><h1>An Introduction to <strong>ClassifyR</strong></h1>
52
+                        <h4 data-toc-skip class="author">Dario Strbenac,
53
+Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University
54
+of Sydney, Australia.</h4>
55
+            
56
+      
57
+      
58
+      <div class="d-none name"><code>ClassifyR.Rmd</code></div>
59
+    </div>
60
+
61
+    
62
+    
63
+<div id="installation" class="section level2">
64
+<h2>Installation</h2>
65
+<p>Typically, each feature selection method or classifier originates
66
+from a different R package, which <strong>ClassifyR</strong> provides a
67
+wrapper around. By default, only high-performance t-test/F-test and
68
+random forest are installed. If you intend to compare between numerous
69
+different modelling methods, you should install all suggested packages
70
+at once by using the command
71
+<code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>.
72
+This will take a few minutes, particularly on Linux, because each
73
+package will be compiled from source code.</p>
74
+</div>
75
+<div id="overview" class="section level2">
76
+<h2>Overview</h2>
77
+<p><strong>ClassifyR</strong> provides a structured pipeline for
78
+cross-validated classification. Classification is viewed in terms of
79
+four stages: data transformation, feature selection, classifier
80
+training, and prediction. The driver functions <em>crossValidate</em>
81
+and <em>runTests</em> implement varieties of cross-validation. They
82
+are:</p>
83
+<ul>
84
+<li>Permutation of the order of samples followed by k-fold
85
+cross-validation (runTests only)</li>
86
+<li>Repeated x% test set cross-validation</li>
87
+<li>Leave-k-out cross-validation</li>
88
+</ul>
89
+<p>Driver functions can use parallel processing capabilities in R to
90
+speed up cross-validations when many CPUs are available. The output of
91
+the driver functions is a <em>ClassifyResult</em> object which can be
92
+directly used by the performance evaluation functions. The process of
93
+classification is summarised by a flowchart.</p>
94
+<img src="" style="margin-left: auto;margin-right: auto"/>
95
+<p>Importantly, ClassifyR implements a number of methods for
96
+classification using different kinds of changes in measurements between
97
+classes. Most classifiers work with features where the means are
98
+different. In addition to changes in means (DM),
99
+<strong>ClassifyR</strong> also allows for classification using
100
+differential variability (DV; changes in scale) and differential
101
+distribution (DD; changes in location and/or scale).</p>
102
+<div id="case-study-diagnosing-asthma" class="section level3">
103
+<h3>Case Study: Diagnosing Asthma</h3>
104
+<p>To demonstrate some key features of ClassifyR, a data set consisting
105
+of the 2000 most variably expressed genes and 190 people will be used to
106
+quickly obtain results. The journal article corresponding to the data
107
+set was published in <em>Scientific Reports</em> in 2018 and is titled
108
+<a href="http://www.nature.com/articles/s41598-018-27189-4">A Nasal
109
+Brush-based Classifier of Asthma Identified by Machine Learning Analysis
110
+of Nasal RNA Sequence Data</a>.</p>
111
+<p>Load the package.</p>
112
+<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(ClassifyR)</span></code></pre></div>
113
+<pre><code>## Warning: multiple methods tables found for &#39;aperm&#39;</code></pre>
114
+<pre><code>## Warning: replacing previous import &#39;BiocGenerics::aperm&#39; by &#39;DelayedArray::aperm&#39; when loading &#39;SummarizedExperiment&#39;</code></pre>
115
+<p>A glimpse at the RNA measurements and sample classes.</p>
116
+<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">data</span>(asthma) <span class="co"># Contains measurements and classes variables.</span></span>
117
+<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>measurements[<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>, <span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>]</span></code></pre></div>
118
+<pre><code>##            HBB BPIFA1  XIST FCGR3B HBA2
119
+## Sample 1  9.72  14.06 12.28  11.42 7.83
120
+## Sample 2 11.98  13.89  6.35  13.25 9.42
121
+## Sample 3 12.15  17.44 10.21   7.87 9.68
122
+## Sample 4 10.60  11.87  6.27  14.75 8.96
123
+## Sample 5  8.18  15.01 11.21   6.77 6.43</code></pre>
124
+<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(classes)</span></code></pre></div>
125
+<pre><code>## [1] No  No  No  No  Yes No 
126
+## Levels: No Yes</code></pre>
127
+<p>The numeric matrix variable <em>measurements</em> stores the
128
+normalised values of the RNA gene abundances for each sample and the
129
+factor vector <em>classes</em> identifies which class the samples belong
130
+to. The measurements were normalised using <strong>DESeq2</strong>’s
131
+<em>varianceStabilizingTransformation</em> function, which produces
132
+<span class="math inline">\(log_2\)</span>-like data.</p>
133
+<p>For more complex data sets with multiple kinds of experiments
134
+(e.g. DNA methylation, copy number, gene expression on the same set of
135
+samples) a <a
136
+href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html"><em>MultiAssayExperiment</em></a>
137
+is recommended for data storage and supported by
138
+<strong>ClassifyR</strong>’s methods.</p>
139
+</div>
140
+</div>
141
+<div id="quick-start-crossvalidate-function" class="section level2">
142
+<h2>Quick Start: <em>crossValidate</em> Function</h2>
143
+<p>The <em>crossValidate</em> function offers a quick and simple way to
144
+start analysing a dataset in ClassifyR. It is a wrapper for
145
+<em>runTests</em>, the core model building and testing function of
146
+ClassifyR. <em>crossValidate</em> must be supplied with
147
+<em>measurements</em>, a simple tabular data container or a list-like
148
+structure of such related tabular data on common samples. The classes of
149
+it may be <em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>,
150
+<em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>.
151
+For a dataset with <span class="math inline">\(n\)</span> observations
152
+and <span class="math inline">\(p\)</span> variables, the
153
+<em>crossValidate</em> function will accept inputs of the following
154
+shapes:</p>
155
+<table>
156
+<colgroup>
157
+<col width="25%" />
158
+<col width="37%" />
159
+<col width="37%" />
160
+</colgroup>
161
+<thead>
162
+<tr class="header">
163
+<th>Data Type</th>
164
+<th align="center"><span class="math inline">\(n \times p\)</span></th>
165
+<th align="center"><span class="math inline">\(p \times n\)</span></th>
166
+</tr>
167
+</thead>
168
+<tbody>
169
+<tr class="odd">
170
+<td><span
171
+style="font-family: &#39;Courier New&#39;, monospace;">matrix</span></td>
172
+<td align="center">✔</td>
173
+<td align="center"></td>
174
+</tr>
175
+<tr class="even">
176
+<td><span
177
+style="font-family: &#39;Courier New&#39;, monospace;">data.frame</span></td>
178
+<td align="center">✔</td>
179
+<td align="center"></td>
180
+</tr>
181
+<tr class="odd">
182
+<td><span
183
+style="font-family: &#39;Courier New&#39;, monospace;">DataFrame</span></td>
184
+<td align="center">✔</td>
185
+<td align="center"></td>
186
+</tr>
187
+<tr class="even">
188
+<td><span
189
+style="font-family: &#39;Courier New&#39;, monospace;">MultiAssayExperiment</span></td>
190
+<td align="center"></td>
191
+<td align="center">✔</td>
192
+</tr>
193
+<tr class="odd">
194
+<td><span
195
+style="font-family: &#39;Courier New&#39;, monospace;">list</span> of
196
+<span
197
+style="font-family: &#39;Courier New&#39;, monospace;">data.frame</span>s</td>
198
+<td align="center">✔</td>
199
+<td align="center"></td>
200
+</tr>
201
+</tbody>
202
+</table>
203
+<p><em>crossValidate</em> must also be supplied with <em>outcome</em>,
204
+which represents the prediction to be made in a variety of possible
205
+ways.</p>
206
+<ul>
207
+<li>A <em>factor</em> that contains the class label for each
208
+observation. <em>outcome</em> must be of length <span
209
+class="math inline">\(n\)</span>.</li>
210
+<li>A <em>character</em> of length 1 that matches a column name in a
211
+data frame which holds the classes. The classes will automatically be
212
+removed before training is done.</li>
213
+<li>A <em>Surv</em> object of the same length as the number of samples
214
+in the data which contains information about the time and censoring of
215
+the samples.</li>
216
+<li>A <em>character</em> vector of length 2 or 3 that each match a
217
+column name in a data frame which holds information about the time and
218
+censoring of the samples. The time-to-event columns will automatically
219
+be removed before training is done.</li>
220
+</ul>
221
+<p>The type of classifier used can be changed with the
222
+<em>classifier</em> argument. The default is a random forest, which
223
+seamlessly handles categorical and numerical data. A full list of
224
+classifiers can be seen by running <em>?crossValidate</em>. A feature
225
+selection step can be performed before classification using
226
+<em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by
227
+default. Similarly, the number of folds and number of repeats for cross
228
+validation can be changed with the <em>nFolds</em> and <em>nRepeats</em>
229
+arguments. If wanted, <em>nCores</em> can be specified to run the cross
230
+validation in parallel. To perform 5-fold cross-validation of a Support
231
+Vector Machine with 2 repeats:</p>
232
+<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a>result <span class="ot">&lt;-</span> <span class="fu">crossValidate</span>(measurements, classes, <span class="at">classifier =</span> <span class="st">&quot;SVM&quot;</span>,</span>
233
+<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a>                        <span class="at">nFeatures =</span> <span class="dv">20</span>, <span class="at">nFolds =</span> <span class="dv">5</span>, <span class="at">nRepeats =</span> <span class="dv">2</span>, <span class="at">nCores =</span> <span class="dv">1</span>)</span></code></pre></div>
234
+<pre><code>## Processing sample set 10.</code></pre>
235
+<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="fu">performancePlot</span>(result)</span></code></pre></div>
236
+<pre><code>## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</code></pre>
237
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-5-1.png" width="700" /></p>
238
+<div id="data-integration-with-crossvalidate" class="section level3">
239
+<h3>Data Integration with crossValidate</h3>
240
+<p><em>crossValidate</em> also allows data from multiple sources to be
241
+integrated into a single model. The integration method can be specified
242
+with the <em>multiViewMethod</em> argument. In this example, suppose the
243
+first 10 variables in the asthma data set are from a certain source and
244
+the remaining 1990 variables are from a second source. To integrate
245
+multiple data sets, each variable must be labeled with the data set it
246
+came from. This is done in a different manner depending on the data type
247
+of <em>measurements</em>.</p>
248
+<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified
249
+using <em>mcols</em>. In the column metadata, each feature must have an
250
+<em>assay</em> and a <em>feature</em> name.</p>
251
+<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>measurementsDF <span class="ot">&lt;-</span> <span class="fu">DataFrame</span>(measurements)</span>
252
+<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="fu">mcols</span>(measurementsDF) <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(</span>
253
+<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">assay =</span> <span class="fu">rep</span>(<span class="fu">c</span>(<span class="st">&quot;assay_1&quot;</span>, <span class="st">&quot;assay_2&quot;</span>), <span class="at">times =</span> <span class="fu">c</span>(<span class="dv">10</span>, <span class="dv">1990</span>)),</span>
254
+<span id="cb12-4"><a href="#cb12-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">feature =</span> <span class="fu">colnames</span>(measurementsDF)</span>
255
+<span id="cb12-5"><a href="#cb12-5" aria-hidden="true" tabindex="-1"></a>)</span>
256
+<span id="cb12-6"><a href="#cb12-6" aria-hidden="true" tabindex="-1"></a></span>
257
+<span id="cb12-7"><a href="#cb12-7" aria-hidden="true" tabindex="-1"></a>result <span class="ot">&lt;-</span> <span class="fu">crossValidate</span>(measurementsDF, classes, <span class="at">classifier =</span> <span class="st">&quot;SVM&quot;</span>, <span class="at">nFolds =</span> <span class="dv">5</span>,</span>
258
+<span id="cb12-8"><a href="#cb12-8" aria-hidden="true" tabindex="-1"></a>                        <span class="at">nRepeats =</span> <span class="dv">3</span>, <span class="at">multiViewMethod =</span> <span class="st">&quot;merge&quot;</span>)</span></code></pre></div>
259
+<pre><code>## Processing sample set 10.
260
+## Processing sample set 10.
261
+## Processing sample set 10.</code></pre>
262
+<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="fu">performancePlot</span>(result, <span class="at">characteristicsList =</span> <span class="fu">list</span>(<span class="at">x =</span> <span class="st">&quot;Assay Name&quot;</span>))</span></code></pre></div>
263
+<pre><code>## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</code></pre>
264
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-6-1.png" width="700" /></p>
265
+<p>If using a list of <em>data.frame</em>s, the name of each element in
266
+the list will be used as the assay name.</p>
267
+<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Assigns first 10 variables to dataset_1, and the rest to dataset_2</span></span>
268
+<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>measurementsList <span class="ot">&lt;-</span> <span class="fu">list</span>(</span>
269
+<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>  (measurements <span class="sc">|&gt;</span> <span class="fu">as.data.frame</span>())[<span class="dv">1</span><span class="sc">:</span><span class="dv">10</span>],</span>
270
+<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>  (measurements <span class="sc">|&gt;</span> <span class="fu">as.data.frame</span>())[<span class="dv">11</span><span class="sc">:</span><span class="dv">2000</span>]</span>
271
+<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>)</span>
272
+<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a><span class="fu">names</span>(measurementsList) <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">&quot;assay_1&quot;</span>, <span class="st">&quot;assay_2&quot;</span>)</span>
273
+<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a></span>
274
+<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a>result <span class="ot">&lt;-</span> <span class="fu">crossValidate</span>(measurementsList, classes, <span class="at">classifier =</span> <span class="st">&quot;SVM&quot;</span>, <span class="at">nFolds =</span> <span class="dv">5</span>,</span>
275
+<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a>                        <span class="at">nRepeats =</span> <span class="dv">3</span>, <span class="at">multiViewMethod =</span> <span class="st">&quot;merge&quot;</span>)</span></code></pre></div>
276
+<pre><code>## Processing sample set 10.
277
+## Processing sample set 10.
278
+## Processing sample set 10.</code></pre>
279
+<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="fu">performancePlot</span>(result, <span class="at">characteristicsList =</span> <span class="fu">list</span>(<span class="at">x =</span> <span class="st">&quot;Assay Name&quot;</span>))</span></code></pre></div>
280
+<pre><code>## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</code></pre>
281
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-7-1.png" width="700" /></p>
282
+</div>
283
+</div>
284
+<div id="a-more-detailed-look-at-classifyr" class="section level2">
285
+<h2>A More Detailed Look at ClassifyR</h2>
286
+<p>In the following sections, some of the most useful functions provided
287
+in <strong>ClassifyR</strong> will be demonstrated. However, a user
288
+could wrap any feature selection, training, or prediction function to
289
+the classification framework, as long as it meets some simple rules
290
+about the input and return parameters. See the appendix section of this
291
+guide titled “Rules for New Functions” for a description of these.</p>
292
+<div id="comparison-to-existing-classification-frameworks"
293
+class="section level3">
294
+<h3>Comparison to Existing Classification Frameworks</h3>
295
+<p>There are a few other frameworks for classification in R. The table
296
+below provides a comparison of which features they offer.</p>
297
+<table>
298
+<colgroup>
299
+<col width="8%" />
300
+<col width="10%" />
301
+<col width="8%" />
302
+<col width="10%" />
303
+<col width="10%" />
304
+<col width="11%" />
305
+<col width="14%" />
306
+<col width="12%" />
307
+<col width="12%" />
308
+</colgroup>
309
+<thead>
310
+<tr class="header">
311
+<th>Package</th>
312
+<th>Run User-defined Classifiers</th>
313
+<th>Parallel Execution on any OS</th>
314
+<th>Parameter Tuning</th>
315
+<th>Intel DAAL Performance Metrics</th>
316
+<th>Ranking and Selection Plots</th>
317
+<th>Class Distribution Plot</th>
318
+<th>Sample-wise Error Heatmap</th>
319
+<th>Direct Support for MultiAssayExperiment Input</th>
320
+</tr>
321
+</thead>
322
+<tbody>
323
+<tr class="odd">
324
+<td><strong>ClassifyR</strong></td>
325
+<td>Yes</td>
326
+<td>Yes</td>
327
+<td>Yes</td>
328
+<td>Yes</td>
329
+<td>Yes</td>
330
+<td>Yes</td>
331
+<td>Yes</td>
332
+<td>Yes</td>
333
+</tr>
334
+<tr class="even">
335
+<td>caret</td>
336
+<td>Yes</td>
337
+<td>Yes</td>
338
+<td>Yes</td>
339
+<td>No</td>
340
+<td>No</td>
341
+<td>No</td>
342
+<td>No</td>
343
+<td>No</td>
344
+</tr>
345
+<tr class="odd">
346
+<td>MLInterfaces</td>
347
+<td>Yes</td>
348
+<td>No</td>
349
+<td>No</td>
350
+<td>No</td>
351
+<td>No</td>
352
+<td>No</td>
353
+<td>No</td>
354
+<td>No</td>
355
+</tr>
356
+<tr class="even">
357
+<td>MCRestimate</td>
358
+<td>Yes</td>
359
+<td>No</td>
360
+<td>Yes</td>
361
+<td>No</td>
362
+<td>No</td>
363
+<td>No</td>
364
+<td>No</td>
365
+<td>No</td>
366
+</tr>
367
+<tr class="odd">
368
+<td>CMA</td>
369
+<td>No</td>
370
+<td>No</td>
371
+<td>Yes</td>
372
+<td>No</td>
373
+<td>No</td>
374
+<td>No</td>
375
+<td>No</td>
376
+<td>No</td>
377
+</tr>
378
+</tbody>
379
+</table>
380
+</div>
381
+<div id="provided-functionality" class="section level3">
382
+<h3>Provided Functionality</h3>
383
+<p>Although <strong>ClassifyR</strong> is a cross-validation framework, a number of popular
384
+feature selection and classification functions are provided by the
385
+package which meet the requirements of functions to be used by it (see
386
+the last section).</p>
387
+<div id="provided-methods-for-feature-selection-and-classification"
388
+class="section level4">
389
+<h4>Provided Methods for Feature Selection and Classification</h4>
390
+<p>In the following tables, a function that is used when no function is
391
+explicitly specified by the user is shown as <span
392
+style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
393
+<p>The functions below produce a ranking, of which different size
394
+subsets are tried and the classifier performance evaluated, to select a
395
+best subset of features, based on a criterion such as balanced accuracy
396
+rate.</p>
397
+<table style="width:100%;">
398
+<colgroup>
399
+<col width="9%" />
400
+<col width="62%" />
401
+<col width="9%" />
402
+<col width="9%" />
403
+<col width="9%" />
404
+</colgroup>
405
+<thead>
406
+<tr class="header">
407
+<th>Function</th>
408
+<th>Description</th>
409
+<th>DM</th>
410
+<th>DV</th>
411
+<th>DD</th>
412
+</tr>
413
+</thead>
414
+<tbody>
415
+<tr class="odd">
416
+<td><span
417
+style="padding:4px; border:2px dashed #e64626; font-family: &#39;Courier New&#39;, monospace;">differentMeansRanking</span></td>
418
+<td>t-test ranking if two classes, F-test ranking if three or more</td>
419
+<td>✔</td>
420
+<td></td>
421
+<td></td>
422
+</tr>
423
+<tr class="even">
424
+<td><span
425
+style="font-family: &#39;Courier New&#39;, monospace;">limmaRanking</span></td>
426
+<td>Moderated t-test ranking using variance shrinkage</td>
427
+<td>✔</td>
428
+<td></td>
429
+<td></td>
430
+</tr>
431
+<tr class="odd">
432
+<td><span
433
+style="font-family: &#39;Courier New&#39;, monospace;">edgeRranking</span></td>
434
+<td>Likelihood ratio test for count data ranking</td>
435
+<td>✔</td>
436
+<td></td>
437
+<td></td>
438
+</tr>
439
+<tr class="even">
440
+<td><span
441
+style="font-family: &#39;Courier New&#39;, monospace;">bartlettRanking</span></td>
442
+<td>Bartlett’s test non-robust ranking</td>
443
+<td></td>
444
+<td>✔</td>
445
+<td></td>
446
+</tr>
447
+<tr class="odd">
448
+<td><span
449
+style="font-family: &#39;Courier New&#39;, monospace;">leveneRanking</span></td>
450
+<td>Levene’s test robust ranking</td>
451
+<td></td>
452
+<td>✔</td>
453
+<td></td>
454
+</tr>
455
+<tr class="even">
456
+<td><span
457
+style="font-family: &#39;Courier New&#39;, monospace;">DMDranking</span></td>
458
+<td><span style="white-space: nowrap">Difference in location
459
+(mean/median) and/or scale (SD, MAD, <span
460
+class="math inline">\(Q_n\)</span>)</span></td>
461
+<td>✔</td>
462
+<td>✔</td>
463
+<td>✔</td>
464
+</tr>
465
+<tr class="odd">
466
+<td><span
467
+style="font-family: &#39;Courier New&#39;, monospace;">likelihoodRatioRanking</span></td>
468
+<td>Likelihood ratio (normal distribution) ranking</td>
469
+<td>✔</td>
470
+<td>✔</td>
471
+<td>✔</td>
472
+</tr>
473
+<tr class="even">
474
+<td><span
475
+style="font-family: &#39;Courier New&#39;, monospace;">KolmogorovSmirnovRanking</span></td>
476
+<td>Kolmogorov-Smirnov distance between distributions ranking</td>
477
+<td>✔</td>
478
+<td>✔</td>
479
+<td>✔</td>
480
+</tr>
481
+<tr class="odd">
482
+<td><span
483
+style="font-family: &#39;Courier New&#39;, monospace;">KullbackLeiblerRanking</span></td>
484
+<td>Kullback-Leibler distance between distributions ranking</td>
485
+<td>✔</td>
486
+<td>✔</td>
487
+<td>✔</td>
488
+</tr>
489
+</tbody>
490
+</table>
491
+<p>Likewise, a variety of classifiers is also provided.</p>
492
+<table>
493
+<colgroup>
494
+<col width="9%" />
495
+<col width="61%" />
496
+<col width="9%" />
497
+<col width="9%" />
498
+<col width="9%" />
499
+</colgroup>
500
+<thead>
501
+<tr class="header">
502
+<th>Function(s)</th>
503
+<th>Description</th>
504
+<th>DM</th>
505
+<th>DV</th>
506
+<th>DD</th>
507
+</tr>
508
+</thead>
509
+<tbody>
510
+<tr class="odd">
511
+<td><span
512
+style="padding:1px; border:2px dashed #e64626; display:inline-block; margin-bottom: 3px; font-family: &#39;Courier New&#39;, monospace;">DLDAtrainInterface</span>,<br><span
513
+style="padding:1px; border:2px dashed #e64626; display:inline-block; font-family: &#39;Courier New&#39;, monospace;">DLDApredictInterface</span></td>
514
+<td>Wrappers for sparsediscrim’s functions <span
515
+style="font-family: &#39;Courier New&#39;, monospace;">dlda</span> and
516
+<span
517
+style="font-family: &#39;Courier New&#39;, monospace;">predict.dlda</span>
518
+</td>
519
+<td>✔</td>
520
+<td></td>
521
+<td></td>
522
+</tr>
523
+<tr class="even">
524
+<td><span
525
+style="font-family: &#39;Courier New&#39;, monospace;">classifyInterface</span></td>
526
+<td>Wrapper for PoiClaClu’s Poisson LDA function <span
527
+style="font-family: &#39;Courier New&#39;, monospace;">classify</span></td>
528
+<td>✔</td>
529
+<td></td>
530
+<td></td>
531
+</tr>
532
+<tr class="odd">
533
+<td><span
534
+style="font-family: &#39;Courier New&#39;, monospace;">elasticNetGLMtrainInterface</span>,
535
+<span
536
+style="font-family: &#39;Courier New&#39;, monospace;">elasticNetGLMpredictInterface</span></td>
537
+<td>Wrappers for glmnet’s elastic net GLM functions <span
538
+style="font-family: &#39;Courier New&#39;, monospace;">glmnet</span> and
539
+<span
540
+style="font-family: &#39;Courier New&#39;, monospace;">predict.glmnet</span></td>
541
+<td>✔</td>
542
+<td></td>
543
+<td></td>
544
+</tr>
545
+<tr class="even">
546
+<td><span
547
+style="font-family: &#39;Courier New&#39;, monospace;">NSCtrainInterface</span>,
548
+<span
549
+style="font-family: &#39;Courier New&#39;, monospace;">NSCpredictInterface</span></td>
550
+<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span
551
+style="font-family: &#39;Courier New&#39;, monospace;">pamr.train</span>
552
+and <span
553
+style="font-family: &#39;Courier New&#39;, monospace;">pamr.predict</span></td>
554
+<td>✔</td>
555
+<td></td>
556
+<td></td>
557
+</tr>
558
+<tr class="odd">
559
+<td><span
560
+style="font-family: &#39;Courier New&#39;, monospace;">fisherDiscriminant</span></td>
561
+<td>Implementation of Fisher’s LDA for departures from normality</td>
562
+<td>✔</td>
563
+<td>✔*</td>
564
+<td></td>
565
+</tr>
566
+<tr class="even">
567
+<td><span
568
+style="font-family: &#39;Courier New&#39;, monospace;">mixModelsTrain</span>,
569
+<span
570
+style="font-family: &#39;Courier New&#39;, monospace;">mixModelsPredict</span></td>
571
+<td>Feature-wise mixtures of normals and voting</td>
572
+<td>✔</td>
573
+<td>✔</td>
574
+<td>✔</td>
575
+</tr>
576
+<tr class="odd">
577
+<td><span
578
+style="font-family: &#39;Courier New&#39;, monospace;">naiveBayesKernel</span></td>
579
+<td>Feature-wise kernel density estimation and voting</td>
580
+<td>✔</td>
581
+<td>✔</td>
582
+<td>✔</td>
583
+</tr>
584
+<tr class="even">
585
+<td><span
586
+style="font-family: &#39;Courier New&#39;, monospace;">randomForestTrainInterface</span>,
587
+<span
588
+style="font-family: &#39;Courier New&#39;, monospace;">randomForestPredictInterface</span></td>
589
+<td>Wrapper for ranger’s functions <span
590
+style="font-family: &#39;Courier New&#39;, monospace;">ranger</span> and
591
+<span
592
+style="font-family: &#39;Courier New&#39;, monospace;">predict</span></td>
593
+<td>✔</td>
594
+<td>✔</td>
595
+<td>✔</td>
596
+</tr>
597
+<tr class="odd">
598
+<td><span
599
+style="font-family: &#39;Courier New&#39;, monospace;">extremeGradientBoostingTrainInterface</span>,
600
+<span
601
+style="font-family: &#39;Courier New&#39;, monospace;">extremeGradientBoostingPredictInterface</span></td>
602
+<td>Wrapper for xgboost’s functions <span
603
+style="font-family: &#39;Courier New&#39;, monospace;">xgboost</span>
604
+and <span
605
+style="font-family: &#39;Courier New&#39;, monospace;">predict</span></td>
606
+<td>✔</td>
607
+<td>✔</td>
608
+<td>✔</td>
609
+</tr>
610
+<tr class="even">
611
+<td><span
612
+style="font-family: &#39;Courier New&#39;, monospace;">kNNinterface</span></td>
613
+<td>Wrapper for class’s function <span
614
+style="font-family: &#39;Courier New&#39;, monospace;">knn</span></td>
615
+<td>✔</td>
616
+<td>✔</td>
617
+<td>✔</td>
618
+</tr>
619
+<tr class="odd">
620
+<td><span
621
+style="font-family: &#39;Courier New&#39;, monospace;">SVMtrainInterface</span>,
622
+<span
623
+style="font-family: &#39;Courier New&#39;, monospace;">SVMpredictInterface</span></td>
624
+<td>Wrapper for e1071’s functions <span
625
+style="font-family: &#39;Courier New&#39;, monospace;">svm</span> and
626
+<span
627
+style="font-family: &#39;Courier New&#39;, monospace;">predict.svm</span></td>
628
+<td>✔</td>
629
+<td>✔ †</td>
630
+<td>✔ †</td>
631
+</tr>
632
+</tbody>
633
+</table>
634
+<p>* If ordinary numeric measurements have been transformed to absolute
635
+deviations using <span
636
+style="font-family: &#39;Courier New&#39;, monospace;">subtractFromLocation</span>.<br>
637
+† If the value of <span
638
+style="font-family: &#39;Courier New&#39;, monospace;">kernel</span> is
639
+not <span
640
+style="font-family: &#39;Courier New&#39;, monospace;">“linear”</span>.</p>
641
+<p>If a desired selection or classification method is not already
642
+implemented, rules for writing functions to work with
643
+<strong>ClassifyR</strong> are outlined in the wrapper vignette. Please
644
+visit it for more information.</p>
645
+</div>
646
+<div id="provided-meta-feature-methods" class="section level4">
647
+<h4>Provided Meta-feature Methods</h4>
648
+<p>A number of methods are provided for users to enable classification
649
+in a feature-set-centric or interactor-centric way. The meta-feature
650
+creation functions should be used before cross-validation is done.</p>
651
+<table>
652
+<colgroup>
653
+<col width="9%" />
654
+<col width="61%" />
655
+<col width="14%" />
656
+<col width="14%" />
657
+</colgroup>
658
+<thead>
659
+<tr class="header">
660
+<th>Function</th>
661
+<th>Description</th>
662
+<th align="center">Before CV</th>
663
+<th align="center">During CV</th>
664
+</tr>
665
+</thead>
666
+<tbody>
667
+<tr class="odd">
668
+<td><span
669
+style="font-family: &#39;Courier New&#39;, monospace;">edgesToHubNetworks</span></td>
670
+<td>Takes a two-column <span
671
+style="font-family: &#39;Courier New&#39;, monospace;">matrix</span> or
672
+<span
673
+style="font-family: &#39;Courier New&#39;, monospace;">DataFrame</span>
674
+and finds all nodes with at least a minimum number of interactions</td>
675
+<td align="center">✔</td>
676
+<td align="center"></td>
677
+</tr>
678
+<tr class="even">
679
+<td><span
680
+style="font-family: &#39;Courier New&#39;, monospace;">featureSetSummary</span></td>
681
+<td><span style="white-space: nowrap">Considers sets of features and
682
+calculates their mean or median</span></td>
683
+<td align="center">✔</td>
684
+<td align="center"></td>
685
+</tr>
686
+<tr class="odd">
687
+<td><span
688
+style="font-family: &#39;Courier New&#39;, monospace;">pairsDifferencesSelection</span></td>
689
+<td>Finds a set of pairs of features whose measurement inequalities can
690
+be used for prediction</td>
691
+<td align="center"></td>
692
+<td align="center">✔</td>
693
+</tr>
694
+<tr class="even">
695
+<td><span
696
+style="font-family: &#39;Courier New&#39;, monospace;">kTSPclassifier</span></td>
697
+<td>Voting classifier that uses inequalities between pairs of features
698
+to vote for one of two classes</td>
699
+<td align="center"></td>
700
+<td align="center">✔</td>
701
+</tr>
702
+</tbody>
703
+</table>
704
+</div>
705
+</div>
706
+<div id="fine-grained-cross-validation-and-modelling-using-runtests"
707
+class="section level3">
708
+<h3>Fine-grained Cross-validation and Modelling Using
709
+<em>runTests</em></h3>
710
+<p>For more control over the finer aspects of cross-validation of a
711
+single data set, <em>runTests</em> may be employed in place of
712
+<em>crossValidate</em>. For the variety of cross-validation, the
713
+parameters are specified by a <em>CrossValParams</em> object. The
714
+default setting is for 100 permutations and five folds and parameter
715
+tuning is done by resubstitution. It is also recommended to specify a
716
+<em>parallelParams</em> setting. On Linux and MacOS operating systems,
717
+it should be <em>MulticoreParam</em> and on Windows computers it should
718
+be <em>SnowParam</em>. Note that each of these has an option
719
+<em>RNGseed</em> and this <strong>needs to be set by the user</strong>
720
+because some classifiers or feature selection functions will have some
721
+element of randomisation. One example that works on all operating
722
+systems, but is best suited to Windows, is:</p>
723
+<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a>CVparams <span class="ot">&lt;-</span> <span class="fu">CrossValParams</span>(<span class="at">parallelParams =</span> <span class="fu">SnowParam</span>(<span class="dv">16</span>, <span class="at">RNGseed =</span> <span class="dv">123</span>))</span>
724
+<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a>CVparams</span></code></pre></div>
725
+<p>For the actual operations to do to the data to build a model of it,
726
+each of the stages should be specified by an object of class
727
+<em>ModellingParams</em>. This controls how class imbalance is handled
728
+(default is to downsample to the smallest class), any transformation
729
+that needs to be done inside of cross-validation (i.e. involving a
730
+computed value from the training set), any feature selection and the
731
+training and prediction functions to be used. The default is to do an
732
+ordinary t-test (two groups) or ANOVA (three or more groups) and
733
+classification using diagonal LDA.</p>
734
+<div class="sourceCode" id="cb21"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ModellingParams</span>()</span></code></pre></div>
735
+<pre><code>## An object of class &quot;ModellingParams&quot;
736
+## Slot &quot;balancing&quot;:
737
+## [1] &quot;downsample&quot;
738
+## 
739
+## Slot &quot;transformParams&quot;:
740
+## NULL
741
+## 
742
+## Slot &quot;selectParams&quot;:
743
+## An object of class &#39;SelectParams&#39;.
744
+## Selection Name: Difference in Means.
745
+## 
746
+## Slot &quot;trainParams&quot;:
747
+## An object of class &#39;TrainParams&#39;.
748
+## Classifier Name: Diagonal LDA.
749
+## 
750
+## Slot &quot;predictParams&quot;:
751
+## An object of class &#39;PredictParams&#39;.
752
+## 
753
+## Slot &quot;doImportance&quot;:
754
+## [1] FALSE</code></pre>
755
+</div>
756
+<div id="runtests-driver-function-of-cross-validated-classification"
757
+class="section level3">
758
+<h3>runTests Driver Function of Cross-validated Classification</h3>
759
+<p><em>runTests</em> is the main function in <strong>ClassifyR</strong>
760
+which handles the sample splitting and parallelisation, if used, of
761
+cross-validation. To begin with, a simple classifier will be
762
+demonstrated. It uses a t-test or ANOVA ranking (depending on the number
763
+of classes) for feature ranking and DLDA for classification. This
764
+classifier relies on differences in means between classes. No parameters
765
+need to be specified, because this is the default classification of
766
+<em>runTests</em>. By default, the number of features is tuned by
767
+resubstitution on the training set.</p>
768
+<div class="sourceCode" id="cb23"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a>crossValParams <span class="ot">&lt;-</span> <span class="fu">CrossValParams</span>(<span class="at">permutations =</span> <span class="dv">5</span>)</span>
769
+<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a>DMresults <span class="ot">&lt;-</span> <span class="fu">runTests</span>(measurements, classes, crossValParams, <span class="at">verbose =</span> <span class="dv">1</span>)</span></code></pre></div>
770
+<pre><code>## Processing sample set 10.</code></pre>
771
+<pre><code>## Processing sample set 20.</code></pre>
772
+<p>Here, 5 permutations (non-default) and 5 folds cross-validation
773
+(default) are specified. For computers with more than 1 CPU, the number
774
+of cores to use can be given to <em>runTests</em> by using the argument
775
+<em>parallelParams</em>. The parameter <em>seed</em> is important to set
776
+for result reproducibility when doing a cross-validation such as this,
777
+because it employs randomisation to partition the samples into folds.
778
+Also, <em>RNGseed</em> is highly recommended to be set to the back-end
779
+specified to <em>BPPARAM</em> if doing parallel processing. The first
780
+seed mentioned does not work for parallel processes. For more details
781
+about <em>runTests</em> and the parameter classes used by it, consult
782
+the help pages of such functions.</p>
783
+</div>
784
+</div>
785
+<div id="evaluation-of-a-classification" class="section level2">
786
+<h2>Evaluation of a Classification</h2>
787
+<p>The most frequently selected gene can be identified using the
788
+<em>distribution</em> function and its relative abundance values for all
789
+samples can be displayed visually by <em>plotFeatureClasses</em>.</p>
790
+<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a>selectionPercentages <span class="ot">&lt;-</span> <span class="fu">distribution</span>(DMresults, <span class="at">plot =</span> <span class="cn">FALSE</span>)</span>
791
+<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(selectionPercentages)</span>
792
+<span id="cb26-3"><a href="#cb26-3" aria-hidden="true" tabindex="-1"></a>sortedPercentages <span class="ot">&lt;-</span> <span class="fu">head</span>(selectionPercentages[<span class="fu">order</span>(selectionPercentages, <span class="at">decreasing =</span> <span class="cn">TRUE</span>)])</span>
793
+<span id="cb26-4"><a href="#cb26-4" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sortedPercentages)</span>
794
+<span id="cb26-5"><a href="#cb26-5" aria-hidden="true" tabindex="-1"></a>mostChosen <span class="ot">&lt;-</span> sortedPercentages[<span class="dv">1</span>]</span>
795
+<span id="cb26-6"><a href="#cb26-6" aria-hidden="true" tabindex="-1"></a>bestGenePlot <span class="ot">&lt;-</span> <span class="fu">plotFeatureClasses</span>(measurements, classes, <span class="fu">names</span>(mostChosen), <span class="at">dotBinWidth =</span> <span class="fl">0.1</span>,</span>
796
+<span id="cb26-7"><a href="#cb26-7" aria-hidden="true" tabindex="-1"></a>                                   <span class="at">xAxisLabel =</span> <span class="st">&quot;Normalised Expression&quot;</span>)</span></code></pre></div>
797
+<pre><code>## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
798
+## ℹ Please use `after_stat(density)` instead.
799
+## ℹ The deprecated feature was likely used in the ClassifyR package.
800
+##   Please report the issue to the authors.</code></pre>
801
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-11-1.png" width="768" /></p>
802
+<pre><code>## allFeaturesText
803
+##   ANKMY1 ARHGAP39 C10orf95 C19orf51  C2orf55 C6orf108 
804
+##        8       64      100       80        4       12 
805
+## allFeaturesText
806
+## C10orf95    CROCC    SSBP4   ZDHHC1  TMEM190 C19orf51 
807
+##      100      100      100      100       84       80</code></pre>
808
+<p>The means of the abundance levels of C10orf95 are substantially
809
+different between the people with and without asthma.
810
+<em>plotFeatureClasses</em> can also plot categorical data, such as may
811
+be found in a clinical data table, as a bar chart.</p>
812
+<p>Classification error rates, as well as many other prediction
813
+performance measures, can be calculated with <em>calcCVperformance</em>.
814
+Next, the balanced accuracy rate is calculated considering all samples,
815
+each of which was in the test set once. The balanced accuracy rate is
816
+defined as the average rate of the correct classifications of each
817
+class.</p>
818
+<p>See the documentation of <em>calcCVperformance</em> for a list of
819
+performance metrics which may be calculated.</p>
820
+<div class="sourceCode" id="cb29"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb29-1"><a href="#cb29-1" aria-hidden="true" tabindex="-1"></a>DMresults <span class="ot">&lt;-</span> <span class="fu">calcCVperformance</span>(DMresults)</span>
821
+<span id="cb29-2"><a href="#cb29-2" aria-hidden="true" tabindex="-1"></a>DMresults</span></code></pre></div>
822
+<pre><code>## An object of class &#39;ClassifyResult&#39;.
823
+## Characteristics:
824
+##    characteristic                   value
825
+##    Selection Name     Difference in Means
826
+##   Classifier Name            Diagonal LDA
827
+##  Cross-validation 5 Permutations, 5 Folds
828
+## Features: List of length 25 of feature identifiers.
829
+## Predictions: A data frame of 950 rows.
830
+## Performance Measures: Balanced Accuracy.</code></pre>
831
+<div class="sourceCode" id="cb31"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb31-1"><a href="#cb31-1" aria-hidden="true" tabindex="-1"></a><span class="fu">performance</span>(DMresults)</span></code></pre></div>
832
+<pre><code>## $`Balanced Accuracy`
833
+##         1         2         3         4         5 
834
+## 0.7850684 0.7931329 0.8011975 0.8047410 0.8077957</code></pre>
835
+<p>The error rate is about 20%. If only a vector of predictions and a
836
+vector of actual classes is available, such as from an old study which
837
+did not use <strong>ClassifyR</strong> for cross-validation, then
838
+<em>calcExternalPerformance</em> can be used on a pair of factor vectors
839
+which have the same length.</p>
840
+<div id="comparison-of-different-classifications"
841
+class="section level3">
842
+<h3>Comparison of Different Classifications</h3>
843
+<p>The <em>samplesMetricMap</em> function allows the visual comparison
844
+of sample-wise error rate or accuracy measures from different
845
+<em>ClassifyResult</em> objects. Firstly, a classifier will be run that
846
+uses Kullback-Leibler divergence ranking and resubstitution error as a
847
+feature selection heuristic and a naive Bayes classifier for
848
+classification. This classification will use features that have either a
849
+change in location or in scale between classes.</p>
850
+<div class="sourceCode" id="cb33"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb33-1"><a href="#cb33-1" aria-hidden="true" tabindex="-1"></a>modellingParamsDD <span class="ot">&lt;-</span> <span class="fu">ModellingParams</span>(<span class="at">selectParams =</span> <span class="fu">SelectParams</span>(<span class="st">&quot;KL&quot;</span>),</span>
851
+<span id="cb33-2"><a href="#cb33-2" aria-hidden="true" tabindex="-1"></a>                                     <span class="at">trainParams =</span> <span class="fu">TrainParams</span>(<span class="st">&quot;naiveBayes&quot;</span>),</span>
852
+<span id="cb33-3"><a href="#cb33-3" aria-hidden="true" tabindex="-1"></a>                                     <span class="at">predictParams =</span> <span class="cn">NULL</span>)</span>
853
+<span id="cb33-4"><a href="#cb33-4" aria-hidden="true" tabindex="-1"></a>DDresults <span class="ot">&lt;-</span> <span class="fu">runTests</span>(measurements, classes, crossValParams, modellingParamsDD, <span class="at">verbose =</span> <span class="dv">1</span>)</span></code></pre></div>
854
+<pre><code>## Processing sample set 10.</code></pre>
855
+<pre><code>## Processing sample set 20.</code></pre>
856
+<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a>DDresults</span></code></pre></div>
857
+<pre><code>## An object of class &#39;ClassifyResult&#39;.
858
+## Characteristics:
859
+##    characteristic                       value
860
+##    Selection Name Kullback-Leibler Divergence
861
+##   Classifier Name          Naive Bayes Kernel
862
+##  Cross-validation     5 Permutations, 5 Folds
863
+## Features: List of length 25 of feature identifiers.
864
+## Predictions: A data frame of 950 rows.
865
+## Performance Measures: None calculated yet.</code></pre>
866
+<p>The naive Bayes kernel classifier by default uses the vertical
867
+distance between class densities but it can instead use the horizontal
868
+distance to the nearest non-zero density cross-over point to confidently
869
+classify samples in the tails of the densities.</p>
870
+<p>Now, the classification error for each sample is also calculated for
871
+both the differential means and differential distribution classifiers
872
+and both <em>ClassifyResult</em> objects generated so far are plotted
873
+with <em>samplesMetricMap</em>.</p>
874
+<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a>DMresults <span class="ot">&lt;-</span> <span class="fu">calcCVperformance</span>(DMresults, <span class="st">&quot;Sample Error&quot;</span>)</span>
875
+<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>DDresults <span class="ot">&lt;-</span> <span class="fu">calcCVperformance</span>(DDresults, <span class="st">&quot;Sample Error&quot;</span>)</span>
876
+<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a>resultsList <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">Abundance =</span> DMresults, <span class="at">Distribution =</span> DDresults)</span>
877
+<span id="cb38-4"><a href="#cb38-4" aria-hidden="true" tabindex="-1"></a><span class="fu">samplesMetricMap</span>(resultsList, <span class="at">metric =</span> <span class="st">&quot;Sample Error&quot;</span>, <span class="at">xAxisLabel =</span> <span class="st">&quot;Sample&quot;</span>,</span>
878
+<span id="cb38-5"><a href="#cb38-5" aria-hidden="true" tabindex="-1"></a>                              <span class="at">showXtickLabels =</span> <span class="cn">FALSE</span>)</span></code></pre></div>
879
+<pre><code>## Warning: Removed 2 rows containing missing values (`geom_tile()`).</code></pre>
880
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-14-1.png" width="960" /></p>
881
+<pre><code>## TableGrob (2 x 1) &quot;arrange&quot;: 2 grobs
882
+##   z     cells    name                grob
883
+## 1 1 (2-2,1-1) arrange      gtable[layout]
884
+## 2 2 (1-1,1-1) arrange text[GRID.text.533]</code></pre>
885
+<p>The benefit of this plot is that it allows the easy identification of
886
+samples which are hard to classify and could be explained by considering
887
+additional information about them. Differential distribution class
888
+prediction appears to be biased to the majority class (No Asthma).</p>
889
+<p>More traditionally, the distribution of performance values of each
890
+complete cross-validation can be visualised by <em>performancePlot</em>
891
+by providing them as a list to the function. The default is to draw box
892
+plots, but violin plots could also be made. The default performance
893
+metric to plot is balanced accuracy. If it’s not already calculated for
894
+all classifications, as in this case for DD, it will be done
895
+automatically.</p>
896
+<div class="sourceCode" id="cb41"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb41-1"><a href="#cb41-1" aria-hidden="true" tabindex="-1"></a><span class="fu">performancePlot</span>(resultsList)</span></code></pre></div>
897
+<pre><code>## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</code></pre>
898
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-15-1.png" width="700" /></p>
899
+<p>We can observe that the spread of balanced accuracy rates is small,
900
+but slightly wider for the differential distribution classifier.</p>
901
+<p>The features being ranked and selected in the feature selection stage
902
+can be compared within and between classifiers by the plotting functions
903
+<em>rankingPlot</em> and <em>selectionPlot</em>. Consider the task of
904
+visually representing how consistent the feature rankings of the top 100
905
+different features were for the differential distribution classifier for
906
+all 5 folds in the 5 cross-validations.</p>
907
+<div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="fu">rankingPlot</span>(DDresults, <span class="at">topRanked =</span> <span class="dv">1</span><span class="sc">:</span><span class="dv">100</span>, <span class="at">xLabelPositions =</span> <span class="fu">c</span>(<span class="dv">1</span>, <span class="fu">seq</span>(<span class="dv">10</span>, <span class="dv">100</span>, <span class="dv">10</span>)))</span></code></pre></div>
908
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-16-1.png" width="700" /></p>
909
+<p>The top-ranked features are fairly similar between all pairs of the
910
+25 cross-validations.</p>
911
+<p>For a large cross-validation scheme, such as leave-2-out
912
+cross-validation, or when <em>results</em> contains many
913
+classifications, there are many feature set comparisons to make. Note
914
+that <em>rankingPlot</em> and <em>selectionPlot</em> have a
915
+<em>parallelParams</em> option which allows for the calculation of
916
+feature set overlaps to be done on multiple processors.</p>
917
+</div>
918
+<div id="generating-a-roc-plot" class="section level3">
919
+<h3>Generating a ROC Plot</h3>
920
+<p>Some classifiers can output scores or probabilities representing how
921
+likely a sample is to be from one of the classes, instead of, or as well
922
+as, class labels. This enables different score thresholds to be tried,
923
+to generate pairs of false positive and true positive rates. The naive
924
+Bayes classifier used previously by default has its <em>returnType</em>
925
+parameter set to <em>“both”</em>, so class predictions and scores are
926
+both stored in the classification result. So does diagonal LDA. In this
927
+case, a data frame with class predictions and scores for each class is
928
+returned by the classifier to the cross-validation framework. Setting
929
+<em>returnType</em> to <em>“score”</em> for a classifier which has such
930
+an option is also sufficient to generate a ROC plot. Many existing
931
+classifiers in other R packages also have an option that allows a score
932
+or probability to be calculated.</p>
933
+<p>By default, scores from different iterations of prediction are merged
934
+and one line is drawn per classification. Alternatively, setting
935
+<em>mode = “average”</em> will consider each iteration of prediction
936
+separately, average them and also calculate and draw confidence
937
+intervals. The default interval is a 95% interval and is customisable by
938
+setting <em>interval</em>.</p>
939
+<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="fu">ROCplot</span>(resultsList, <span class="at">fontSizes =</span> <span class="fu">c</span>(<span class="dv">24</span>, <span class="dv">12</span>, <span class="dv">12</span>, <span class="dv">12</span>, <span class="dv">12</span>))</span></code></pre></div>
940
+<p><img src="ClassifyR_files/figure-html/unnamed-chunk-17-1.png" width="576" /></p>
941
+<p>This ROC plot shows the classifiability of the asthma data set is
942
+high. Some examples of functions which output scores are
943
+<em>fisherDiscriminant</em>, <em>DLDApredictInterface</em>, and
944
+<em>SVMpredictInterface</em>.</p>
945
+</div>
946
+</div>
947
+<div id="other-use-cases" class="section level2">
948
+<h2>Other Use Cases</h2>
949
+<p>Apart from cross-validation of one data set, ClassifyR can be used in
950
+a couple of other ways.</p>
951
+<div id="using-an-independent-test-set" class="section level3">
952
+<h3>Using an Independent Test Set</h3>
953
+<p>Sometimes, cross-validation is unnecessary. This happens when studies
954
+have large sample sizes and are designed such that a large number of
955
+samples is prespecified to form a test set. The classifier is only
956
+trained on the training sample set, and makes predictions only on the
957
+test sample set. This can be achieved by using the function
958
+<em>runTest</em> directly. See its documentation for required
959
+inputs.</p>
960
+</div>
961
+<div id="cross-validating-selected-features-on-a-different-data-set"
962
+class="section level3">
963
+<h3>Cross-validating Selected Features on a Different Data Set</h3>
964
+<p>Once a cross-validated classification is complete, the usefulness of
965
+the features selected may be explored in another dataset.
966
+<em>previousSelection</em> is a function which takes an existing
967
+<em>ClassifyResult</em> object and returns the features selected at the
968
+equivalent iteration which is currently being processed. This is
969
+necessary, because the models trained on one data set are not directly
970
+transferable to a new dataset; the classifier training (e.g. choosing
971
+thresholds, fitting model coefficients) is redone. Of course, the
972
+features in the new dataset should have the same naming system as the
973
+ones in the old dataset.</p>
974
+</div>
975
+<div id="parameter-tuning" class="section level3">
976
+<h3>Parameter Tuning</h3>
977
+<p>Some feature ranking methods or classifiers allow the choosing of
978
+tuning parameters, which control some aspect of their model learning.
979
+An example of doing parameter tuning with a linear SVM is presented.
980
+This particular SVM has a single tuning parameter, the cost. Higher
981
+values of this parameter penalise misclassifications more. Moreover,
982
+feature selection happens by using a feature ranking function and then
983
+trying a range of top-ranked features to see which gives the best
984
+performance, the range being specified by a list element named
985
+<em>nFeatures</em> and the performance type (e.g. Balanced Accuracy)
986
+specified by a list element named <em>performanceType</em>. Therefore,
987
+some kind of parameter tuning always happens, even if the feature
988
+ranking or classifier function does not have any explicit tuning
989
+parameters.</p>
990
+<p>Tuning is achieved in ClassifyR by providing a variable called
991
+<em>tuneParams</em> to the SelectParams or TrainParams constructor.
992
+<em>tuneParams</em> is a named list, with the names being the names of
993
+the tuning variables, except for one which is named
994
+<em>“performanceType”</em> and specifies the performance metric to use
995
+for picking the parameter values. Any of the non-sample-specific
996
+performance metrics which <em>calcCVperformance</em> calculates can be
997
+optimised.</p>
998
+<div class="sourceCode" id="cb45"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb45-1"><a href="#cb45-1" aria-hidden="true" tabindex="-1"></a>tuneList <span class="ot">&lt;-</span> <span class="fu">list</span>(<span class="at">cost =</span> <span class="fu">c</span>(<span class="fl">0.01</span>, <span class="fl">0.1</span>, <span class="dv">1</span>, <span class="dv">10</span>))</span>
999
+<span id="cb45-2"><a href="#cb45-2" aria-hidden="true" tabindex="-1"></a>SVMparams <span class="ot">&lt;-</span> <span class="fu">ModellingParams</span>(<span class="at">trainParams =</span> <span class="fu">TrainParams</span>(<span class="st">&quot;SVM&quot;</span>, <span class="at">kernel =</span> <span class="st">&quot;linear&quot;</span>, <span class="at">tuneParams =</span> tuneList),</span>
1000
+<span id="cb45-3"><a href="#cb45-3" aria-hidden="true" tabindex="-1"></a>                             <span class="at">predictParams =</span> <span class="fu">PredictParams</span>(<span class="st">&quot;SVM&quot;</span>))</span>
1001
+<span id="cb45-4"><a href="#cb45-4" aria-hidden="true" tabindex="-1"></a>SVMresults <span class="ot">&lt;-</span> <span class="fu">runTests</span>(measurements, classes, crossValParams, SVMparams)</span></code></pre></div>
1002
+<pre><code>## Processing sample set 10.</code></pre>
1003
+<pre><code>## Processing sample set 20.</code></pre>
1004
+<p>The index of the chosen parameters, as well as all combinations of
1005
+parameters and their associated performance metric, are stored for every
1006
+validation, and can be accessed with the <em>tunedParameters</em>
1007
+function.</p>
1008
+<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a><span class="fu">length</span>(<span class="fu">tunedParameters</span>(SVMresults))</span></code></pre></div>
1009
+<pre><code>## [1] 25</code></pre>
1010
+<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a><span class="fu">tunedParameters</span>(SVMresults)[<span class="dv">1</span><span class="sc">:</span><span class="dv">5</span>]</span></code></pre></div>
1011
+<pre><code>## [[1]]
1012
+## [[1]]$tuneCombinations
1013
+##    topN  cost Balanced Accuracy
1014
+## 1    10  0.01         0.8507719
1015
+## 2    20  0.01         0.8551553
1016
+## 3    30  0.01         0.8696398
1017
+## 4    40  0.01         0.9073756
1018
+## 5    50  0.01         0.8986087
1019
+## 6    60  0.01         0.8986087
1020
+## 7    70  0.01         0.8942253
1021
+## 8    80  0.01         0.9036592
1022
+## 9    90  0.01         0.9036592
1023
+## 10  100  0.01         0.8986087
1024
+## 11   10  0.10         0.8608729
1025
+## 12   20  0.10         0.8942253
1026
+## 13   30  0.10         0.8746903
1027
+## 14   40  0.10         0.9188107
1028
+## 15   50  0.10         0.9087097
1029
+## 16   60  0.10         0.9137602
1030
+## 17   70  0.10         0.9188107
1031
+## 18   80  0.10         0.9137602
1032
+## 19   90  0.10         0.9238613
1033
+## 20  100  0.10         0.9477797
1034
+## 21   10  1.00         0.8992758
1035
+## 22   20  1.00         0.8898418
1036
+## 23   30  1.00         0.9144273
1037
+## 24   40  1.00         0.9049933
1038
+## 25   50  1.00         0.9666476
1039
+## 26   60  1.00         0.9811321
1040
+## 27   70  1.00         0.9855155
1041
+## 28   80  1.00         1.0000000
1042
+## 29   90  1.00         1.0000000
1043
+## 30  100  1.00         1.0000000
1044
+## 31   10 10.00         0.9043263
1045
+## 32   20 10.00         0.8905089
1046
+## 33   30 10.00         0.9289118
1047
+## 34   40 10.00         0.9855155
1048
+## 35   50 10.00         1.0000000
1049
+## 36   60 10.00         1.0000000
1050
+## 37   70 10.00         1.0000000
1051
+## 38   80 10.00         1.0000000
1052
+## 39   90 10.00         1.0000000
1053
+## 40  100 10.00         1.0000000
1054
+## 
1055
+## [[1]]$bestIndex
1056
+## [1] 28
1057
+## 
1058
+## 
1059
+## [[2]]
1060
+## [[2]]$tuneCombinations
1061
+##    topN  cost Balanced Accuracy
1062
+## 1    10  0.01         0.8066514
1063
+## 2    20  0.01         0.7783495
1064
+## 3    30  0.01         0.7877835
1065
+## 4    40  0.01         0.7783495
1066
+## 5    50  0.01         0.8117019
1067
+## 6    60  0.01         0.8117019
1068
+## 7    70  0.01         0.8117019
1069
+## 8    80  0.01         0.8261864
1070
+## 9    90  0.01         0.8261864
1071
+## 10  100  0.01         0.8261864
1072
+## 11   10  0.10         0.7928340
1073
+## 12   20  0.10         0.8029350
1074
+## 13   30  0.10         0.8406709
1075
+## 14   40  0.10         0.8406709
1076
+## 15   50  0.10         0.8457214
1077
+## 16   60  0.10         0.8551553
1078
+## 17   70  0.10         0.9181437
1079
+## 18   80  0.10         0.9326282
1080
+## 19   90  0.10         0.9275777
1081
+## 20  100  0.10         0.9326282
1082
+## 21   10  1.00         0.7746331
1083
+## 22   20  1.00         0.8602058
1084
+## 23   30  1.00         0.8652563
1085
+## 24   40  1.00         0.9023251
1086
+## 25   50  1.00         0.9413951
1087
+## 26   60  1.00         0.9514961
1088
+## 27   70  1.00         0.9521631
1089
+## 28   80  1.00