...
|
...
|
@@ -6,12 +6,12 @@
|
6
|
6
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
7
|
7
|
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
8
|
8
|
<meta name="description" content="ClassifyR">
|
9
|
|
-<title>An Introduction to ClassifyR</title>
|
|
9
|
+<title>An Introduction to ClassifyR • ClassifyR</title>
|
10
|
10
|
<script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
|
11
|
11
|
<link href="../deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet">
|
12
|
12
|
<script src="../deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
|
13
|
13
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
|
14
|
|
-<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to ClassifyR">
|
|
14
|
+<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to ClassifyR">
|
15
|
15
|
<meta property="og:description" content="ClassifyR">
|
16
|
16
|
<!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
|
17
|
17
|
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
|
...
|
...
|
@@ -22,7 +22,7 @@
|
22
|
22
|
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
|
23
|
23
|
|
24
|
24
|
|
25
|
|
- <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light"><div class="container">
|
|
25
|
+ <nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-dark"><div class="container">
|
26
|
26
|
|
27
|
27
|
<a class="navbar-brand me-2" href="../index.html">ClassifyR</a>
|
28
|
28
|
|
...
|
...
|
@@ -58,10 +58,13 @@
|
58
|
58
|
|
59
|
59
|
|
60
|
60
|
|
61
|
|
-<script src="ClassifyR_files/accessible-code-block-0.0.1/empty-anchor.js"></script><div class="row">
|
|
61
|
+
|
|
62
|
+<div class="row">
|
62
|
63
|
<main id="main" class="col-md-9"><div class="page-header">
|
63
|
|
- <img src="" class="logo" alt=""><h1>An Introduction to ClassifyR</h1>
|
64
|
|
- <h4 data-toc-skip class="author">Dario Strbenac, Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University of Sydney, Australia.</h4>
|
|
64
|
+ <img src="" class="logo" alt=""><h1>An Introduction to <strong>ClassifyR</strong></h1>
|
|
65
|
+ <h4 data-toc-skip class="author">Dario Strbenac,
|
|
66
|
+Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University
|
|
67
|
+of Sydney, Australia.</h4>
|
65
|
68
|
|
66
|
69
|
|
67
|
70
|
|
...
|
...
|
@@ -73,30 +76,58 @@
|
73
|
76
|
<div class="section level2">
|
74
|
77
|
<h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
|
75
|
78
|
</h2>
|
76
|
|
-<p>Typically, each feature selection method or classifier originates from a different R package, which <strong>ClassifyR</strong> provides a wrapper around. By default, only high-performance t-test/F-test and random forest are installed. If you intend to compare between numerous different modelling methods, you should install all suggested packages at once by using the command <code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>. This will take a few minutes, particularly on Linux, because each package will be compiled from source code.</p>
|
|
79
|
+<p>Typically, each feature selection method or classifier originates
|
|
80
|
+from a different R package, which <strong>ClassifyR</strong> provides a
|
|
81
|
+wrapper around. By default, only high-performance t-test/F-test and
|
|
82
|
+random forest are installed. If you intend to compare between numerous
|
|
83
|
+different modelling methods, you should install all suggested packages
|
|
84
|
+at once by using the command
|
|
85
|
+<code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>.
|
|
86
|
+This will take a few minutes, particularly on Linux, because each
|
|
87
|
+package will be compiled from source code.</p>
|
77
|
88
|
</div>
|
78
|
89
|
<div class="section level2">
|
79
|
90
|
<h2 id="overview">Overview<a class="anchor" aria-label="anchor" href="#overview"></a>
|
80
|
91
|
</h2>
|
81
|
|
-<p><strong>ClassifyR</strong> provides a structured pipeline for cross-validated classification. Classification is viewed in terms of four stages, data transformation, feature selection, classifier training, and prediction. The driver functions <em>crossValidate</em> and <em>runTests</em> implements varieties of cross-validation. They are:</p>
|
|
92
|
+<p><strong>ClassifyR</strong> provides a structured pipeline for
|
|
93
|
+cross-validated classification. Classification is viewed in terms of
|
|
94
|
+four stages: data transformation, feature selection, classifier
|
|
95
|
+training, and prediction. The driver functions <em>crossValidate</em>
|
|
96
|
+and <em>runTests</em> implement varieties of cross-validation. They
|
|
97
|
+are:</p>
|
82
|
98
|
<ul>
|
83
|
|
-<li>Permutation of the order of samples followed by k-fold cross-validation (runTests only)</li>
|
|
99
|
+<li>Permutation of the order of samples followed by k-fold
|
|
100
|
+cross-validation (runTests only)</li>
|
84
|
101
|
<li>Repeated x% test set cross-validation</li>
|
85
|
102
|
<li>leave-k-out cross-validation</li>
|
86
|
103
|
</ul>
|
87
|
|
-<p>Driver functions can use parallel processing capabilities in R to speed up cross-validations when many CPUs are available. The output of the driver functions is a <em>ClassifyResult</em> object which can be directly used by the performance evaluation functions. The process of classification is summarised by a flowchart.</p>
|
88
|
|
-<img src="" style="margin-left: auto;margin-right: auto"><p>Importantly, ClassifyR implements a number of methods for classification using different kinds of changes in measurements between classes. Most classifiers work with features where the means are different. In addition to changes in means (DM), <strong>ClassifyR</strong> also allows for classification using differential variability (DV; changes in scale) and differential distribution (DD; changes in location and/or scale).</p>
|
|
104
|
+<p>Driver functions can use parallel processing capabilities in R to
|
|
105
|
+speed up cross-validations when many CPUs are available. The output of
|
|
106
|
+the driver functions is a <em>ClassifyResult</em> object which can be
|
|
107
|
+directly used by the performance evaluation functions. The process of
|
|
108
|
+classification is summarised by a flowchart.</p>
|
|
109
|
+<img src="" alt="Flowchart summarising the classification process" style="margin-left: auto;margin-right: auto"><p>Importantly, ClassifyR implements a number of methods for
|
|
110
|
+classification using different kinds of changes in measurements between
|
|
111
|
+classes. Most classifiers work with features where the means are
|
|
112
|
+different. In addition to changes in means (DM),
|
|
113
|
+<strong>ClassifyR</strong> also allows for classification using
|
|
114
|
+differential variability (DV; changes in scale) and differential
|
|
115
|
+distribution (DD; changes in location and/or scale).</p>
|
89
|
116
|
<div class="section level3">
|
90
|
117
|
<h3 id="case-study-diagnosing-asthma">Case Study: Diagnosing Asthma<a class="anchor" aria-label="anchor" href="#case-study-diagnosing-asthma"></a>
|
91
|
118
|
</h3>
|
92
|
|
-<p>To demonstrate some key features of ClassifyR, a data set consisting of the 2000 most variably expressed genes and 190 people will be used to quickly obtain results. The journal article corresponding to the data set was published in <em>Scientific Reports</em> in 2018 and is titled <a href="http://www.nature.com/articles/s41598-018-27189-4" class="external-link">A Nasal Brush-based Classifier of Asthma Identified by Machine Learning Analysis of Nasal RNA Sequence Data</a>.</p>
|
|
119
|
+<p>To demonstrate some key features of ClassifyR, a data set consisting
|
|
120
|
+of the 2000 most variably expressed genes and 190 people will be used to
|
|
121
|
+quickly obtain results. The journal article corresponding to the data
|
|
122
|
+set was published in <em>Scientific Reports</em> in 2018 and is titled
|
|
123
|
+<a href="http://www.nature.com/articles/s41598-018-27189-4" class="external-link">A Nasal
|
|
124
|
+Brush-based Classifier of Asthma Identified by Machine Learning Analysis
|
|
125
|
+of Nasal RNA Sequence Data</a>.</p>
|
93
|
126
|
<p>Load the package.</p>
|
94
|
127
|
<div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
|
95
|
128
|
<code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://sydneybiox.github.io/ClassifyR/">ClassifyR</a></span><span class="op">)</span></span></code></pre></div>
|
96
|
|
-<pre><code><span><span class="co">## Warning: multiple methods tables found for 'aperm'</span></span></code></pre>
|
97
|
|
-<pre><code><span><span class="co">## Warning: replacing previous import 'BiocGenerics::aperm' by 'DelayedArray::aperm' when loading 'SummarizedExperiment'</span></span></code></pre>
|
98
|
129
|
<p>A glimpse at the RNA measurements and sample classes.</p>
|
99
|
|
-<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
|
|
130
|
+<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
|
100
|
131
|
<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/data.html" class="external-link">data</a></span><span class="op">(</span><span class="va">asthma</span><span class="op">)</span> <span class="co"># Contains measurements and classes variables.</span></span>
|
101
|
132
|
<span><span class="va">measurements</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">5</span>, <span class="fl">1</span><span class="op">:</span><span class="fl">5</span><span class="op">]</span></span></code></pre></div>
|
102
|
133
|
<pre><code><span><span class="co">## HBB BPIFA1 XIST FCGR3B HBA2</span></span>
|
...
|
...
|
@@ -105,19 +136,44 @@
|
105
|
136
|
<span><span class="co">## Sample 3 12.15 17.44 10.21 7.87 9.68</span></span>
|
106
|
137
|
<span><span class="co">## Sample 4 10.60 11.87 6.27 14.75 8.96</span></span>
|
107
|
138
|
<span><span class="co">## Sample 5 8.18 15.01 11.21 6.77 6.43</span></span></code></pre>
|
108
|
|
-<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
|
|
139
|
+<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
|
109
|
140
|
<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">classes</span><span class="op">)</span></span></code></pre></div>
|
110
|
141
|
<pre><code><span><span class="co">## [1] No No No No Yes No </span></span>
|
111
|
142
|
<span><span class="co">## Levels: No Yes</span></span></code></pre>
|
112
|
|
-<p>The numeric matrix variable <em>measurements</em> stores the normalised values of the RNA gene abundances for each sample and the factor vector <em>classes</em> identifies which class the samples belong to. The measurements were normalised using <strong>DESeq2</strong>’s <em>varianceStabilizingTransformation</em> function, which produces <span class="math inline">\(log_2\)</span>-like data.</p>
|
113
|
|
-<p>For more complex data sets with multiple kinds of experiments (e.g. DNA methylation, copy number, gene expression on the same set of samples) a <a href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html" class="external-link"><em>MultiAssayExperiment</em></a> is recommended for data storage and supported by <strong>ClassifyR</strong>’s methods.</p>
|
|
143
|
+<p>The numeric matrix variable <em>measurements</em> stores the
|
|
144
|
+normalised values of the RNA gene abundances for each sample and the
|
|
145
|
+factor vector <em>classes</em> identifies which class the samples belong
|
|
146
|
+to. The measurements were normalised using <strong>DESeq2</strong>’s
|
|
147
|
+<em>varianceStabilizingTransformation</em> function, which produces
|
|
148
|
+<span class="math inline">\(log_2\)</span>-like data.</p>
|
|
149
|
+<p>For more complex data sets with multiple kinds of experiments
|
|
150
|
+(e.g. DNA methylation, copy number, gene expression on the same set of
|
|
151
|
+samples) a <a href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html" class="external-link"><em>MultiAssayExperiment</em></a>
|
|
152
|
+is recommended for data storage and supported by
|
|
153
|
+<strong>ClassifyR</strong>’s methods.</p>
|
114
|
154
|
</div>
|
115
|
155
|
</div>
|
116
|
156
|
<div class="section level2">
|
117
|
157
|
<h2 id="quick-start-crossvalidate-function">Quick Start: <em>crossValidate</em> Function<a class="anchor" aria-label="anchor" href="#quick-start-crossvalidate-function"></a>
|
118
|
158
|
</h2>
|
119
|
|
-<p>The <em>crossValidate</em> function offers a quick and simple way to start analysing a dataset in ClassifyR. It is a wrapper for <em>runTests</em>, the core model building and testing function of ClassifyR. <em>crossValidate</em> must be supplied with <em>measurements</em>, a simple tabular data container or a list-like structure of such related tabular data on common samples. The classes of it may be <em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>, <em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>. For a dataset with <span class="math inline">\(n\)</span> observations and <span class="math inline">\(p\)</span> variables, the <em>crossValidate</em> function will accept inputs of the following shapes:</p>
|
|
159
|
+<p>The <em>crossValidate</em> function offers a quick and simple way to
|
|
160
|
+start analysing a dataset in ClassifyR. It is a wrapper for
|
|
161
|
+<em>runTests</em>, the core model building and testing function of
|
|
162
|
+ClassifyR. <em>crossValidate</em> must be supplied with
|
|
163
|
+<em>measurements</em>, a simple tabular data container or a list-like
|
|
164
|
+structure of such related tabular data on common samples. It may be a
|
|
165
|
+<em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>,
|
|
166
|
+<em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>.
|
|
167
|
+For a dataset with <span class="math inline">\(n\)</span> observations
|
|
168
|
+and <span class="math inline">\(p\)</span> variables, the
|
|
169
|
+<em>crossValidate</em> function will accept inputs of the following
|
|
170
|
+shapes:</p>
|
120
|
171
|
<table class="table">
|
|
172
|
+<colgroup>
|
|
173
|
+<col width="25%">
|
|
174
|
+<col width="37%">
|
|
175
|
+<col width="37%">
|
|
176
|
+</colgroup>
|
121
|
177
|
<thead><tr class="header">
|
122
|
178
|
<th>Data Type</th>
|
123
|
179
|
<th align="center"><span class="math inline">\(n \times p\)</span></th>
|
...
|
...
|
@@ -146,34 +202,64 @@
|
146
|
202
|
</tr>
|
147
|
203
|
<tr class="odd">
|
148
|
204
|
<td>
|
149
|
|
-<span style="font-family: 'Courier New', monospace;">list</span> of <span style="font-family: 'Courier New', monospace;">data.frame</span>s</td>
|
|
205
|
+<span style="font-family: 'Courier New', monospace;">list</span> of
|
|
206
|
+<span style="font-family: 'Courier New', monospace;">data.frame</span>s</td>
|
150
|
207
|
<td align="center">✔</td>
|
151
|
208
|
<td align="center"></td>
|
152
|
209
|
</tr>
|
153
|
210
|
</tbody>
|
154
|
211
|
</table>
|
155
|
|
-<p><em>crossValidate</em> must also be supplied with <em>outcome</em>, which represents the prediction to be made in a variety of possible ways.</p>
|
|
212
|
+<p><em>crossValidate</em> must also be supplied with <em>outcome</em>,
|
|
213
|
+which represents the prediction to be made in a variety of possible
|
|
214
|
+ways.</p>
|
156
|
215
|
<ul>
|
157
|
|
-<li>A <em>factor</em> that contains the class label for each observation. <em>classes</em> must be of length <span class="math inline">\(n\)</span>.</li>
|
158
|
|
-<li>A <em>character</em> of length 1 that matches a column name in a data frame which holds the classes. The classes will automatically be removed before training is done.</li>
|
159
|
|
-<li>A <em>Surv</em> object of the same length as the number of samples in the data which contains information about the time and censoring of the samples.</li>
|
160
|
|
-<li>A <em>character</em> vector of length 2 or 3 that each match a column name in a data frame which holds information about the time and censoring of the samples. The time-to-event columns will automatically be removed before training is done.</li>
|
|
216
|
+<li>A <em>factor</em> that contains the class label for each
|
|
217
|
+observation. <em>classes</em> must be of length <span class="math inline">\(n\)</span>.</li>
|
|
218
|
+<li>A <em>character</em> of length 1 that matches a column name in a
|
|
219
|
+data frame which holds the classes. The classes will automatically be
|
|
220
|
+removed before training is done.</li>
|
|
221
|
+<li>A <em>Surv</em> object of the same length as the number of samples
|
|
222
|
+in the data which contains information about the time and censoring of
|
|
223
|
+the samples.</li>
|
|
224
|
+<li>A <em>character</em> vector of length 2 or 3 whose elements each match a
|
|
225
|
+column name in a data frame which holds information about the time and
|
|
226
|
+censoring of the samples. The time-to-event columns will automatically
|
|
227
|
+be removed before training is done.</li>
|
161
|
228
|
</ul>
|
162
|
|
-<p>The type of classifier used can be changed with the <em>classifier</em> argument. The default is a random forest, which seamlessly handles categorical and numerical data. A full list of classifiers can be seen by running <em>?crossValidate</em>. A feature selection step can be performed before classification using <em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by default. Similarly, the number of folds and number of repeats for cross validation can be changed with the <em>nFolds</em> and <em>nRepeats</em> arguments. If wanted, <em>nCores</em> can be specified to run the cross validation in parallel. To perform 5-fold cross-validation of a Support Vector Machine with 2 repeats:</p>
|
163
|
|
-<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
|
|
229
|
+<p>The type of classifier used can be changed with the
|
|
230
|
+<em>classifier</em> argument. The default is a random forest, which
|
|
231
|
+seamlessly handles categorical and numerical data. A full list of
|
|
232
|
+classifiers can be seen by running <em>?crossValidate</em>. A feature
|
|
233
|
+selection step can be performed before classification using
|
|
234
|
+<em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by
|
|
235
|
+default. Similarly, the number of folds and number of repeats for cross
|
|
236
|
+validation can be changed with the <em>nFolds</em> and <em>nRepeats</em>
|
|
237
|
+arguments. If wanted, <em>nCores</em> can be specified to run the cross
|
|
238
|
+validation in parallel. To perform 5-fold cross-validation of a Support
|
|
239
|
+Vector Machine with 2 repeats:</p>
|
|
240
|
+<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
|
164
|
241
|
<code class="sourceCode R"><span><span class="va">result</span> <span class="op"><-</span> <span class="fu"><a href="../reference/crossValidate.html">crossValidate</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, classifier <span class="op">=</span> <span class="st">"SVM"</span>,</span>
|
165
|
242
|
<span> nFeatures <span class="op">=</span> <span class="fl">20</span>, nFolds <span class="op">=</span> <span class="fl">5</span>, nRepeats <span class="op">=</span> <span class="fl">2</span>, nCores <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
|
166
|
243
|
<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
167
|
|
-<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
|
|
244
|
+<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
|
168
|
245
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span><span class="op">)</span></span></code></pre></div>
|
169
|
246
|
<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
|
170
|
247
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-5-1.png" width="700"></p>
|
171
|
248
|
<div class="section level3">
|
172
|
249
|
<h3 id="data-integration-with-crossvalidate">Data Integration with crossValidate<a class="anchor" aria-label="anchor" href="#data-integration-with-crossvalidate"></a>
|
173
|
250
|
</h3>
|
174
|
|
-<p><em>crossValidate</em> also allows data from multiple sources to be integrated into a single model. The integration method can be specified with <em>multiViewMethod</em> argument. In this example, suppose the first 10 variables in the asthma data set are from a certain source and the remaining 1990 variables are from a second source. To integrate multiple data sets, each variable must be labeled with the data set it came from. This is done in a different manner depending on the data type of <em>measurements</em>.</p>
|
175
|
|
-<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified using <em>mcols</em>. In the column metadata, each feature must have an <em>assay</em> and a <em>feature</em> name.</p>
|
176
|
|
-<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
|
|
251
|
+<p><em>crossValidate</em> also allows data from multiple sources to be
|
|
252
|
+integrated into a single model. The integration method can be specified
|
|
253
|
+with the <em>multiViewMethod</em> argument. In this example, suppose the
|
|
254
|
+first 10 variables in the asthma data set are from a certain source and
|
|
255
|
+the remaining 1990 variables are from a second source. To integrate
|
|
256
|
+multiple data sets, each variable must be labeled with the data set it
|
|
257
|
+came from. This is done in a different manner depending on the data type
|
|
258
|
+of <em>measurements</em>.</p>
|
|
259
|
+<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified
|
|
260
|
+using <em>mcols</em>. In the column metadata, each feature must have an
|
|
261
|
+<em>assay</em> and a <em>feature</em> name.</p>
|
|
262
|
+<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
|
177
|
263
|
<code class="sourceCode R"><span><span class="va">measurementsDF</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/DataFrame-class.html" class="external-link">DataFrame</a></span><span class="op">(</span><span class="va">measurements</span><span class="op">)</span></span>
|
178
|
264
|
<span><span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/Vector-class.html" class="external-link">mcols</a></span><span class="op">(</span><span class="va">measurementsDF</span><span class="op">)</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span></span>
|
179
|
265
|
<span> assay <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay_1"</span>, <span class="st">"assay_2"</span><span class="op">)</span>, times <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">10</span>, <span class="fl">1990</span><span class="op">)</span><span class="op">)</span>,</span>
|
...
|
...
|
@@ -185,12 +271,13 @@
|
185
|
271
|
<pre><code><span><span class="co">## Processing sample set 10.</span></span>
|
186
|
272
|
<span><span class="co">## Processing sample set 10.</span></span>
|
187
|
273
|
<span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
188
|
|
-<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
|
|
274
|
+<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
|
189
|
275
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
|
190
|
276
|
<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
|
191
|
277
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-6-1.png" width="700"></p>
|
192
|
|
-<p>If using a list of <em>data.frame</em>s, the name of each element in the list will be used as the assay name.</p>
|
193
|
|
-<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
|
|
278
|
+<p>If using a list of <em>data.frame</em>s, the name of each element in
|
|
279
|
+the list will be used as the assay name.</p>
|
|
280
|
+<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
|
194
|
281
|
<code class="sourceCode R"><span><span class="co"># Assigns first 10 variables to dataset_1, and the rest to dataset_2</span></span>
|
195
|
282
|
<span><span class="va">measurementsList</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span></span>
|
196
|
283
|
<span> <span class="op">(</span><span class="va">measurements</span> <span class="op">|></span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">10</span><span class="op">]</span>,</span>
|
...
|
...
|
@@ -203,7 +290,7 @@
|
203
|
290
|
<pre><code><span><span class="co">## Processing sample set 10.</span></span>
|
204
|
291
|
<span><span class="co">## Processing sample set 10.</span></span>
|
205
|
292
|
<span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
206
|
|
-<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
|
|
293
|
+<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
|
207
|
294
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
|
208
|
295
|
<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
|
209
|
296
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-7-1.png" width="700"></p>
|
...
|
...
|
@@ -212,11 +299,17 @@
|
212
|
299
|
<div class="section level2">
|
213
|
300
|
<h2 id="a-more-detailed-look-at-classifyr">A More Detailed Look at ClassifyR<a class="anchor" aria-label="anchor" href="#a-more-detailed-look-at-classifyr"></a>
|
214
|
301
|
</h2>
|
215
|
|
-<p>In the following sections, some of the most useful functions provided in <strong>ClassifyR</strong> will be demonstrated. However, a user could wrap any feature selection, training, or prediction function to the classification framework, as long as it meets some simple rules about the input and return parameters. See the appendix section of this guide titled “Rules for New Functions” for a description of these.</p>
|
|
302
|
+<p>In the following sections, some of the most useful functions provided
|
|
303
|
+in <strong>ClassifyR</strong> will be demonstrated. However, a user
|
|
304
|
+could wrap any feature selection, training, or prediction function to
|
|
305
|
+the classification framework, as long as it meets some simple rules
|
|
306
|
+about the input and return parameters. See the appendix section of this
|
|
307
|
+guide titled “Rules for New Functions” for a description of these.</p>
|
216
|
308
|
<div class="section level3">
|
217
|
309
|
<h3 id="comparison-to-existing-classification-frameworks">Comparison to Existing Classification Frameworks<a class="anchor" aria-label="anchor" href="#comparison-to-existing-classification-frameworks"></a>
|
218
|
310
|
</h3>
|
219
|
|
-<p>There are a few other frameworks for classification in R. The table below provides a comparison of which features they offer.</p>
|
|
311
|
+<p>There are a few other frameworks for classification in R. The table
|
|
312
|
+below provides a comparison of which features they offer.</p>
|
220
|
313
|
<table class="table">
|
221
|
314
|
<colgroup>
|
222
|
315
|
<col width="8%">
|
...
|
...
|
@@ -302,12 +395,19 @@
|
302
|
395
|
<div class="section level3">
|
303
|
396
|
<h3 id="provided-functionality">Provided Functionality<a class="anchor" aria-label="anchor" href="#provided-functionality"></a>
|
304
|
397
|
</h3>
|
305
|
|
-<p>Although being a cross-validation framework, a number of popular feature selection and classification functions are provided by the package which meet the requirements of functions to be used by it (see the last section).</p>
|
|
398
|
+<p>Although <strong>ClassifyR</strong> is a cross-validation framework, a number of popular
|
|
399
|
+feature selection and classification functions are provided by the
|
|
400
|
+package which meet the requirements of functions to be used by it (see
|
|
401
|
+the last section).</p>
|
306
|
402
|
<div class="section level4">
|
307
|
403
|
<h4 id="provided-methods-for-feature-selection-and-classification">Provided Methods for Feature Selection and Classification<a class="anchor" aria-label="anchor" href="#provided-methods-for-feature-selection-and-classification"></a>
|
308
|
404
|
</h4>
|
309
|
|
-<p>In the following tables, a function that is used when no function is explicitly specified by the user is shown as <span style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
|
310
|
|
-<p>The functions below produce a ranking, of which different size subsets are tried and the classifier performance evaluated, to select a best subset of features, based on a criterion such as balanced accuracy rate, for example.</p>
|
|
405
|
+<p>In the following tables, a function that is used when no function is
|
|
406
|
+explicitly specified by the user is shown as <span style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
|
|
407
|
+<p>The functions below produce a ranking, of which different size
|
|
408
|
+subsets are tried and the classifier performance evaluated, to select a
|
|
409
|
+best subset of features, based on a criterion such as balanced accuracy
|
|
410
|
+rate, for example.</p>
|
311
|
411
|
<table style="width:100%;" class="table">
|
312
|
412
|
<colgroup>
|
313
|
413
|
<col width="9%">
|
...
|
...
|
@@ -361,7 +461,8 @@
|
361
|
461
|
</tr>
|
362
|
462
|
<tr class="even">
|
363
|
463
|
<td><span style="font-family: 'Courier New', monospace;">DMDranking</span></td>
|
364
|
|
-<td><span style="white-space: nowrap">Difference in location (mean/median) and/or scale (SD, MAD, <span class="math inline">\(Q_n\)</span>)</span></td>
|
|
464
|
+<td><span style="white-space: nowrap">Difference in location
|
|
465
|
+(mean/median) and/or scale (SD, MAD, <span class="math inline">\(Q_n\)</span>)</span></td>
|
365
|
466
|
<td>✔</td>
|
366
|
467
|
<td>✔</td>
|
367
|
468
|
<td>✔</td>
|
...
|
...
|
@@ -410,7 +511,9 @@
|
410
|
511
|
<td>
|
411
|
512
|
<span style="padding:1px; border:2px dashed #e64626; display:inline-block; margin-bottom: 3px; font-family: 'Courier New', monospace;">DLDAtrainInterface</span>,<br><span style="padding:1px; border:2px dashed #e64626; display:inline-block; font-family: 'Courier New', monospace;">DLDApredictInterface</span>
|
412
|
513
|
</td>
|
413
|
|
-<td>Wrappers for sparsediscrim’s functions <span style="font-family: 'Courier New', monospace;">dlda</span> and <span style="font-family: 'Courier New', monospace;">predict.dlda</span> functions</td>
|
|
514
|
+<td>Wrappers for sparsediscrim’s functions <span style="font-family: 'Courier New', monospace;">dlda</span> and
|
|
515
|
+<span style="font-family: 'Courier New', monospace;">predict.dlda</span>
|
|
516
|
+</td>
|
414
|
517
|
<td>✔</td>
|
415
|
518
|
<td></td>
|
416
|
519
|
<td></td>
|
...
|
...
|
@@ -425,9 +528,11 @@
|
425
|
528
|
</tr>
|
426
|
529
|
<tr class="odd">
|
427
|
530
|
<td>
|
428
|
|
-<span style="font-family: 'Courier New', monospace;">elasticNetGLMtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">elasticNetGLMpredictInterface</span>
|
|
531
|
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMtrainInterface</span>,
|
|
532
|
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMpredictInterface</span>
|
429
|
533
|
</td>
|
430
|
|
-<td>Wrappers for glmnet’s elastic net GLM functions <span style="font-family: 'Courier New', monospace;">glmnet</span> and <span style="font-family: 'Courier New', monospace;">predict.glmnet</span>
|
|
534
|
+<td>Wrappers for glmnet’s elastic net GLM functions <span style="font-family: 'Courier New', monospace;">glmnet</span> and
|
|
535
|
+<span style="font-family: 'Courier New', monospace;">predict.glmnet</span>
|
431
|
536
|
</td>
|
432
|
537
|
<td>✔</td>
|
433
|
538
|
<td></td>
|
...
|
...
|
@@ -435,9 +540,11 @@
|
435
|
540
|
</tr>
|
436
|
541
|
<tr class="even">
|
437
|
542
|
<td>
|
438
|
|
-<span style="font-family: 'Courier New', monospace;">NSCtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">NSCpredictInterface</span>
|
|
543
|
+<span style="font-family: 'Courier New', monospace;">NSCtrainInterface</span>,
|
|
544
|
+<span style="font-family: 'Courier New', monospace;">NSCpredictInterface</span>
|
439
|
545
|
</td>
|
440
|
|
-<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span style="font-family: 'Courier New', monospace;">pamr.train</span> and <span style="font-family: 'Courier New', monospace;">pamr.predict</span>
|
|
546
|
+<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span style="font-family: 'Courier New', monospace;">pamr.train</span>
|
|
547
|
+and <span style="font-family: 'Courier New', monospace;">pamr.predict</span>
|
441
|
548
|
</td>
|
442
|
549
|
<td>✔</td>
|
443
|
550
|
<td></td>
|
...
|
...
|
@@ -452,7 +559,8 @@
|
452
|
559
|
</tr>
|
453
|
560
|
<tr class="even">
|
454
|
561
|
<td>
|
455
|
|
-<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>, <span style="font-family: 'Courier New', monospace;">mixModelsPredict</span>
|
|
562
|
+<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>,
|
|
563
|
+<span style="font-family: 'Courier New', monospace;">mixModelsPredict</span>
|
456
|
564
|
</td>
|
457
|
565
|
<td>Feature-wise mixtures of normals and voting</td>
|
458
|
566
|
<td>✔</td>
|
...
|
...
|
@@ -468,9 +576,11 @@
|
468
|
576
|
</tr>
|
469
|
577
|
<tr class="even">
|
470
|
578
|
<td>
|
471
|
|
-<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span>
|
|
579
|
+<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>,
|
|
580
|
+<span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span>
|
472
|
581
|
</td>
|
473
|
|
-<td>Wrapper for ranger’s functions <span style="font-family: 'Courier New', monospace;">ranger</span> and <span style="font-family: 'Courier New', monospace;">predict</span>
|
|
582
|
+<td>Wrapper for ranger’s functions <span style="font-family: 'Courier New', monospace;">ranger</span> and
|
|
583
|
+<span style="font-family: 'Courier New', monospace;">predict</span>
|
474
|
584
|
</td>
|
475
|
585
|
<td>✔</td>
|
476
|
586
|
<td>✔</td>
|
...
|
...
|
@@ -478,9 +588,11 @@
|
478
|
588
|
</tr>
|
479
|
589
|
<tr class="odd">
|
480
|
590
|
<td>
|
481
|
|
-<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span>
|
|
591
|
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>,
|
|
592
|
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span>
|
482
|
593
|
</td>
|
483
|
|
-<td>Wrapper for xgboost’s functions <span style="font-family: 'Courier New', monospace;">xgboost</span> and <span style="font-family: 'Courier New', monospace;">predict</span>
|
|
594
|
+<td>Wrapper for xgboost’s functions <span style="font-family: 'Courier New', monospace;">xgboost</span>
|
|
595
|
+and <span style="font-family: 'Courier New', monospace;">predict</span>
|
484
|
596
|
</td>
|
485
|
597
|
<td>✔</td>
|
486
|
598
|
<td>✔</td>
|
...
|
...
|
@@ -496,9 +608,11 @@
|
496
|
608
|
</tr>
|
497
|
609
|
<tr class="odd">
|
498
|
610
|
<td>
|
499
|
|
-<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span>
|
|
611
|
+<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>,
|
|
612
|
+<span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span>
|
500
|
613
|
</td>
|
501
|
|
-<td>Wrapper for e1071’s functions <span style="font-family: 'Courier New', monospace;">svm</span> and <span style="font-family: 'Courier New', monospace;">predict.svm</span>
|
|
614
|
+<td>Wrapper for e1071’s functions <span style="font-family: 'Courier New', monospace;">svm</span> and
|
|
615
|
+<span style="font-family: 'Courier New', monospace;">predict.svm</span>
|
502
|
616
|
</td>
|
503
|
617
|
<td>✔</td>
|
504
|
618
|
<td>✔ †</td>
|
...
|
...
|
@@ -506,13 +620,21 @@
|
506
|
620
|
</tr>
|
507
|
621
|
</tbody>
|
508
|
622
|
</table>
|
509
|
|
-<p>* If ordinary numeric measurements have been transformed to absolute deviations using <span style="font-family: 'Courier New', monospace;">subtractFromLocation</span>.<br> † If the value of <span style="font-family: 'Courier New', monospace;">kernel</span> is not <span style="font-family: 'Courier New', monospace;">“linear”</span>.</p>
|
510
|
|
-<p>If a desired selection or classification method is not already implemented, rules for writing functions to work with <strong>ClassifyR</strong> are outlined in the wrapper vignette. Please visit it for more information.</p>
|
|
623
|
+<p>* If ordinary numeric measurements have been transformed to absolute
|
|
624
|
+deviations using <span style="font-family: 'Courier New', monospace;">subtractFromLocation</span>.<br>
|
|
625
|
+† If the value of <span style="font-family: 'Courier New', monospace;">kernel</span> is
|
|
626
|
+not <span style="font-family: 'Courier New', monospace;">“linear”</span>.</p>
|
|
627
|
+<p>If a desired selection or classification method is not already
|
|
628
|
+implemented, rules for writing functions to work with
|
|
629
|
+<strong>ClassifyR</strong> are outlined in the wrapper vignette. Please
|
|
630
|
+visit it for more information.</p>
|
511
|
631
|
</div>
|
512
|
632
|
<div class="section level4">
|
513
|
633
|
<h4 id="provided-meta-feature-methods">Provided Meta-feature Methods<a class="anchor" aria-label="anchor" href="#provided-meta-feature-methods"></a>
|
514
|
634
|
</h4>
|
515
|
|
-<p>A number of methods are provided for users to enable classification in a feature-set-centric or interactor-centric way. The meta-feature creation functions should be used before cross-validation is done.</p>
|
|
635
|
+<p>A number of methods are provided for users to enable classification
|
|
636
|
+in a feature-set-centric or interactor-centric way. The meta-feature
|
|
637
|
+creation functions should be used before cross-validation is done.</p>
|
516
|
638
|
<table class="table">
|
517
|
639
|
<colgroup>
|
518
|
640
|
<col width="9%">
|
...
|
...
|
@@ -529,25 +651,30 @@
|
529
|
651
|
<tbody>
|
530
|
652
|
<tr class="odd">
|
531
|
653
|
<td><span style="font-family: 'Courier New', monospace;">edgesToHubNetworks</span></td>
|
532
|
|
-<td>Takes a two-column <span style="font-family: 'Courier New', monospace;">matrix</span> or <span style="font-family: 'Courier New', monospace;">DataFrame</span> and finds all nodes with at least a minimum number of interactions</td>
|
|
654
|
+<td>Takes a two-column <span style="font-family: 'Courier New', monospace;">matrix</span> or
|
|
655
|
+<span style="font-family: 'Courier New', monospace;">DataFrame</span>
|
|
656
|
+and finds all nodes with at least a minimum number of interactions</td>
|
533
|
657
|
<td align="center">✔</td>
|
534
|
658
|
<td align="center"></td>
|
535
|
659
|
</tr>
|
536
|
660
|
<tr class="even">
|
537
|
661
|
<td><span style="font-family: 'Courier New', monospace;">featureSetSummary</span></td>
|
538
|
|
-<td><span style="white-space: nowrap">Considers sets of features and calculates their mean or median</span></td>
|
|
662
|
+<td><span style="white-space: nowrap">Considers sets of features and
|
|
663
|
+calculates their mean or median</span></td>
|
539
|
664
|
<td align="center">✔</td>
|
540
|
665
|
<td align="center"></td>
|
541
|
666
|
</tr>
|
542
|
667
|
<tr class="odd">
|
543
|
668
|
<td><span style="font-family: 'Courier New', monospace;">pairsDifferencesSelection</span></td>
|
544
|
|
-<td>Finds a set of pairs of features whose measurement inequalities can be used for predicting with</td>
|
|
669
|
+<td>Finds a set of pairs of features whose measurement inequalities can
|
|
670
|
+be used for prediction</td>
|
545
|
671
|
<td align="center"></td>
|
546
|
672
|
<td align="center">✔</td>
|
547
|
673
|
</tr>
|
548
|
674
|
<tr class="even">
|
549
|
675
|
<td><span style="font-family: 'Courier New', monospace;">kTSPclassifier</span></td>
|
550
|
|
-<td>Voting classifier that uses inequalities between pairs of features to vote for one of two classes</td>
|
|
676
|
+<td>Voting classifier that uses inequalities between pairs of features
|
|
677
|
+to vote for one of two classes</td>
|
551
|
678
|
<td align="center"></td>
|
552
|
679
|
<td align="center">✔</td>
|
553
|
680
|
</tr>
|
...
|
...
|
@@ -556,14 +683,35 @@
|
556
|
683
|
</div>
|
557
|
684
|
</div>
|
558
|
685
|
<div class="section level3">
|
559
|
|
-<h3 id="fine-grained-cross-validation-and-modelling-using-runtests">Fine-grained Cross-validation and Modelling Using <em>runTests</em><a class="anchor" aria-label="anchor" href="#fine-grained-cross-validation-and-modelling-using-runtests"></a>
|
|
686
|
+<h3 id="fine-grained-cross-validation-and-modelling-using-runtests">Fine-grained Cross-validation and Modelling Using
|
|
687
|
+<em>runTests</em><a class="anchor" aria-label="anchor" href="#fine-grained-cross-validation-and-modelling-using-runtests"></a>
|
560
|
688
|
</h3>
|
561
|
|
-<p>For more control over the finer aspects of cross-validation of a single data set, <em>runTests</em> may be employed in place of <em>crossValidate</em>. For the variety of cross-validation, the parameters are specified by a <em>CrossValParams</em> object. The default setting is for 100 permutations and five folds and parameter tuning is done by resubstitution. It is also recommended to specify a <em>parallelParams</em> setting. On Linux and MacOS operating systems, it should be <em>MulticoreParam</em> and on Windows computers it should be <em>SnowParam</em>. Note that each of these have an option <em>RNGseed</em> and this <strong>needs to be set by the user</strong> because some classifiers or feature selection functions will have some element of randomisation. One example that works on all operating systems, but is best-suited to Windows is:</p>
|
562
|
|
-<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
|
|
689
|
+<p>For more control over the finer aspects of cross-validation of a
|
|
690
|
+single data set, <em>runTests</em> may be employed in place of
|
|
691
|
+<em>crossValidate</em>. For the variety of cross-validation, the
|
|
692
|
+parameters are specified by a <em>CrossValParams</em> object. The
|
|
693
|
+default setting is for 100 permutations and five folds and parameter
|
|
694
|
+tuning is done by resubstitution. It is also recommended to specify a
|
|
695
|
+<em>parallelParams</em> setting. On Linux and MacOS operating systems,
|
|
696
|
+it should be <em>MulticoreParam</em> and on Windows computers it should
|
|
697
|
+be <em>SnowParam</em>. Note that each of these has an option
|
|
698
|
+<em>RNGseed</em> and this <strong>needs to be set by the user</strong>
|
|
699
|
+because some classifiers or feature selection functions will have some
|
|
700
|
+element of randomisation. One example that works on all operating
|
|
701
|
+systems, but is best-suited to Windows is:</p>
|
|
702
|
+<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
|
563
|
703
|
<code class="sourceCode R"><span><span class="va">CVparams</span> <span class="op"><-</span> <span class="fu"><a href="../reference/CrossValParams-class.html">CrossValParams</a></span><span class="op">(</span>parallelParams <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/pkg/BiocParallel/man/SnowParam-class.html" class="external-link">SnowParam</a></span><span class="op">(</span><span class="fl">16</span>, RNGseed <span class="op">=</span> <span class="fl">123</span><span class="op">)</span><span class="op">)</span></span>
|
564
|
704
|
<span><span class="va">CVparams</span></span></code></pre></div>
|
565
|
|
-<p>For the actual operations to do to the data to build a model of it, each of the stages should be specified by an object of class <em>ModellingParams</em>. This controls how class imbalance is handled (default is to downsample to the smallest class), any transformation that needs to be done inside of cross-validation (i.e. involving a computed value from the training set), any feature selection and the training and prediction functions to be used. The default is to do an ordinary t-test (two groups) or ANOVA (three or more groups) and classification using diagonal LDA.</p>
|
566
|
|
-<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
|
|
705
|
+<p>For the actual operations to do to the data to build a model of it,
|
|
706
|
+each of the stages should be specified by an object of class
|
|
707
|
+<em>ModellingParams</em>. This controls how class imbalance is handled
|
|
708
|
+(default is to downsample to the smallest class), any transformation
|
|
709
|
+that needs to be done inside of cross-validation (i.e. involving a
|
|
710
|
+computed value from the training set), any feature selection and the
|
|
711
|
+training and prediction functions to be used. The default is to do an
|
|
712
|
+ordinary t-test (two groups) or ANOVA (three or more groups) and
|
|
713
|
+classification using diagonal LDA.</p>
|
|
714
|
+<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
|
567
|
715
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span><span class="op">)</span></span></code></pre></div>
|
568
|
716
|
<pre><code><span><span class="co">## An object of class "ModellingParams"</span></span>
|
569
|
717
|
<span><span class="co">## Slot "balancing":</span></span>
|
...
|
...
|
@@ -589,20 +737,38 @@
|
589
|
737
|
<div class="section level3">
|
590
|
738
|
<h3 id="runtests-driver-function-of-cross-validated-classification">runTests Driver Function of Cross-validated Classification<a class="anchor" aria-label="anchor" href="#runtests-driver-function-of-cross-validated-classification"></a>
|
591
|
739
|
</h3>
|
592
|
|
-<p><em>runTests</em> is the main function in <strong>ClassifyR</strong> which handles the sample splitting and parallelisation, if used, of cross-validation. To begin with, a simple classifier will be demonstrated. It uses a t-test or ANOVA ranking (depending on the number of classes) for feature ranking and DLDA for classification. This classifier relies on differences in means between classes. No parameters need to be specified, because this is the default classification of <em>runTests</em>. By default, the number of features is tuned by resubstitution on the training set.</p>
|
593
|
|
-<div class="sourceCode" id="cb23"><pre class="downlit sourceCode r">
|
|
740
|
+<p><em>runTests</em> is the main function in <strong>ClassifyR</strong>
|
|
741
|
+which handles the sample splitting and parallelisation, if used, of
|
|
742
|
+cross-validation. To begin with, a simple classifier will be
|
|
743
|
+demonstrated. It uses a t-test or ANOVA ranking (depending on the number
|
|
744
|
+of classes) for feature ranking and DLDA for classification. This
|
|
745
|
+classifier relies on differences in means between classes. No parameters
|
|
746
|
+need to be specified, because this is the default classification of
|
|
747
|
+<em>runTests</em>. By default, the number of features is tuned by
|
|
748
|
+resubstitution on the training set.</p>
|
|
749
|
+<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
|
594
|
750
|
<code class="sourceCode R"><span><span class="va">crossValParams</span> <span class="op"><-</span> <span class="fu"><a href="../reference/CrossValParams-class.html">CrossValParams</a></span><span class="op">(</span>permutations <span class="op">=</span> <span class="fl">5</span><span class="op">)</span></span>
|
595
|
751
|
<span><span class="va">DMresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
|
596
|
|
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
597
|
|
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
|
598
|
|
-<p>Here, 5 permutations (non-default) and 5 folds cross-validation (default) is specified. For computers with more than 1 CPU, the number of cores to use can be given to <em>runTests</em> by using the argument <em>parallelParams</em>. The parameter <em>seed</em> is important to set for result reproducibility when doing a cross-validation such as this, because it employs randomisation to partition the samples into folds. Also, <em>RNGseed</em> is highly recommended to be set to the back-end specified to <em>BPPARAM</em> if doing parallel processing. The first seed mentioned does not work for parallel processes. For more details about <em>runTests</em> and the parameter classes used by it, consult the help pages of such functions.</p>
|
|
752
|
+<p>Here, 5 permutations (non-default) and 5-fold cross-validation
|
|
753
|
+(default) are specified. For computers with more than 1 CPU, the number
|
|
754
|
+of cores to use can be given to <em>runTests</em> by using the argument
|
|
755
|
+<em>parallelParams</em>. The parameter <em>seed</em> is important to set
|
|
756
|
+for result reproducibility when doing a cross-validation such as this,
|
|
757
|
+because it employs randomisation to partition the samples into folds.
|
|
758
|
+Also, <em>RNGseed</em> is highly recommended to be set to the back-end
|
|
759
|
+specified to <em>BPPARAM</em> if doing parallel processing. The first
|
|
760
|
+seed mentioned does not work for parallel processes. For more details
|
|
761
|
+about <em>runTests</em> and the parameter classes used by it, consult
|
|
762
|
+the help pages of such functions.</p>
|
599
|
763
|
</div>
|
600
|
764
|
</div>
|
601
|
765
|
<div class="section level2">
|
602
|
766
|
<h2 id="evaluation-of-a-classification">Evaluation of a Classification<a class="anchor" aria-label="anchor" href="#evaluation-of-a-classification"></a>
|
603
|
767
|
</h2>
|
604
|
|
-<p>The most frequently selected gene can be identified using the <em>distribution</em> function and its relative abundance values for all samples can be displayed visually by <em>plotFeatureClasses</em>.</p>
|
605
|
|
-<div class="sourceCode" id="cb26"><pre class="downlit sourceCode r">
|
|
768
|
+<p>The most frequently selected gene can be identified using the
|
|
769
|
+<em>distribution</em> function and its relative abundance values for all
|
|
770
|
+samples can be displayed visually by <em>plotFeatureClasses</em>.</p>
|
|
771
|
+<div class="sourceCode" id="cb22"><pre class="downlit sourceCode r">
|
606
|
772
|
<code class="sourceCode R"><span><span class="va">selectionPercentages</span> <span class="op"><-</span> <span class="fu"><a href="../reference/distribution.html">distribution</a></span><span class="op">(</span><span class="va">DMresults</span>, plot <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
|
607
|
773
|
<span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">selectionPercentages</span><span class="op">)</span></span>
|
608
|
774
|
<span><span class="va">sortedPercentages</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">selectionPercentages</span><span class="op">[</span><span class="fu"><a href="https://rdrr.io/r/base/order.html" class="external-link">order</a></span><span class="op">(</span><span class="va">selectionPercentages</span>, decreasing <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span><span class="op">]</span><span class="op">)</span></span>
|
...
|
...
|
@@ -616,15 +782,24 @@
|
616
|
782
|
<span><span class="co">## Please report the issue to the authors.</span></span></code></pre>
|
617
|
783
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-11-1.png" width="768"></p>
|
618
|
784
|
<pre><code><span><span class="co">## allFeaturesText</span></span>
|
619
|
|
-<span><span class="co">## ANKMY1 ARHGAP39 C10orf95 C19orf51 C2orf55 C6orf108 </span></span>
|
620
|
|
-<span><span class="co">## 8 64 100 80 4 12 </span></span>
|
|
785
|
+<span><span class="co">## ARHGAP39 C10orf95 C19orf51 C6orf108 C6orf154 C6orf27 </span></span>
|
|
786
|
+<span><span class="co">## 60 96 48 8 12 8 </span></span>
|
621
|
787
|
<span><span class="co">## allFeaturesText</span></span>
|
622
|
|
-<span><span class="co">## C10orf95 CROCC SSBP4 ZDHHC1 TMEM190 C19orf51 </span></span>
|
623
|
|
-<span><span class="co">## 100 100 100 100 84 80</span></span></code></pre>
|
624
|
|
-<p>The means of the abundance levels of C10orf95 are substantially different between the people with and without asthma. <em>plotFeatureClasses</em> can also plot categorical data, such as may be found in a clinical data table, as a bar chart.</p>
|
625
|
|
-<p>Classification error rates, as well as many other prediction performance measures, can be calculated with <em>calcCVperformance</em>. Next, the balanced accuracy rate is calculated considering all samples, each of which was in the test set once. The balanced accuracy rate is defined as the average rate of the correct classifications of each class.</p>
|
626
|
|
-<p>See the documentation of <em>calcCVperformance</em> for a list of performance metrics which may be calculated.</p>
|
627
|
|
-<div class="sourceCode" id="cb29"><pre class="downlit sourceCode r">
|
|
788
|
+<span><span class="co">## SSBP4 ZDHHC1 C10orf95 CROCC TMEM190 CTXN1 </span></span>
|
|
789
|
+<span><span class="co">## 100 100 96 96 76 72</span></span></code></pre>
|
|
790
|
+<p>The means of the abundance levels of SSBP4 are substantially
|
|
791
|
+different between the people with and without asthma.
|
|
792
|
+<em>plotFeatureClasses</em> can also plot categorical data, such as may
|
|
793
|
+be found in a clinical data table, as a bar chart.</p>
|
|
794
|
+<p>Classification error rates, as well as many other prediction
|
|
795
|
+performance measures, can be calculated with <em>calcCVperformance</em>.
|
|
796
|
+Next, the balanced accuracy rate is calculated considering all samples,
|
|
797
|
+each of which was in the test set once. The balanced accuracy rate is
|
|
798
|
+defined as the average rate of the correct classifications of each
|
|
799
|
+class.</p>
|
|
800
|
+<p>See the documentation of <em>calcCVperformance</em> for a list of
|
|
801
|
+performance metrics which may be calculated.</p>
|
|
802
|
+<div class="sourceCode" id="cb25"><pre class="downlit sourceCode r">
|
628
|
803
|
<code class="sourceCode R"><span><span class="va">DMresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DMresults</span><span class="op">)</span></span>
|
629
|
804
|
<span><span class="va">DMresults</span></span></code></pre></div>
|
630
|
805
|
<pre><code><span><span class="co">## An object of class 'ClassifyResult'.</span></span>
|
...
|
...
|
@@ -636,25 +811,32 @@
|
636
|
811
|
<span><span class="co">## Features: List of length 25 of feature identifiers.</span></span>
|
637
|
812
|
<span><span class="co">## Predictions: A data frame of 950 rows.</span></span>
|
638
|
813
|
<span><span class="co">## Performance Measures: Balanced Accuracy.</span></span></code></pre>
|
639
|
|
-<div class="sourceCode" id="cb31"><pre class="downlit sourceCode r">
|
|
814
|
+<div class="sourceCode" id="cb27"><pre class="downlit sourceCode r">
|
640
|
815
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/ClassifyResult-class.html">performance</a></span><span class="op">(</span><span class="va">DMresults</span><span class="op">)</span></span></code></pre></div>
|
641
|
816
|
<pre><code><span><span class="co">## $`Balanced Accuracy`</span></span>
|
642
|
817
|
<span><span class="co">## 1 2 3 4 5 </span></span>
|
643
|
|
-<span><span class="co">## 0.7850684 0.7931329 0.8011975 0.8047410 0.8077957</span></span></code></pre>
|
644
|
|
-<p>The error rate is about 20%. If only a vector of predictions and a vector of actual classes is available, such as from an old study which did not use <strong>ClassifyR</strong> for cross-validation, then <em>calcExternalPerformance</em> can be used on a pair of factor vectors which have the same length.</p>
|
|
818
|
+<span><span class="co">## 0.8047410 0.7997312 0.7926442 0.8047410 0.7931329</span></span></code></pre>
|
|
819
|
+<p>The error rate is about 20%. If only a vector of predictions and a
|
|
820
|
+vector of actual classes is available, such as from an old study which
|
|
821
|
+did not use <strong>ClassifyR</strong> for cross-validation, then
|
|
822
|
+<em>calcExternalPerformance</em> can be used on a pair of factor vectors
|
|
823
|
+which have the same length.</p>
|
645
|
824
|
<div class="section level3">
|
646
|
825
|
<h3 id="comparison-of-different-classifications">Comparison of Different Classifications<a class="anchor" aria-label="anchor" href="#comparison-of-different-classifications"></a>
|
647
|
826
|
</h3>
|
648
|
|
-<p>The <em>samplesMetricMap</em> function allows the visual comparison of sample-wise error rate or accuracy measures from different <em>ClassifyResult</em> objects. Firstly, a classifier will be run that uses Kullback-Leibler divergence ranking and resubstitution error as a feature selection heuristic and a naive Bayes classifier for classification. This classification will use features that have either a change in location or in scale between classes.</p>
|
649
|
|
-<div class="sourceCode" id="cb33"><pre class="downlit sourceCode r">
|
|
827
|
+<p>The <em>samplesMetricMap</em> function allows the visual comparison
|
|
828
|
+of sample-wise error rate or accuracy measures from different
|
|
829
|
+<em>ClassifyResult</em> objects. Firstly, a classifier will be run that
|
|
830
|
+uses Kullback-Leibler divergence ranking and resubstitution error as a
|
|
831
|
+feature selection heuristic and a naive Bayes classifier for
|
|
832
|
+classification. This classification will use features that have either a
|
|
833
|
+change in location or in scale between classes.</p>
|
|
834
|
+<div class="sourceCode" id="cb29"><pre class="downlit sourceCode r">
|
650
|
835
|
<code class="sourceCode R"><span><span class="va">modellingParamsDD</span> <span class="op"><-</span> <span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span>selectParams <span class="op">=</span> <span class="fu"><a href="../reference/SelectParams-class.html">SelectParams</a></span><span class="op">(</span><span class="st">"KL"</span><span class="op">)</span>,</span>
|
651
|
836
|
<span> trainParams <span class="op">=</span> <span class="fu"><a href="../reference/TrainParams-class.html">TrainParams</a></span><span class="op">(</span><span class="st">"naiveBayes"</span><span class="op">)</span>,</span>
|
652
|
837
|
<span> predictParams <span class="op">=</span> <span class="cn">NULL</span><span class="op">)</span></span>
|
653
|
|
-<span><span class="va">DDresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">modellingParamsDD</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
|
654
|
|
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
655
|
|
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
|
656
|
|
-<div class="sourceCode" id="cb36"><pre class="downlit sourceCode r">
|
657
|
|
-<code class="sourceCode R"><span><span class="va">DDresults</span></span></code></pre></div>
|
|
838
|
+<span><span class="va">DDresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">modellingParamsDD</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span>
|
|
839
|
+<span><span class="va">DDresults</span></span></code></pre></div>
|
658
|
840
|
<pre><code><span><span class="co">## An object of class 'ClassifyResult'.</span></span>
|
659
|
841
|
<span><span class="co">## Characteristics:</span></span>
|
660
|
842
|
<span><span class="co">## characteristic value</span></span>
|
...
|
...
|
@@ -664,9 +846,15 @@
|
664
|
846
|
<span><span class="co">## Features: List of length 25 of feature identifiers.</span></span>
|
665
|
847
|
<span><span class="co">## Predictions: A data frame of 950 rows.</span></span>
|
666
|
848
|
<span><span class="co">## Performance Measures: None calculated yet.</span></span></code></pre>
|
667
|
|
-<p>The naive Bayes kernel classifier by default uses the vertical distance between class densities but it can instead use the horizontal distance to the nearest non-zero density cross-over point to confidently classify samples in the tails of the densities.</p>
|
668
|
|
-<p>Now, the classification error for each sample is also calculated for both the differential means and differential distribution classifiers and both <em>ClassifyResult</em> objects generated so far are plotted with <em>samplesMetricMap</em>.</p>
|
669
|
|
-<div class="sourceCode" id="cb38"><pre class="downlit sourceCode r">
|
|
849
|
+<p>The naive Bayes kernel classifier by default uses the vertical
|
|
850
|
+distance between class densities but it can instead use the horizontal
|
|
851
|
+distance to the nearest non-zero density cross-over point to confidently
|
|
852
|
+classify samples in the tails of the densities.</p>
|
|
853
|
+<p>Now, the classification error for each sample is also calculated for
|
|
854
|
+both the differential means and differential distribution classifiers
|
|
855
|
+and both <em>ClassifyResult</em> objects generated so far are plotted
|
|
856
|
+with <em>samplesMetricMap</em>.</p>
|
|
857
|
+<div class="sourceCode" id="cb31"><pre class="downlit sourceCode r">
|
670
|
858
|
<code class="sourceCode R"><span><span class="va">DMresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DMresults</span>, <span class="st">"Sample Error"</span><span class="op">)</span></span>
|
671
|
859
|
<span><span class="va">DDresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DDresults</span>, <span class="st">"Sample Error"</span><span class="op">)</span></span>
|
672
|
860
|
<span><span class="va">resultsList</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>Abundance <span class="op">=</span> <span class="va">DMresults</span>, Distribution <span class="op">=</span> <span class="va">DDresults</span><span class="op">)</span></span>
|
...
|
...
|
@@ -678,101 +866,178 @@
|
678
|
866
|
<span><span class="co">## z cells name grob</span></span>
|
679
|
867
|
<span><span class="co">## 1 1 (2-2,1-1) arrange gtable[layout]</span></span>
|
680
|
868
|
<span><span class="co">## 2 2 (1-1,1-1) arrange text[GRID.text.533]</span></span></code></pre>
|
681
|
|
-<p>The benefit of this plot is that it allows the easy identification of samples which are hard to classify and could be explained by considering additional information about them. Differential distribution class prediction appears to be biased to the majority class (No Asthma).</p>
|
682
|
|
-<p>More traditionally, the distribution of performance values of each complete cross-validation can be visualised by <em>performancePlot</em> by providing them as a list to the function. The default is to draw box plots, but violin plots could also be made. The default performance metric to plot is balanced accuracy. If it’s not already calculated for all classifications, as in this case for DD, it will be done automatically.</p>
|
683
|
|
-<div class="sourceCode" id="cb41"><pre class="downlit sourceCode r">
|
|
869
|
+<p>The benefit of this plot is that it allows the easy identification of
|
|
870
|
+samples which are hard to classify and could be explained by considering
|
|
871
|
+additional information about them. Differential distribution class
|
|
872
|
+prediction appears to be biased to the majority class (No Asthma).</p>
|
|
873
|
+<p>More traditionally, the distribution of performance values of each
|
|
874
|
+complete cross-validation can be visualised by <em>performancePlot</em>
|
|
875
|
+by providing them as a list to the function. The default is to draw box
|
|
876
|
+plots, but violin plots could also be made. The default performance
|
|
877
|
+metric to plot is balanced accuracy. If it’s not already calculated for
|
|
878
|
+all classifications, as in this case for DD, it will be done
|
|
879
|
+automatically.</p>
|
|
880
|
+<div class="sourceCode" id="cb34"><pre class="downlit sourceCode r">
|
684
|
881
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">resultsList</span><span class="op">)</span></span></code></pre></div>
|
685
|
882
|
<pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
|
686
|
883
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-15-1.png" width="700"></p>
|
687
|
|
-<p>We can observe that the spread of balanced accuracy rates is small, but slightly wider for the differential distribution classifier.</p>
|
688
|
|
-<p>The features being ranked and selected in the feature selection stage can be compared within and between classifiers by the plotting functions <em>rankingPlot</em> and <em>selectionPlot</em>. Consider the task of visually representing how consistent the feature rankings of the top 100 different features were for the differential distribution classifier for all 5 folds in the 5 cross-validations.</p>
|
689
|
|
-<div class="sourceCode" id="cb43"><pre class="downlit sourceCode r">
|
|
884
|
+<p>We can observe that the spread of balanced accuracy rates is small,
|
|
885
|
+but slightly wider for the differential distribution classifier.</p>
|
|
886
|
+<p>The features being ranked and selected in the feature selection stage
|
|
887
|
+can be compared within and between classifiers by the plotting functions
|
|
888
|
+<em>rankingPlot</em> and <em>selectionPlot</em>. Consider the task of
|
|
889
|
+visually representing how consistent the feature rankings of the top 100
|
|
890
|
+different features were for the differential distribution classifier for
|
|
891
|
+all 5 folds in the 5 cross-validations.</p>
|
|
892
|
+<div class="sourceCode" id="cb36"><pre class="downlit sourceCode r">
|
690
|
893
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/rankingPlot.html">rankingPlot</a></span><span class="op">(</span><span class="va">DDresults</span>, topRanked <span class="op">=</span> <span class="fl">1</span><span class="op">:</span><span class="fl">100</span>, xLabelPositions <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">1</span>, <span class="fu"><a href="https://rdrr.io/r/base/seq.html" class="external-link">seq</a></span><span class="op">(</span><span class="fl">10</span>, <span class="fl">100</span>, <span class="fl">10</span><span class="op">)</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
|
691
|
894
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-16-1.png" width="700"></p>
|
692
|
|
-<p>The top-ranked features are fairly similar between all pairs of the 20 cross-validations.</p>
|
693
|
|
-<p>For a large cross-validation scheme, such as leave-2-out cross-validation, or when <em>results</em> contains many classifications, there are many feature set comparisons to make. Note that <em>rankingPlot</em> and <em>selectionPlot</em> have a <em>parallelParams</em> options which allows for the calculation of feature set overlaps to be done on multiple processors.</p>
|
|
895
|
+<p>The top-ranked features are fairly similar between all pairs of the
|
|
896
|
+25 cross-validation iterations.</p>
|
|
897
|
+<p>For a large cross-validation scheme, such as leave-2-out
|
|
898
|
+cross-validation, or when <em>results</em> contains many
|
|
899
|
+classifications, there are many feature set comparisons to make. Note
|
|
900
|
+that <em>rankingPlot</em> and <em>selectionPlot</em> have a
|
|
901
|
+<em>parallelParams</em> option which allows for the calculation of
|
|
902
|
+feature set overlaps to be done on multiple processors.</p>
|
694
|
903
|
</div>
|
695
|
904
|
<div class="section level3">
|
696
|
905
|
<h3 id="generating-a-roc-plot">Generating a ROC Plot<a class="anchor" aria-label="anchor" href="#generating-a-roc-plot"></a>
|
697
|
906
|
</h3>
|
698
|
|
-<p>Some classifiers can output scores or probabilities representing how likely a sample is to be from one of the classes, instead of, or as well as, class labels. This enables different score thresholds to be tried, to generate pairs of false positive and false negative rates. The naive Bayes classifier used previously by default has its <em>returnType</em> parameter set to <em>“both”</em>, so class predictions and scores are both stored in the classification result. So does diagonal LDA. In this case, a data frame with class predictions and scores for each class is returned by the classifier to the cross-validation framework. Setting <em>returnType</em> to <em>“score”</em> for a classifier which has such an option is also sufficient to generate a ROC plot. Many existing classifiers in other R packages also have an option that allows a score or probability to be calculated.</p>
|
699
|
|
-<p>By default, scores from different iterations of prediction are merged and one line is drawn per classification. Alternatively, setting <em>mode = “average”</em> will consider each iteration of prediction separately, average them and also calculate and draw confidence intervals. The default interval is a 95% interval and is customisable by setting <em>interval</em>.</p>
|
700
|
|
-<div class="sourceCode" id="cb44"><pre class="downlit sourceCode r">
|
|
907
|
+<p>Some classifiers can output scores or probabilities representing how
|
|
908
|
+likely a sample is to be from one of the classes, instead of, or as well
|
|
909
|
+as, class labels. This enables different score thresholds to be tried,
|
|
910
|
+to generate pairs of false positive and false negative rates. The naive
|
|
911
|
+Bayes classifier used previously by default has its <em>returnType</em>
|
|
912
|
+parameter set to <em>“both”</em>, so class predictions and scores are
|
|
913
|
+both stored in the classification result. So does diagonal LDA. In this
|
|
914
|
+case, a data frame with class predictions and scores for each class is
|
|
915
|
+returned by the classifier to the cross-validation framework. Setting
|
|
916
|
+<em>returnType</em> to <em>“score”</em> for a classifier which has such
|
|
917
|
+an option is also sufficient to generate a ROC plot. Many existing
|
|
918
|
+classifiers in other R packages also have an option that allows a score
|
|
919
|
+or probability to be calculated.</p>
|
|
920
|
+<p>By default, scores from different iterations of prediction are merged
|
|
921
|
+and one line is drawn per classification. Alternatively, setting
|
|
922
|
+<em>mode = “average”</em> will consider each iteration of prediction
|
|
923
|
+separately, average them and also calculate and draw confidence
|
|
924
|
+intervals. The default interval is a 95% interval and is customisable by
|
|
925
|
+setting <em>interval</em>.</p>
|
|
926
|
+<div class="sourceCode" id="cb37"><pre class="downlit sourceCode r">
|
701
|
927
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/ROCplot.html">ROCplot</a></span><span class="op">(</span><span class="va">resultsList</span>, fontSizes <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">24</span>, <span class="fl">12</span>, <span class="fl">12</span>, <span class="fl">12</span>, <span class="fl">12</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
|
702
|
928
|
<p><img src="ClassifyR_files/figure-html/unnamed-chunk-17-1.png" width="576"></p>
|
703
|
|
-<p>This ROC plot shows the classifiability of the asthma data set is high. Some examples of functions which output scores are <em>fisherDiscriminant</em>, <em>DLDApredictInterface</em>, and <em>SVMpredictInterface</em>.</p>
|
|
929
|
+<p>This ROC plot shows the classifiability of the asthma data set is
|
|
930
|
+high. Some examples of functions which output scores are
|
|
931
|
+<em>fisherDiscriminant</em>, <em>DLDApredictInterface</em>, and
|
|
932
|
+<em>SVMpredictInterface</em>.</p>
|
704
|
933
|
</div>
|
705
|
934
|
</div>
|
706
|
935
|
<div class="section level2">
|
707
|
936
|
<h2 id="other-use-cases">Other Use Cases<a class="anchor" aria-label="anchor" href="#other-use-cases"></a>
|
708
|
937
|
</h2>
|
709
|
|
-<p>Apart from cross-validation of one data set, ClassifyR can be used in a couple of other ways.</p>
|
|
938
|
+<p>Apart from cross-validation of one data set, ClassifyR can be used in
|
|
939
|
+a couple of other ways.</p>
|
710
|
940
|
<div class="section level3">
|
711
|
941
|
<h3 id="using-an-independent-test-set">Using an Independent Test Set<a class="anchor" aria-label="anchor" href="#using-an-independent-test-set"></a>
|
712
|
942
|
</h3>
|
713
|
|
-<p>Sometimes, cross-validation is unnecessary. This happens when studies have large sample sizes and are designed such that a large number of samples is prespecified to form a test set. The classifier is only trained on the training sample set, and makes predictions only on the test sample set. This can be achieved by using the function <em>runTest</em> directly. See its documentation for required inputs.</p>
|
|
943
|
+<p>Sometimes, cross-validation is unnecessary. This happens when studies
|
|
944
|
+have large sample sizes and are designed such that a large number of
|
|
945
|
+samples is prespecified to form a test set. The classifier is only
|
|
946
|
+trained on the training sample set, and makes predictions only on the
|
|
947
|
+test sample set. This can be achieved by using the function
|
|
948
|
+<em>runTest</em> directly. See its documentation for required
|
|
949
|
+inputs.</p>
|
714
|
950
|
</div>
|
715
|
951
|
<div class="section level3">
|
716
|
952
|
<h3 id="cross-validating-selected-features-on-a-different-data-set">Cross-validating Selected Features on a Different Data Set<a class="anchor" aria-label="anchor" href="#cross-validating-selected-features-on-a-different-data-set"></a>
|
717
|
953
|
</h3>
|
718
|
|
-<p>Once a cross-validated classification is complete, the usefulness of the features selected may be explored in another dataset. <em>previousSelection</em> is a function which takes an existing <em>ClassifyResult</em> object and returns the features selected at the equivalent iteration which is currently being processed. This is necessary, because the models trained on one data set are not directly transferrable to a new dataset; the classifier training (e.g. choosing thresholds, fitting model coefficients) is redone. Of course, the features in the new dataset should have the same naming system as the ones in the old dataset.</p>
|
|
954
|
+<p>Once a cross-validated classification is complete, the usefulness of
|
|
955
|
+the features selected may be explored in another dataset.
|
|
956
|
+<em>previousSelection</em> is a function which takes an existing
|
|
957
|
+<em>ClassifyResult</em> object and returns the features selected at the
|
|
958
|
+equivalent iteration which is currently being processed. This is
|
|
959
|
+necessary, because the models trained on one data set are not directly
|
|
960
|
+transferable to a new dataset; the classifier training (e.g. choosing
|
|
961
|
+thresholds, fitting model coefficients) is redone. Of course, the
|
|
962
|
+features in the new dataset should have the same naming system as the
|
|
963
|
+ones in the old dataset.</p>
|
719
|
964
|
</div>
|
720
|
965
|
<div class="section level3">
|
721
|
966
|
<h3 id="parameter-tuning">Parameter Tuning<a class="anchor" aria-label="anchor" href="#parameter-tuning"></a>
|
722
|
967
|
</h3>
|
723
|
|
-<p>Some feature ranking methods or classifiers allow the choosing of tuning parameters, which controls some aspect of their model learning. An example of doing parameter tuning with a linear SVM is presented. This particular SVM has a single tuning parameter, the cost. Higher values of this parameter penalise misclassifications more. Moreover, feature selection happens by using a feature ranking function and then trying a range of top-ranked features to see which gives the best performance, the range being specified by a list element named <em>nFeatures</em> and the performance type (e.g. Balanced Accuracy) specified by a list element named <em>performanceType</em>. Therefore, some kind of parameter tuning always happens, even if the feature ranking or classifier function does not have any explicit tuning parameters.</p>
|
724
|
|
-<p>Tuning is achieved in ClassifyR by providing a variable called <em>tuneParams</em> to the SelectParams or TrainParams constructor. <em>tuneParams</em> is a named list, with the names being the names of the tuning variables, except for one which is named <em>“performanceType”</em> and specifies the performance metric to use for picking the parameter values. Any of the non-sample-specific performance metrics which <em>calcCVperformance</em> calculates can be optimised.</p>
|
725
|
|
-<div class="sourceCode" id="cb45"><pre class="downlit sourceCode r">
|
|
968
|
+<p>Some feature ranking methods or classifiers allow the choosing of
|
|
969
|
+tuning parameters, which control some aspect of their model learning.
|
|
970
|
+An example of doing parameter tuning with a linear SVM is presented.
|
|
971
|
+This particular SVM has a single tuning parameter, the cost. Higher
|
|
972
|
+values of this parameter penalise misclassifications more. Moreover,
|
|
973
|
+feature selection happens by using a feature ranking function and then
|
|
974
|
+trying a range of top-ranked features to see which gives the best
|
|
975
|
+performance, the range being specified by a list element named
|
|
976
|
+<em>nFeatures</em> and the performance type (e.g. Balanced Accuracy)
|
|
977
|
+specified by a list element named <em>performanceType</em>. Therefore,
|
|
978
|
+some kind of parameter tuning always happens, even if the feature
|
|
979
|
+ranking or classifier function does not have any explicit tuning
|
|
980
|
+parameters.</p>
|
|
981
|
+<p>Tuning is achieved in ClassifyR by providing a variable called
|
|
982
|
+<em>tuneParams</em> to the SelectParams or TrainParams constructor.
|
|
983
|
+<em>tuneParams</em> is a named list, with the names being the names of
|
|
984
|
+the tuning variables, except for one which is named
|
|
985
|
+<em>“performanceType”</em> and specifies the performance metric to use
|
|
986
|
+for picking the parameter values. Any of the non-sample-specific
|
|
987
|
+performance metrics which <em>calcCVperformance</em> calculates can be
|
|
988
|
+optimised.</p>
|
|
989
|
+<div class="sourceCode" id="cb38"><pre class="downlit sourceCode r">
|
726
|
990
|
<code class="sourceCode R"><span><span class="va">tuneList</span> <span class="op"><-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>cost <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0.01</span>, <span class="fl">0.1</span>, <span class="fl">1</span>, <span class="fl">10</span><span class="op">)</span><span class="op">)</span></span>
|
727
|
991
|
<span><span class="va">SVMparams</span> <span class="op"><-</span> <span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span>trainParams <span class="op">=</span> <span class="fu"><a href="../reference/TrainParams-class.html">TrainParams</a></span><span class="op">(</span><span class="st">"SVM"</span>, kernel <span class="op">=</span> <span class="st">"linear"</span>, tuneParams <span class="op">=</span> <span class="va">tuneList</span><span class="op">)</span>,</span>
|
728
|
992
|
<span> predictParams <span class="op">=</span> <span class="fu"><a href="../reference/PredictParams-class.html">PredictParams</a></span><span class="op">(</span><span class="st">"SVM"</span><span class="op">)</span><span class="op">)</span></span>
|
729
|
993
|
<span><span class="va">SVMresults</span> <span class="op"><-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">SVMparams</span><span class="op">)</span></span></code></pre></div>
|
730
|
|
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
|
731
|
|
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
|
732
|
|
-<p>The index of chosen of the parameters, as well as all combinations of parameters and their associated performance metric, are stored for every validation, and can be accessed with the <em>tunedParameters</em> function.</p>
|
733
|
|
-<div class="sourceCode" id="cb48"><pre class="downlit sourceCode r">
|
|
994
|
+<p>The index of the chosen parameters, as well as all combinations of
|
|
995
|
+parameters and their associated performance metric, are stored for every
|
|
996
|
+validation, and can be accessed with the <em>tunedParameters</em>
|
|
997
|
+function.</p>
|
|
998
|
+<div class="sourceCode" id="cb39"><pre class="downlit sourceCode r">
|
734
|
999
|
<code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/base/length.html" class="external-link">length</a></span><span class="op">(</span><span class="fu"><a href="../reference/ClassifyResult-class.html">tunedParameters</a></span><span class="op">(</span><span class="va">SVMresults</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
|
735
|
1000
|
<pre><code><span><span class="co">## [1] 25</span></span></code></pre>
|
736
|
|
-<div class="sourceCode" id="cb50"><pre class="downlit sourceCode r">
|
|
1001
|
+<div class="sourceCode" id="cb41"><pre class="downlit sourceCode r">
|
737
|
1002
|
<code class="sourceCode R"><span><span class="fu"><a href="../reference/ClassifyResult-class.html">tunedParameters</a></span><span class="op">(</span><span class="va">SVMresults</span><span class="op">)</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">5</span><span class="op">]</span></span></code></pre></div>
|
738
|
1003
|
<pre><code><span><span class="co">## [[1]]</span></span>
|
739
|
1004
|
<span><span class="co">## [[1]]$tuneCombinations</span></span>
|
740
|
1005
|
<span><span class="co">## topN cost Balanced Accuracy</span></span>
|
741
|
|
-<span><span class="co">## 1 10 0.01 0.8507719</span></span>
|
742
|
|
-<span><span class="co">## 2 20 0.01 0.8551553</span></span>
|
743
|
|
-<span><span class="co">## 3 30 0.01 0.8696398</span></span>
|
744
|
|
-<span><span class="co">## 4 40 0.01 0.9073756</span></span>
|
745
|
|
-<span><span class="co">## 5 50 0.01 0.8986087</span></span>
|
746
|
|
-<span><span class="co">## 6 60 0.01 0.8986087</span></span>
|
747
|
|
-<span><span class="co">## 7 70 0.01 0.8942253</span></span>
|
748
|
|
-<span><span class="co">## 8 80 0.01 0.9036592</span></span>
|
749
|
|
-<span><span class="co">## 9 90 0.01 0.9036592</span></span>
|
750
|
|
-<span><span class="co">## 10 100 0.01 0.8986087</span></span>
|
751
|
|
-<span><span class="co">## 11 10 0.10 0.8608729</span></span>
|
752
|
|
-<span><span class="co">## 12 20 0.10 0.8942253</span></span>
|
753
|
|
-<span><span class="co">## 13 30 0.10 0.8746903</span></span>
|
754
|
|
-<span><span class="co">## 14 40 0.10 0.9188107</span></span>
|
755
|
|
-<span><span class="co">## 15 50 0.10 0.9087097</span></span>
|
756
|
|
-<span><span class="co">## 16 60 0.10 0.9137602</span></span>
|
757
|
|
-<span><span class="co">## 17 70 0.10 0.9188107</span></span>
|
758
|
|
-<span><span class="co">## 18 80 0.10 0.9137602</span></span>
|
759
|
|
-<span><span class="co">## 19 90 0.10 0.9238613</span></span>
|
760
|
|
-<span><span class="co">## 20 100 0.10 0.9477797</span></span>
|
761
|
|
-<span><span class="co">## 21 10 1.00 0.8992758</span></span>
|
762
|
|
-<span><span class="co">## 22 20 1.00 0.8898418</span></span>
|
763
|
|
-<span><span class="co">## 23 30 1.00 0.9144273</span></span>
|
764
|
|
-<span><span class="co">## 24 40 1.00 0.9049933</span></span>
|
765
|
|
-<span><span class="co">## 25 50 1.00 0.9666476</span></span>
|
766
|
|
-<span><span class="co">## 26 60 1.00 0.9811321</span></span>
|
767
|
|
-<span><span class="co">## 27 70 1.00 0.9855155</span></span>
|
768
|
|
-<span><span class="co">## 28 80 1.00 1.0000000</span></span>
|
769
|
|
-<span><span class="co">## 29 90 1.00 1.0000000</span></span>
|
|
1006
|
+<span><span class="co">## 1 10 0.01 0.7746331</span></span>
|
|
1007
|
+<span><span class="co">## 2 20 0.01 0.7847341</span></span>
|
|
1008
|
+<span><span class="co">## 3 30 0.01 0.7746331</span></span>
|
|
1009
|
+<span><span class="co">## 4 40 0.01 0.8167524</span></span>
|
|
1010
|
+<span><span class="co">## 5 50 0.01 0.8022680</span></span>
|
|
1011
|
+<span><span class="co">## 6 60 0.01 0.8457214</span></span>
|
|
1012
|
+<span><span class="co">## 7 70 0.01 0.8551553</span></span>
|
|
1013
|
+<span><span class="co">## 8 80 0.01 0.8551553</span></span>
|
|
1014
|
+<span><span class="co">## 9 90 0.01 0.8645893</span></span>
|
|
1015
|
+<span><span class="co">## 10 100 0.01 0.8885077</span></span>
|
|
1016
|
+<span><span class="co">## 11 10 0.10 0.8218029</span></span>
|
|
1017
|
+<span><span class="co">## 12 20 0.10 0.8608729</span></span>
|
|
1018
|
+<span><span class="co">## 13 30 0.10 0.8797408</span></span>
|
|
1019
|
+<span><span class="co">## 14 40 0.10 0.9043263</span></span>
|
|
1020
|
+<span><span class="co">## 15 50 0.10 0.9238613</span></span>
|
|
1021
|
+<span><span class="co">## 16 60 0.10 0.9282447</span></span>
|
|
1022
|
+<span><span class="co">## 17 70 0.10 0.9231942</span></span>
|
|
1023
|
+<span><span class="co">## 18 80 0.10 0.9282447</span></span>
|
|
1024
|
+<span><span class="co">## 19 90 0.10 0.9137602</span></span>
|
|
1025
|
+<span><span class="co">## 20 100 0.10 0.9622642</span></span>
|
|
1026
|
+<span><span class="co">## 21 10 1.00 0.8709739</span></span>
|
|
1027
|
+<span><span class="co">## 22 20 1.00 0.8659234</span></span>
|
|
1028
|
+<span><span class="co">## 23 30 1.00 0.8716409</span></span>
|
|
1029
|
+<span><span class="co">## 24 40 1.00 0.8861254</span></span>
|
|
1030
|
+<span><span class="co">## 25 50 1.00 0.9383457</span></span>
|
|
1031
|
+<span><span class="co">## 26 60 1.00 0.9289118</span></span>
|
|
1032
|
+<span><span class="co">## 27 70 1.00 0.9615971</span></span>
|
|
1033
|
+<span><span class="co">## 28 80 1.00 0.9666476</span></span>
|
|
1034
|
+<span><span class="co">## 29 90 1.00 0.9760816</span></span>
|
770
|
1035
|
<span><span class="co">## 30 100 1.00 1.0000000</span></span>
|
771
|
|
-<span><span class="co">## 31 10 10.00 0.9043263</span></span>
|
772
|
|
-<span><span class="co">## 32 20 10.00 0.8905089</span></span>
|
773
|
|
-<span><span class="co">## 33 30 10.00 0.9289118</span></span>
|
774
|
|
-<span><span class="co">## 34 40 10.00 0.9855155</span></span>
|
775
|
|
-<span><span class="co">## 35 50 10.00 1.0000000</span></span>
|
|
1036
|
+<span><span class="co">## 31 10 10.00 0.8652563</span></span>
|
|
1037
|
+<span><span class="co">## 32 20 10.00 0.8672575</span></span>
|
|
1038
|
+<span><span class="co">## 33 30 10.00 0.8955594</span></span>
|
|
1039
|
+<span><span class="co">## 34 40 10.00 0.9477797</span></span>
|
|
1040
|
+<span><span class="co">## 35 50 10.00 0.9949495</span></span>
|
776
|
1041
|
<span><span class="co">## 36 60 10.00 1.0000000</span></span>
|
777
|
1042
|
<span><span class="co">## 37 70 10.00 1.0000000</span></span>
|
778
|
1043
|
<span><span class="co">## 38 80 10.00 1.0000000</span></span>
|
...
|
...
|
@@ -780,95 +1045,95 @@
|
780
|
1045
|
<span><span class="co">## 40 100 10.00 1.0000000</span></span>
|
781
|
1046
|
<span><span class="co">## </span></span>
|
782
|
1047
|
<span><span class="co">## [[1]]$bestIndex</span></span>
|
783
|
|
-<span><span class="co">## [1] 28</span></span>
|
|
1048
|
+<span><span class="co">## [1] 30</span></span>
|
784
|
1049
|
<span><span class="co">## </span></span>
|
785
|
1050
|
<span><span class="co">## </span></span>
|
786
|
1051
|
<span><span class="co">## [[2]]</span></span>
|
787
|
1052
|
<span><span class="co">## [[2]]$tuneCombinations</span></span>
|
788
|
1053
|
<span><span class="co">## topN cost Balanced Accuracy</span></span>
|
789
|
|
-<span><span class="co">## 1 10 0.01 0.8066514</span></span>
|
790
|
|
-<span><span class="co">## 2 20 0.01 0.7783495</span></span>
|
791
|
|
-<span><span class="co">## 3 30 0.01 0.7877835</span></span>
|
792
|
|
-<span><span class="co">## 4 40 0.01 0.7783495</span></span>
|
793
|
|
-<span><span class="co">## 5 50 0.01 0.8117019</span></span>
|
794
|
|
-<span><span class="co">## 6 60 0.01 0.8117019</span></span>
|
795
|
|
-<span><span class="co">## 7 70 0.01 0.8117019</span></span>
|
796
|
|
-<span><span class="co">## 8 80 0.01 0.8261864</span></span>
|
797
|
|
-<span><span class="co">## 9 90 0.01 0.8261864</span></span>
|
798
|
|
-<span><span class="co">## 10 100 0.01 0.8261864</span></span>
|
799
|
|
-<span><span class="co">## 11 10 0.10 0.7928340</span></span>
|
800
|
|
-<span><span class="co">## 12 20 0.10 0.8029350</span></span>
|
801
|
|
-<span><span class="co">## 13 30 0.10 0.8406709</span></span>
|
802
|
|
-<span><span class="co">## 14 40 0.10 0.8406709</span></span>
|
803
|
|
-<span><span class="co">## 15 50 0.10 0.8457214</span></span>
|
804
|
|
-<span><span class="co">## 16 60 0.10 0.8551553</span></span>
|
805
|
|
-<span><span class="co">## 17 70 0.10 0.9181437</span></span>
|
806
|
|
-<span><span class="co">## 18 80 0.10 0.9326282</span></span>
|
807
|
|
-<span><span class="co">## 19 90 0.10 0.9275777</span></span>
|
808
|
|
-<span><span class="co">## 20 100 0.10 0.9326282</span></span>
|
809
|
|
-<span><span class="co">## 21 10 1.00 0.7746331</span></span>
|
810
|
|
-<span><span class="co">## 22 20 1.00 0.8602058</span></span>
|
811
|
|
-<span><span class="co">## 23 30 1.00 0.8652563</span></span>
|
812
|
|
-<span><span class="co">## 24 40 1.00 0.9023251</span></span>
|
813
|
|
-<span><span class="co">## 25 50 1.00 0.9413951</span></span>
|
814
|
|
-<span><span class="co">## 26 60 1.00 0.9514961</span></span>
|
815
|
|
-<span><span class="co">## 27 70 1.00 0.9521631</span></span>
|
816
|
|
-<span><span class="co">## 28 80 1.00 0.9565466</span></span>
|
817
|
|
-<span><span class="co">## 29 90 1.00 0.9615971</span></span>
|
818
|
|
-<span><span class="co">## 30 100 1.00 0.9855155</span></span>
|
819
|
|
-<span><span class="co">## 31 10 10.00 0.7847341</span></span>
|
820
|
|
-<span><span class="co">## 32 20 10.00 0.8615399</span></span>
|
821
|
|
-<span><span class="co">## 33 30 10.00 0.8999428</span></span>
|
822
|
|
-<span><span class="co">## 34 40 10.00 0.9427292</span></span>
|
823
|
|
-<span><span class="co">## 35 50 10.00 0.9811321</span></span>
|
|
1054
|
+<span><span class="co">## 1 10 0.01 0.8218029</span></span>
|
|
1055
|
+<span><span class="co">## 2 20 0.01 0.8029350</span></span>
|
|
1056
|
+<span><span class="co">## 3 30 0.01 0.8180865</span></span>
|
|
1057
|
+<span><span class="co">## 4 40 0.01 0.8180865</span></span>
|
|
1058
|
+<span><span class="co">## 5 50 0.01 0.8268534</span></span>
|
|
1059
|
+<span><span class="co">## 6 60 0.01 0.8268534</span></span>
|
|
1060
|
+<span><span class="co">## 7 70 0.01 0.8268534</span></span>
|
|
1061
|
+<span><span class="co">## 8 80 0.01 0.8312369</span></span>
|
|
1062
|
+<span><span class="co">## 9 90 0.01 0.8652563</span></span>
|
|
1063
|
+<span><span class="co">## 10 100 0.01 0.8746903</span></span>
|
|
1064
|
+<span><span class="co">## 11 10 0.10 0.8073185</span></span>
|
|
1065
|
+<span><span class="co">## 12 20 0.10 0.8180865</span></span>
|
|
1066
|
+<span><span class="co">## 13 30 0.10 0.8942253</span></span>
|
|
1067
|
+<span><span class="co">## 14 40 0.10 0.8891748</span></span>
|
|
1068
|
+<span><span class="co">## 15 50 0.10 0.9420621</span></span>
|
|
1069
|
+<span><span class="co">## 16 60 0.10 0.9420621</span></span>
|
|
1070
|
+<span><span class="co">## 17 70 0.10 0.9420621</span></span>
|
|
1071
|
+<span><span class="co">## 18 80 0.10 0.9420621</span></span>
|
|
1072
|
+<span><span class="co">## 19 90 0.10 0.9420621</span></span>
|
|
1073
|
+<span><span class="co">## 20 100 0.10 0.9420621</span></span>
|
|
1074
|
+<span><span class="co">## 21 10 1.00 0.7985516</span></span>
|
|
1075
|
+<span><span class="co">## 22 20 1.00 0.8804079</span></span>
|
|
1076
|
+<span><span class="co">## 23 30 1.00 0.9087097</span></span>
|
|
1077
|
+<span><span class="co">## 24 40 1.00 0.9326282</span></span>
|
|
1078
|
+<span><span class="co">## 25 50 1.00 0.9710311</span></span>
|
|
1079
|
+<span><span class="co">## 26 60 1.00 0.9521631</span></span>
|
|
1080
|
+<span><span class="co">## 27 70 1.00 0.9760816</span></span>
|
|
1081
|
+<span><span class="co">## 28 80 1.00 0.9855155</span></span>
|
|
1082
|
+<span><span class="co">## 29 90 1.00 0.9760816</span></span>
|
|
1083
|
+<span><span class="co">## 30 100 1.00 0.9760816</span></span>
|
|
1084
|
+<span><span class="co">## 31 10 10.00 0.8275205</span></span>
|
|
1085
|
+<span><span class="co">## 32 20 10.00 0.8986087</span></span>
|
|
1086
|
+<span><span class="co">## 33 30 10.00 0.9477797</span></span>
|
|
1087
|
+<span><span class="co">## 34 40 10.00 0.9565466</span></span>
|
|
1088
|
+<span><span class="co">## 35 50 10.00 0.9760816</span></span>
|
824
|
1089
|
<span><span class="co">## 36 60 10.00 0.9905660</span></span>
|
825
|
|
-<span><span class="co">## 37 70 10.00 0.9905660</span></span>
|
|
1090
|
+<span><span class="co">## 37 70 10.00 1.0000000</span></span>
|
826
|
1091
|
<span><span class="co">## 38 80 10.00 1.0000000</span></span>
|
827
|
1092
|
<span><span class="co">## 39 90 10.00 1.0000000</span></span>
|
828
|
1093
|
<span><span class="co">## 40 100 10.00 1.0000000</span></span>
|
829
|
1094
|
<span><span class="co">## </span></span>
|
830
|
1095
|
<span><span class="co">## [[2]]$bestIndex</span></span>
|
831
|
|
-<span><span class="co">## [1] 38</span></span>
|
|
1096
|
+<span><span class="co">## [1] 37</span></span>
|
832
|
1097
|
<span><span class="co">## </span></span>
|
833
|
1098
|
<span><span class="co">## </span></span>
|
834
|
1099
|
<span><span class="co">## [[3]]</span></span>
|
835
|
1100
|
<span><span class="co">## [[3]]$tuneCombinations</span></span>
|
836
|
1101
|
<span><span class="co">## topN cost Balanced Accuracy</span></span>
|
837
|
|
-<span><span class="co">## 1 10 0.01 0.7739661</span></span>
|
838
|
|
-<span><span class="co">## 2 20 0.01 0.8602058</span></span>
|
839
|
|
-<span><span class="co">## 3 30 0.01 0.8703068</span></span>
|
840
|
|
-<span><span class="co">## 4 40 0.01 0.8652563</span></span>
|
841
|
|
-<span><span class="co">## 5 50 0.01 0.8847913</span></span>
|
842
|
|
-<span><span class="co">## 6 60 0.01 0.8804079</span></span>
|
843
|
|
-<span><span class="co">## 7 70 0.01 0.8898418</span></span>
|
844
|
|
-<span><span class="co">## 8 80 0.01 0.8992758</span></span>
|
845
|
|
-<span><span class="co">## 9 90 0.01 0.8942253</span></span>
|
846
|
|
-<span><span class="co">## 10 100 0.01 0.9043263</span></span>
|
847
|
|
-<span><span class="co">## 11 10 0.10 0.8029350</span></span>
|
848
|
|
-<span><span class="co">## 12 20 0.10 0.9043263</span></span>
|
849
|
|
-<span><span class="co">## 13 30 0.10 0.9188107</span></span>
|
850
|
|
-<span><span class="co">## 14 40 0.10 0.9289118</span></span>
|
851
|
|
-<span><span class="co">## 15 50 0.10 0.8911759</span></span>
|
852
|
|
-<span><span class="co">## 16 60 0.10 0.9144273</span></span>
|
853
|
|
-<span><span class="co">## 17 70 0.10 0.9238613</span></span>
|
854
|
|
-<span><span class="co">## 18 80 0.10 0.9477797</span></span>
|
855
|
|
-<span><span class="co">## 19 90 0.10 0.9383457</span></span>
|
856
|
|
-<span><span class="co">## 20 100 0.10 0.9572136</span></span>
|
857
|
|
-<span><span class="co">## 21 10 1.00 0.8413379</span></span>
|
858
|
|
-<span><span class="co">## 22 20 1.00 0.9572136</span></span>
|
859
|
|
-<span><span class="co">## 23 30 1.00 0.9565466</span></span>
|
860
|
|
-<span><span class="co">## 24 40 1.00 0.9477797</span></span>
|
861
|
|
-<span><span class="co">## 25 50 1.00 0.9760816</span></span>
|
862
|
|
-<span><span class="co">## 26 60 1.00 0.9760816</span></span>
|
863
|
|
-<span><span class="co">## 27 70 1.00 0.9905660</span></span>
|
864
|
|
-<span><span class="co">## 28 80 1.00 0.9905660</span></span>
|
865
|
|
-<span><span class="co">## 29 90 1.00 0.9905660</span></span>
|
|
1102
|
+<span><span class="co">## 1 10 0.01 0.8218029</span></span>
|
|
1103
|
+<span><span class="co">## 2 20 0.01 0.8167524</span></span>
|
|
1104
|
+<span><span class="co">## 3 30 0.01 0.8211359</span></span>
|
|
1105
|
+<span><span class="co">## 4 40 0.01 0.8022680</span></span>
|
|
1106
|
+<span><span class="co">## 5 50 0.01 0.8117019</span></span>
|
|
1107
|
+<span><span class="co">## 6 60 0.01 0.8268534</span></span>
|
|
1108
|
+<span><span class="co">## 7 70 0.01 0.8362874</span></span>
|
|
1109
|
+<span><span class="co">## 8 80 0.01 0.8218029</span></span>
|
|
1110
|
+<span><span class="co">## 9 90 0.01 0.8413379</span></span>
|
|
1111
|
+<span><span class="co">## 10 100 0.01 0.8507719</span></span>
|
|
1112
|
+<span><span class="co">## 11 10 0.10 0.7978845</span></span>
|
|
1113
|
+<span><span class="co">## 12 20 0.10 0.8501048</span></span>
|
|
1114
|
+<span><span class="co">## 13 30 0.10 0.8841243</span></span>
|
|
1115
|
+<span><span class="co">## 14 40 0.10 0.8746903</span></span>
|
|
1116
|
+<span><span class="co">## 15 50 0.10 0.8928912</span></span>
|
|
1117
|
+<span><span class="co">## 16 60 0.10 0.9269106</span></span>
|
|
1118
|
+<span><span class="co">## 17 70 0.10 0.9225272</span></span>
|
|
1119
|
+<span><span class="co">## 18 80 0.10 0.9231942</span></span>
|
|
1120
|
+<span><span class="co">## 19 90 0.10 0.9521631</span></span>
|
|
1121
|
+<span><span class="co">## 20 100 0.10 0.9427292</span></span>
|
|
1122
|
+<span><span class="co">## 21 10 1.00 0.8224700</span></span>
|
|
1123
|
+<span><span class="co">## 22 20 1.00 0.8659234</span></span>
|
|
1124
|
+<span><span class="co">## 23 30 1.00 0.8948923</span></span>
|
|
1125
|
+<span><span class="co">## 24 40 1.00 0.9275777</span></span>
|
|
1126
|
+<span><span class="co">## 25 50 1.00 0.9514961</span></span>
|
|
1127
|
+<span><span class="co">## 26 60 1.00 0.9565466</span></span>
|
|
1128
|
+<span><span class="co">## 27 70 1.00 0.9716981</span></span>
|
|
1129
|
+<span><span class="co">## 28 80 1.00 0.9811321</span></span>
|
|
1130
|
+<span><span class="co">## 29 90 1.00 0.9949495</span></span>
|
866
|
1131
|
<span><span class="co">## 30 100 1.00 1.0000000</span></span>
|
867
|
|
-<span><span class="co">## 31 10 10.00 0.8608729</span></span>
|
868
|
|
-<span><span class="co">## 32 20 10.00 0.9855155</span></span>
|
869
|
|
-<span><span class="co">## 33 30 10.00 0.9898990</span></span>
|
870
|
|
-<span><span class="co">## 34 40 10.00 1.0000000</span></span>
|
871
|
|
-<span><span class="co">## 35 50 10.00 1.0000000</span></span>
|
|
1132
|
+<span><span class="co">## 31 10 10.00 0.8281875</span></span>
|
|
1133
|
+<span><span class="co">## 32 20 10.00 0.8760244</span></span>
|
|
1134
|
+<span><span class="co">## 33 30 10.00 0.9471126</span></span>
|
|
1135
|
+<span><span class="co">## 34 40 10.00 0.9615971</span></span>
|
|
1136
|
+<span><span class="co">## 35 50 10.00 0.9905660</span></span>
|
872
|
1137
|
<span><span class="co">## 36 60 10.00 1.0000000</span></span>
|
873
|
1138
|
<span><span class="co">## 37 70 10.00 1.0000000</span></span>
|
874
|
1139
|
<span><span class="co">## 38 80 10.00 1.0000000</span></span>
|
...
|
...
|
@@ -882,88 +1147,88 @@
|
882
|
1147
|
<span><span class="co">## [[4]]</span></span>
|
883
|
1148
|
<span><span class="co">## [[4]]$tuneCombinations</span></span>
|
884
|
1149
|
<span><span class="co">## topN cost Balanced Accuracy</span></span>
|
885
|
|
-<span><span class="co">## 1 10 0.01 0.8073185</span></span>
|
886
|
|
-<span><span class="co">## 2 20 0.01 0.8167524</span></span>
|
887
|
|
-<span><span class="co">## 3 30 0.01 0.8073185</span></span>
|
888
|
|
-<span><span class="co">## 4 40 0.01 0.8073185</span></span>
|
889
|
|
-<span><span class="co">## 5 50 0.01 0.8463884</span></span>
|
890
|
|
-<span><span class="co">## 6 60 0.01 0.8463884</span></span>
|
891
|
|
-<span><span class="co">## 7 70 0.01 0.8420050</span></span>
|
892
|
|
-<span><span class="co">## 8 80 0.01 0.8275205</span></span>
|
893
|
|
-<span><span class="co">## 9 90 0.01 0.8703068</span></span>
|
894
|
|
-<span><span class="co">## 10 100 0.01 0.8746903</span></span>
|
895
|
|
-<span><span class="co">## 11 10 0.10 0.8079855</span></span>
|
896
|
|
-<span><span class="co">## 12 20 0.10 0.8268534</span></span>
|
897
|
|
-<span><span class="co">## 13 30 0.10 0.8224700</span></span>
|
898
|
|
-<span><span class="co">## 14 40 0.10 0.8325710</span></span>
|
899
|
|
-<span><span class="co">## 15 50 0.10 0.8942253</span></span>
|
900
|
|
-<span><span class="co">## 16 60 0.10 0.9036592</span></span>
|
901
|
|
-<span><span class="co">## 17 70 0.10 0.9130932</span></span>
|
902
|
|
-<span><span class="co">## 18 80 0.10 0.9181437</span></span>
|
903
|
|
-<span><span class="co">## 19 90 0.10 0.9188107</span></span>
|
904
|
|
-<span><span class="co">## 20 100 0.10 0.9188107</span></span>
|
905
|
|
-<span><span class="co">## 21 10 1.00 0.7891176</span></span>
|
906
|
|
-<span><span class="co">## 22 20 1.00 0.8137031</span></span>
|
907
|
|
-<span><span class="co">## 23 30 1.00 0.8608729</span></span>
|
908
|
|
-<span><span class="co">## 24 40 1.00 0.8665904</span></span>
|
909
|
|
-<span><span class="co">## 25 50 1.00 0.9565466</span></span>
|
910
|
|
-<span><span class="co">## 26 60 1.00 0.9471126</span></span>
|
911
|
|
-<span><span class="co">## 27 70 1.00 0.9565466</span></span>
|
912
|
|
-<span><span class="co">## 28 80 1.00 0.9754145</span></span>
|
913
|
|
-<span><span class="co">## 29 90 1.00 0.9754145</span></span>
|
914
|
|
-<span><span class="co">## 30 100 1.00 0.9848485</span></span>
|
915
|
|
-<span><span class="co">## 31 10 10.00 0.8036021</span></span>
|
916
|
|
-<span><span class="co">## 32 20 10.00 0.8376215</span></span>
|
917
|
|
-<span><span class="co">## 33 30 10.00 0.9194778</span></span>
|
918
|
|
-<span><span class="co">## 34 40 10.00 0.9150943</span></span>
|
|
1150
|
+<span><span class="co">## 1 10 0.01 0.7594816</span></span>
|
|
1151
|
+<span><span class="co">## 2 20 0.01 0.8174195</span></span>
|
|
1152
|
+<span><span class="co">## 3 30 0.01 0.8268534</span></span>
|
|
1153
|
+<span><span class="co">## 4 40 0.01 0.8362874</span></span>
|
|
1154
|
+<span><span class="co">## 5 50 0.01 0.8457214</span></span>
|
|
1155
|
+<span><span class="co">## 6 60 0.01 0.8457214</span></span>
|
|
1156
|
+<span><span class="co">## 7 70 0.01 0.8595388</span></span>
|
|
1157
|
+<span><span class="co">## 8 80 0.01 0.8784067</span></span>
|
|
1158
|
+<span><span class="co">## 9 90 0.01 0.8784067</span></span>
|
|
1159
|
+<span><span class="co">## 10 100 0.01 0.8689727</span></span>
|
|
1160
|
+<span><span class="co">## 11 10 0.10 0.7638651</span></span>
|
|
1161
|
+<span><span class="co">## 12 20 0.10 0.8602058</span></span>
|
|
1162
|
+<span><span class="co">## 13 30 0.10 0.8797408</span></span>
|
|
1163
|
+<span><span class="co">## 14 40 0.10 0.8948923</span></span>
|
|
1164
|
+<span><span class="co">## 15 50 0.10 0.8992758</span></span>
|
|
1165
|
+<span><span class="co">## 16 60 0.10 0.9043263</span></span>
|
|
1166
|
+<span><span class="co">## 17 70 0.10 0.9043263</span></span>
|
|
1167
|
+<span><span class="co">## 18 80 0.10 0.9326282</span></span>
|
|
1168
|
+<span><span class="co">## 19 90 0.10 0.9376787</span></span>
|
|
1169
|
+<span><span class="co">## 20 100 0.10 0.9615971</span></span>
|
|
1170
|
+<span><span class="co">## 21 10 1.00 0.7796836</span></span>
|
|
1171
|
|