Browse code

update website

Nick authored on 10/11/2022 23:02:59
Showing 68 changed files

... ...
@@ -20,7 +20,7 @@
20 20
     <a href="https://sydneybiox.github.io/ClassifyR/#main" class="visually-hidden-focusable">Skip to contents</a>
21 21
     
22 22
 
23
-    <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light"><div class="container">
23
+    <nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-dark"><div class="container">
24 24
     
25 25
     <a class="navbar-brand me-2" href="https://sydneybiox.github.io/ClassifyR/index.html">ClassifyR</a>
26 26
 
... ...
@@ -6,12 +6,12 @@
6 6
 <meta http-equiv="X-UA-Compatible" content="IE=edge">
7 7
 <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
8 8
 <meta name="description" content="ClassifyR">
9
-<title>An Introduction to ClassifyR</title>
9
+<title>An Introduction to ClassifyR • ClassifyR</title>
10 10
 <script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
11 11
 <link href="../deps/bootstrap-5.1.3/bootstrap.min.css" rel="stylesheet">
12 12
 <script src="../deps/bootstrap-5.1.3/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous">
13 13
 <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous">
14
-<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to ClassifyR">
14
+<!-- bootstrap-toc --><script src="https://cdn.rawgit.com/afeld/bootstrap-toc/v1.0.1/dist/bootstrap-toc.min.js"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><meta property="og:title" content="An Introduction to **ClassifyR**">
15 15
 <meta property="og:description" content="ClassifyR">
16 16
 <!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
17 17
 <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
... ...
@@ -22,7 +22,7 @@
22 22
     <a href="#main" class="visually-hidden-focusable">Skip to contents</a>
23 23
     
24 24
 
25
-    <nav class="navbar fixed-top navbar-light navbar-expand-lg bg-light"><div class="container">
25
+    <nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-dark"><div class="container">
26 26
     
27 27
     <a class="navbar-brand me-2" href="../index.html">ClassifyR</a>
28 28
 
... ...
@@ -58,10 +58,13 @@
58 58
 
59 59
 
60 60
 
61
-<script src="ClassifyR_files/accessible-code-block-0.0.1/empty-anchor.js"></script><div class="row">
61
+
62
+<div class="row">
62 63
   <main id="main" class="col-md-9"><div class="page-header">
63
-      <img src="" class="logo" alt=""><h1>An Introduction to ClassifyR</h1>
64
-                        <h4 data-toc-skip class="author">Dario Strbenac, Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University of Sydney, Australia.</h4>
64
+      <img src="" class="logo" alt=""><h1>An Introduction to <strong>ClassifyR</strong></h1>
65
+                        <h4 data-toc-skip class="author">Dario Strbenac,
66
+Ellis Patrick, Graham Mann, Jean Yang, John Ormerod <br> The University
67
+of Sydney, Australia.</h4>
65 68
             
66 69
       
67 70
       
... ...
@@ -73,30 +76,58 @@
73 76
 <div class="section level2">
74 77
 <h2 id="installation">Installation<a class="anchor" aria-label="anchor" href="#installation"></a>
75 78
 </h2>
76
-<p>Typically, each feature selection method or classifier originates from a different R package, which <strong>ClassifyR</strong> provides a wrapper around. By default, only high-performance t-test/F-test and random forest are installed. If you intend to compare between numerous different modelling methods, you should install all suggested packages at once by using the command <code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>. This will take a few minutes, particularly on Linux, because each package will be compiled from source code.</p>
79
+<p>Typically, each feature selection method or classifier originates
80
+from a different R package, which <strong>ClassifyR</strong> provides a
81
+wrapper around. By default, only high-performance t-test/F-test and
82
+random forest are installed. If you intend to compare between numerous
83
+different modelling methods, you should install all suggested packages
84
+at once by using the command
85
+<code>BiocManager::install("ClassifyR", dependencies = TRUE)</code>.
86
+This will take a few minutes, particularly on Linux, because each
87
+package will be compiled from source code.</p>
77 88
 </div>
78 89
 <div class="section level2">
79 90
 <h2 id="overview">Overview<a class="anchor" aria-label="anchor" href="#overview"></a>
80 91
 </h2>
81
-<p><strong>ClassifyR</strong> provides a structured pipeline for cross-validated classification. Classification is viewed in terms of four stages, data transformation, feature selection, classifier training, and prediction. The driver functions <em>crossValidate</em> and <em>runTests</em> implements varieties of cross-validation. They are:</p>
92
+<p><strong>ClassifyR</strong> provides a structured pipeline for
93
+cross-validated classification. Classification is viewed in terms of
94
+four stages, data transformation, feature selection, classifier
95
+training, and prediction. The driver functions <em>crossValidate</em>
96
+and <em>runTests</em> implement varieties of cross-validation. They
97
+are:</p>
82 98
 <ul>
83
-<li>Permutation of the order of samples followed by k-fold cross-validation (runTests only)</li>
99
+<li>Permutation of the order of samples followed by k-fold
100
+cross-validation (runTests only)</li>
84 101
 <li>Repeated x% test set cross-validation</li>
85 102
 <li>leave-k-out cross-validation</li>
86 103
 </ul>
87
-<p>Driver functions can use parallel processing capabilities in R to speed up cross-validations when many CPUs are available. The output of the driver functions is a <em>ClassifyResult</em> object which can be directly used by the performance evaluation functions. The process of classification is summarised by a flowchart.</p>
88
-<img src="" style="margin-left: auto;margin-right: auto"><p>Importantly, ClassifyR implements a number of methods for classification using different kinds of changes in measurements between classes. Most classifiers work with features where the means are different. In addition to changes in means (DM), <strong>ClassifyR</strong> also allows for classification using differential variability (DV; changes in scale) and differential distribution (DD; changes in location and/or scale).</p>
104
+<p>Driver functions can use parallel processing capabilities in R to
105
+speed up cross-validations when many CPUs are available. The output of
106
+the driver functions is a <em>ClassifyResult</em> object which can be
107
+directly used by the performance evaluation functions. The process of
108
+classification is summarised by a flowchart.</p>
109
+<img src="" style="margin-left: auto;margin-right: auto"><p>Importantly, ClassifyR implements a number of methods for
110
+classification using different kinds of changes in measurements between
111
+classes. Most classifiers work with features where the means are
112
+different. In addition to changes in means (DM),
113
+<strong>ClassifyR</strong> also allows for classification using
114
+differential variability (DV; changes in scale) and differential
115
+distribution (DD; changes in location and/or scale).</p>
89 116
 <div class="section level3">
90 117
 <h3 id="case-study-diagnosing-asthma">Case Study: Diagnosing Asthma<a class="anchor" aria-label="anchor" href="#case-study-diagnosing-asthma"></a>
91 118
 </h3>
92
-<p>To demonstrate some key features of ClassifyR, a data set consisting of the 2000 most variably expressed genes and 190 people will be used to quickly obtain results. The journal article corresponding to the data set was published in <em>Scientific Reports</em> in 2018 and is titled <a href="http://www.nature.com/articles/s41598-018-27189-4" class="external-link">A Nasal Brush-based Classifier of Asthma Identified by Machine Learning Analysis of Nasal RNA Sequence Data</a>.</p>
119
+<p>To demonstrate some key features of ClassifyR, a data set consisting
120
+of the 2000 most variably expressed genes and 190 people will be used to
121
+quickly obtain results. The journal article corresponding to the data
122
+set was published in <em>Scientific Reports</em> in 2018 and is titled
123
+<a href="http://www.nature.com/articles/s41598-018-27189-4" class="external-link">A Nasal
124
+Brush-based Classifier of Asthma Identified by Machine Learning Analysis
125
+of Nasal RNA Sequence Data</a>.</p>
93 126
 <p>Load the package.</p>
94 127
 <div class="sourceCode" id="cb1"><pre class="downlit sourceCode r">
95 128
 <code class="sourceCode R"><span><span class="kw"><a href="https://rdrr.io/r/base/library.html" class="external-link">library</a></span><span class="op">(</span><span class="va"><a href="https://sydneybiox.github.io/ClassifyR/">ClassifyR</a></span><span class="op">)</span></span></code></pre></div>
96
-<pre><code><span><span class="co">## Warning: multiple methods tables found for 'aperm'</span></span></code></pre>
97
-<pre><code><span><span class="co">## Warning: replacing previous import 'BiocGenerics::aperm' by 'DelayedArray::aperm' when loading 'SummarizedExperiment'</span></span></code></pre>
98 129
 <p>A glimpse at the RNA measurements and sample classes.</p>
99
-<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
130
+<div class="sourceCode" id="cb2"><pre class="downlit sourceCode r">
100 131
 <code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/data.html" class="external-link">data</a></span><span class="op">(</span><span class="va">asthma</span><span class="op">)</span> <span class="co"># Contains measurements and classes variables.</span></span>
101 132
 <span><span class="va">measurements</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">5</span>, <span class="fl">1</span><span class="op">:</span><span class="fl">5</span><span class="op">]</span></span></code></pre></div>
102 133
 <pre><code><span><span class="co">##            HBB BPIFA1  XIST FCGR3B HBA2</span></span>
... ...
@@ -105,19 +136,44 @@
105 136
 <span><span class="co">## Sample 3 12.15  17.44 10.21   7.87 9.68</span></span>
106 137
 <span><span class="co">## Sample 4 10.60  11.87  6.27  14.75 8.96</span></span>
107 138
 <span><span class="co">## Sample 5  8.18  15.01 11.21   6.77 6.43</span></span></code></pre>
108
-<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
139
+<div class="sourceCode" id="cb4"><pre class="downlit sourceCode r">
109 140
 <code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">classes</span><span class="op">)</span></span></code></pre></div>
110 141
 <pre><code><span><span class="co">## [1] No  No  No  No  Yes No </span></span>
111 142
 <span><span class="co">## Levels: No Yes</span></span></code></pre>
112
-<p>The numeric matrix variable <em>measurements</em> stores the normalised values of the RNA gene abundances for each sample and the factor vector <em>classes</em> identifies which class the samples belong to. The measurements were normalised using <strong>DESeq2</strong>’s <em>varianceStabilizingTransformation</em> function, which produces <span class="math inline">\(log_2\)</span>-like data.</p>
113
-<p>For more complex data sets with multiple kinds of experiments (e.g. DNA methylation, copy number, gene expression on the same set of samples) a <a href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html" class="external-link"><em>MultiAssayExperiment</em></a> is recommended for data storage and supported by <strong>ClassifyR</strong>’s methods.</p>
143
+<p>The numeric matrix variable <em>measurements</em> stores the
144
+normalised values of the RNA gene abundances for each sample and the
145
+factor vector <em>classes</em> identifies which class the samples belong
146
+to. The measurements were normalised using <strong>DESeq2</strong>’s
147
+<em>varianceStabilizingTransformation</em> function, which produces
148
+<span class="math inline">\(log_2\)</span>-like data.</p>
149
+<p>For more complex data sets with multiple kinds of experiments
150
+(e.g. DNA methylation, copy number, gene expression on the same set of
151
+samples) a <a href="https://bioconductor.org/packages/release/bioc/html/MultiAssayExperiment.html" class="external-link"><em>MultiAssayExperiment</em></a>
152
+is recommended for data storage and supported by
153
+<strong>ClassifyR</strong>’s methods.</p>
114 154
 </div>
115 155
 </div>
116 156
 <div class="section level2">
117 157
 <h2 id="quick-start-crossvalidate-function">Quick Start: <em>crossValidate</em> Function<a class="anchor" aria-label="anchor" href="#quick-start-crossvalidate-function"></a>
118 158
 </h2>
119
-<p>The <em>crossValidate</em> function offers a quick and simple way to start analysing a dataset in ClassifyR. It is a wrapper for <em>runTests</em>, the core model building and testing function of ClassifyR. <em>crossValidate</em> must be supplied with <em>measurements</em>, a simple tabular data container or a list-like structure of such related tabular data on common samples. The classes of it may be <em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>, <em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>. For a dataset with <span class="math inline">\(n\)</span> observations and <span class="math inline">\(p\)</span> variables, the <em>crossValidate</em> function will accept inputs of the following shapes:</p>
159
+<p>The <em>crossValidate</em> function offers a quick and simple way to
160
+start analysing a dataset in ClassifyR. It is a wrapper for
161
+<em>runTests</em>, the core model building and testing function of
162
+ClassifyR. <em>crossValidate</em> must be supplied with
163
+<em>measurements</em>, a simple tabular data container or a list-like
164
+structure of such related tabular data on common samples. The classes of
165
+it may be <em>matrix</em>, <em>data.frame</em>, <em>DataFrame</em>,
166
+<em>MultiAssayExperiment</em> or <em>list</em> of <em>data.frames</em>.
167
+For a dataset with <span class="math inline">\(n\)</span> observations
168
+and <span class="math inline">\(p\)</span> variables, the
169
+<em>crossValidate</em> function will accept inputs of the following
170
+shapes:</p>
120 171
 <table class="table">
172
+<colgroup>
173
+<col width="25%">
174
+<col width="37%">
175
+<col width="37%">
176
+</colgroup>
121 177
 <thead><tr class="header">
122 178
 <th>Data Type</th>
123 179
 <th align="center"><span class="math inline">\(n \times p\)</span></th>
... ...
@@ -146,34 +202,64 @@
146 202
 </tr>
147 203
 <tr class="odd">
148 204
 <td>
149
-<span style="font-family: 'Courier New', monospace;">list</span> of <span style="font-family: 'Courier New', monospace;">data.frame</span>s</td>
205
+<span style="font-family: 'Courier New', monospace;">list</span> of
206
+<span style="font-family: 'Courier New', monospace;">data.frame</span>s</td>
150 207
 <td align="center">✔</td>
151 208
 <td align="center"></td>
152 209
 </tr>
153 210
 </tbody>
154 211
 </table>
155
-<p><em>crossValidate</em> must also be supplied with <em>outcome</em>, which represents the prediction to be made in a variety of possible ways.</p>
212
+<p><em>crossValidate</em> must also be supplied with <em>outcome</em>,
213
+which represents the prediction to be made in a variety of possible
214
+ways.</p>
156 215
 <ul>
157
-<li>A <em>factor</em> that contains the class label for each observation. <em>classes</em> must be of length <span class="math inline">\(n\)</span>.</li>
158
-<li>A <em>character</em> of length 1 that matches a column name in a data frame which holds the classes. The classes will automatically be removed before training is done.</li>
159
-<li>A <em>Surv</em> object of the same length as the number of samples in the data which contains information about the time and censoring of the samples.</li>
160
-<li>A <em>character</em> vector of length 2 or 3 that each match a column name in a data frame which holds information about the time and censoring of the samples. The time-to-event columns will automatically be removed before training is done.</li>
216
+<li>A <em>factor</em> that contains the class label for each
217
+observation. <em>classes</em> must be of length <span class="math inline">\(n\)</span>.</li>
218
+<li>A <em>character</em> of length 1 that matches a column name in a
219
+data frame which holds the classes. The classes will automatically be
220
+removed before training is done.</li>
221
+<li>A <em>Surv</em> object of the same length as the number of samples
222
+in the data which contains information about the time and censoring of
223
+the samples.</li>
224
+<li>A <em>character</em> vector of length 2 or 3 that each match a
225
+column name in a data frame which holds information about the time and
226
+censoring of the samples. The time-to-event columns will automatically
227
+be removed before training is done.</li>
161 228
 </ul>
162
-<p>The type of classifier used can be changed with the <em>classifier</em> argument. The default is a random forest, which seamlessly handles categorical and numerical data. A full list of classifiers can be seen by running <em>?crossValidate</em>. A feature selection step can be performed before classification using <em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by default. Similarly, the number of folds and number of repeats for cross validation can be changed with the <em>nFolds</em> and <em>nRepeats</em> arguments. If wanted, <em>nCores</em> can be specified to run the cross validation in parallel. To perform 5-fold cross-validation of a Support Vector Machine with 2 repeats:</p>
163
-<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
229
+<p>The type of classifier used can be changed with the
230
+<em>classifier</em> argument. The default is a random forest, which
231
+seamlessly handles categorical and numerical data. A full list of
232
+classifiers can be seen by running <em>?crossValidate</em>. A feature
233
+selection step can be performed before classification using
234
+<em>nFeatures</em> and <em>selectionMethod</em>, which is a t-test by
235
+default. Similarly, the number of folds and number of repeats for cross
236
+validation can be changed with the <em>nFolds</em> and <em>nRepeats</em>
237
+arguments. If wanted, <em>nCores</em> can be specified to run the cross
238
+validation in parallel. To perform 5-fold cross-validation of a Support
239
+Vector Machine with 2 repeats:</p>
240
+<div class="sourceCode" id="cb6"><pre class="downlit sourceCode r">
164 241
 <code class="sourceCode R"><span><span class="va">result</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/crossValidate.html">crossValidate</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, classifier <span class="op">=</span> <span class="st">"SVM"</span>,</span>
165 242
 <span>                        nFeatures <span class="op">=</span> <span class="fl">20</span>, nFolds <span class="op">=</span> <span class="fl">5</span>, nRepeats <span class="op">=</span> <span class="fl">2</span>, nCores <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
166 243
 <pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
167
-<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
244
+<div class="sourceCode" id="cb8"><pre class="downlit sourceCode r">
168 245
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span><span class="op">)</span></span></code></pre></div>
169 246
 <pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
170 247
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-5-1.png" width="700"></p>
171 248
 <div class="section level3">
172 249
 <h3 id="data-integration-with-crossvalidate">Data Integration with crossValidate<a class="anchor" aria-label="anchor" href="#data-integration-with-crossvalidate"></a>
173 250
 </h3>
174
-<p><em>crossValidate</em> also allows data from multiple sources to be integrated into a single model. The integration method can be specified with <em>multiViewMethod</em> argument. In this example, suppose the first 10 variables in the asthma data set are from a certain source and the remaining 1990 variables are from a second source. To integrate multiple data sets, each variable must be labeled with the data set it came from. This is done in a different manner depending on the data type of <em>measurements</em>.</p>
175
-<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified using <em>mcols</em>. In the column metadata, each feature must have an <em>assay</em> and a <em>feature</em> name.</p>
176
-<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
251
+<p><em>crossValidate</em> also allows data from multiple sources to be
252
+integrated into a single model. The integration method can be specified
253
+with the <em>multiViewMethod</em> argument. In this example, suppose the
254
+first 10 variables in the asthma data set are from a certain source and
255
+the remaining 1990 variables are from a second source. To integrate
256
+multiple data sets, each variable must be labeled with the data set it
257
+came from. This is done in a different manner depending on the data type
258
+of <em>measurements</em>.</p>
259
+<p>If using Bioconductor’s <em>DataFrame</em>, this can be specified
260
+using <em>mcols</em>. In the column metadata, each feature must have an
261
+<em>assay</em> and a <em>feature</em> name.</p>
262
+<div class="sourceCode" id="cb10"><pre class="downlit sourceCode r">
177 263
 <code class="sourceCode R"><span><span class="va">measurementsDF</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/DataFrame-class.html" class="external-link">DataFrame</a></span><span class="op">(</span><span class="va">measurements</span><span class="op">)</span></span>
178 264
 <span><span class="fu"><a href="https://rdrr.io/pkg/S4Vectors/man/Vector-class.html" class="external-link">mcols</a></span><span class="op">(</span><span class="va">measurementsDF</span><span class="op">)</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span></span>
179 265
 <span>  assay <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/rep.html" class="external-link">rep</a></span><span class="op">(</span><span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="st">"assay_1"</span>, <span class="st">"assay_2"</span><span class="op">)</span>, times <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">10</span>, <span class="fl">1990</span><span class="op">)</span><span class="op">)</span>,</span>
... ...
@@ -185,12 +271,13 @@
185 271
 <pre><code><span><span class="co">## Processing sample set 10.</span></span>
186 272
 <span><span class="co">## Processing sample set 10.</span></span>
187 273
 <span><span class="co">## Processing sample set 10.</span></span></code></pre>
188
-<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
274
+<div class="sourceCode" id="cb12"><pre class="downlit sourceCode r">
189 275
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
190 276
 <pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
191 277
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-6-1.png" width="700"></p>
192
-<p>If using a list of <em>data.frame</em>s, the name of each element in the list will be used as the assay name.</p>
193
-<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
278
+<p>If using a list of <em>data.frame</em>s, the name of each element in
279
+the list will be used as the assay name.</p>
280
+<div class="sourceCode" id="cb14"><pre class="downlit sourceCode r">
194 281
 <code class="sourceCode R"><span><span class="co"># Assigns first 10 variables to dataset_1, and the rest to dataset_2</span></span>
195 282
 <span><span class="va">measurementsList</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span></span>
196 283
 <span>  <span class="op">(</span><span class="va">measurements</span> <span class="op">|&gt;</span> <span class="fu"><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">10</span><span class="op">]</span>,</span>
... ...
@@ -203,7 +290,7 @@
203 290
 <pre><code><span><span class="co">## Processing sample set 10.</span></span>
204 291
 <span><span class="co">## Processing sample set 10.</span></span>
205 292
 <span><span class="co">## Processing sample set 10.</span></span></code></pre>
206
-<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
293
+<div class="sourceCode" id="cb16"><pre class="downlit sourceCode r">
207 294
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">result</span>, characteristicsList <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>x <span class="op">=</span> <span class="st">"Assay Name"</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
208 295
 <pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
209 296
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-7-1.png" width="700"></p>
... ...
@@ -212,11 +299,17 @@
212 299
 <div class="section level2">
213 300
 <h2 id="a-more-detailed-look-at-classifyr">A More Detailed Look at ClassifyR<a class="anchor" aria-label="anchor" href="#a-more-detailed-look-at-classifyr"></a>
214 301
 </h2>
215
-<p>In the following sections, some of the most useful functions provided in <strong>ClassifyR</strong> will be demonstrated. However, a user could wrap any feature selection, training, or prediction function to the classification framework, as long as it meets some simple rules about the input and return parameters. See the appendix section of this guide titled “Rules for New Functions” for a description of these.</p>
302
+<p>In the following sections, some of the most useful functions provided
303
+in <strong>ClassifyR</strong> will be demonstrated. However, a user
304
+could wrap any feature selection, training, or prediction function to
305
+the classification framework, as long as it meets some simple rules
306
+about the input and return parameters. See the appendix section of this
307
+guide titled “Rules for New Functions” for a description of these.</p>
216 308
 <div class="section level3">
217 309
 <h3 id="comparison-to-existing-classification-frameworks">Comparison to Existing Classification Frameworks<a class="anchor" aria-label="anchor" href="#comparison-to-existing-classification-frameworks"></a>
218 310
 </h3>
219
-<p>There are a few other frameworks for classification in R. The table below provides a comparison of which features they offer.</p>
311
+<p>There are a few other frameworks for classification in R. The table
312
+below provides a comparison of which features they offer.</p>
220 313
 <table class="table">
221 314
 <colgroup>
222 315
 <col width="8%">
... ...
@@ -302,12 +395,19 @@
302 395
 <div class="section level3">
303 396
 <h3 id="provided-functionality">Provided Functionality<a class="anchor" aria-label="anchor" href="#provided-functionality"></a>
304 397
 </h3>
305
-<p>Although being a cross-validation framework, a number of popular feature selection and classification functions are provided by the package which meet the requirements of functions to be used by it (see the last section).</p>
398
+<p>Although ClassifyR is a cross-validation framework, a number of popular
399
+feature selection and classification functions are provided by the
400
+package which meet the requirements of functions to be used by it (see
401
+the last section).</p>
306 402
 <div class="section level4">
307 403
 <h4 id="provided-methods-for-feature-selection-and-classification">Provided Methods for Feature Selection and Classification<a class="anchor" aria-label="anchor" href="#provided-methods-for-feature-selection-and-classification"></a>
308 404
 </h4>
309
-<p>In the following tables, a function that is used when no function is explicitly specified by the user is shown as <span style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
310
-<p>The functions below produce a ranking, of which different size subsets are tried and the classifier performance evaluated, to select a best subset of features, based on a criterion such as balanced accuracy rate, for example.</p>
405
+<p>In the following tables, a function that is used when no function is
406
+explicitly specified by the user is shown as <span style="padding:4px; border:2px dashed #e64626;">functionName</span>.</p>
407
+<p>The functions below produce a ranking, of which different size
408
+subsets are tried and the classifier performance evaluated, to select a
409
+best subset of features, based on a criterion such as balanced accuracy
410
+rate, for example.</p>
311 411
 <table style="width:100%;" class="table">
312 412
 <colgroup>
313 413
 <col width="9%">
... ...
@@ -361,7 +461,8 @@
361 461
 </tr>
362 462
 <tr class="even">
363 463
 <td><span style="font-family: 'Courier New', monospace;">DMDranking</span></td>
364
-<td><span style="white-space: nowrap">Difference in location (mean/median) and/or scale (SD, MAD, <span class="math inline">\(Q_n\)</span>)</span></td>
464
+<td><span style="white-space: nowrap">Difference in location
465
+(mean/median) and/or scale (SD, MAD, <span class="math inline">\(Q_n\)</span>)</span></td>
365 466
 <td>✔</td>
366 467
 <td>✔</td>
367 468
 <td>✔</td>
... ...
@@ -410,7 +511,9 @@
410 511
 <td>
411 512
 <span style="padding:1px; border:2px dashed #e64626; display:inline-block; margin-bottom: 3px; font-family: 'Courier New', monospace;">DLDAtrainInterface</span>,<br><span style="padding:1px; border:2px dashed #e64626; display:inline-block; font-family: 'Courier New', monospace;">DLDApredictInterface</span>
412 513
 </td>
413
-<td>Wrappers for sparsediscrim’s functions <span style="font-family: 'Courier New', monospace;">dlda</span> and <span style="font-family: 'Courier New', monospace;">predict.dlda</span> functions</td>
514
+<td>Wrappers for sparsediscrim’s <span style="font-family: 'Courier New', monospace;">dlda</span> and
515
+<span style="font-family: 'Courier New', monospace;">predict.dlda</span>
516
+functions</td>
414 517
 <td>✔</td>
415 518
 <td></td>
416 519
 <td></td>
... ...
@@ -425,9 +528,11 @@
425 528
 </tr>
426 529
 <tr class="odd">
427 530
 <td>
428
-<span style="font-family: 'Courier New', monospace;">elasticNetGLMtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">elasticNetGLMpredictInterface</span>
531
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMtrainInterface</span>,
532
+<span style="font-family: 'Courier New', monospace;">elasticNetGLMpredictInterface</span>
429 533
 </td>
430
-<td>Wrappers for glmnet’s elastic net GLM functions <span style="font-family: 'Courier New', monospace;">glmnet</span> and <span style="font-family: 'Courier New', monospace;">predict.glmnet</span>
534
+<td>Wrappers for glmnet’s elastic net GLM functions <span style="font-family: 'Courier New', monospace;">glmnet</span> and
535
+<span style="font-family: 'Courier New', monospace;">predict.glmnet</span>
431 536
 </td>
432 537
 <td>✔</td>
433 538
 <td></td>
... ...
@@ -435,9 +540,11 @@
435 540
 </tr>
436 541
 <tr class="even">
437 542
 <td>
438
-<span style="font-family: 'Courier New', monospace;">NSCtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">NSCpredictInterface</span>
543
+<span style="font-family: 'Courier New', monospace;">NSCtrainInterface</span>,
544
+<span style="font-family: 'Courier New', monospace;">NSCpredictInterface</span>
439 545
 </td>
440
-<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span style="font-family: 'Courier New', monospace;">pamr.train</span> and <span style="font-family: 'Courier New', monospace;">pamr.predict</span>
546
+<td>Wrappers for pamr’s Nearest Shrunken Centroid functions <span style="font-family: 'Courier New', monospace;">pamr.train</span>
547
+and <span style="font-family: 'Courier New', monospace;">pamr.predict</span>
441 548
 </td>
442 549
 <td>✔</td>
443 550
 <td></td>
... ...
@@ -452,7 +559,8 @@
452 559
 </tr>
453 560
 <tr class="even">
454 561
 <td>
455
-<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>, <span style="font-family: 'Courier New', monospace;">mixModelsPredict</span>
562
+<span style="font-family: 'Courier New', monospace;">mixModelsTrain</span>,
563
+<span style="font-family: 'Courier New', monospace;">mixModelsPredict</span>
456 564
 </td>
457 565
 <td>Feature-wise mixtures of normals and voting</td>
458 566
 <td>✔</td>
... ...
@@ -468,9 +576,11 @@
468 576
 </tr>
469 577
 <tr class="even">
470 578
 <td>
471
-<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span>
579
+<span style="font-family: 'Courier New', monospace;">randomForestTrainInterface</span>,
580
+<span style="font-family: 'Courier New', monospace;">randomForestPredictInterface</span>
472 581
 </td>
473
-<td>Wrapper for ranger’s functions <span style="font-family: 'Courier New', monospace;">ranger</span> and <span style="font-family: 'Courier New', monospace;">predict</span>
582
+<td>Wrapper for ranger’s functions <span style="font-family: 'Courier New', monospace;">ranger</span> and
583
+<span style="font-family: 'Courier New', monospace;">predict</span>
474 584
 </td>
475 585
 <td>✔</td>
476 586
 <td>✔</td>
... ...
@@ -478,9 +588,11 @@
478 588
 </tr>
479 589
 <tr class="odd">
480 590
 <td>
481
-<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>, <span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span>
591
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingTrainInterface</span>,
592
+<span style="font-family: 'Courier New', monospace;">extremeGradientBoostingPredictInterface</span>
482 593
 </td>
483
-<td>Wrapper for xgboost’s functions <span style="font-family: 'Courier New', monospace;">xgboost</span> and <span style="font-family: 'Courier New', monospace;">predict</span>
594
+<td>Wrapper for xgboost’s functions <span style="font-family: 'Courier New', monospace;">xgboost</span>
595
+and <span style="font-family: 'Courier New', monospace;">predict</span>
484 596
 </td>
485 597
 <td>✔</td>
486 598
 <td>✔</td>
... ...
@@ -496,9 +608,11 @@
496 608
 </tr>
497 609
 <tr class="odd">
498 610
 <td>
499
-<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>, <span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span>
611
+<span style="font-family: 'Courier New', monospace;">SVMtrainInterface</span>,
612
+<span style="font-family: 'Courier New', monospace;">SVMpredictInterface</span>
500 613
 </td>
501
-<td>Wrapper for e1071’s functions <span style="font-family: 'Courier New', monospace;">svm</span> and <span style="font-family: 'Courier New', monospace;">predict.svm</span>
614
+<td>Wrapper for e1071’s functions <span style="font-family: 'Courier New', monospace;">svm</span> and
615
+<span style="font-family: 'Courier New', monospace;">predict.svm</span>
502 616
 </td>
503 617
 <td>✔</td>
504 618
 <td>✔ †</td>
... ...
@@ -506,13 +620,21 @@
506 620
 </tr>
507 621
 </tbody>
508 622
 </table>
509
-<p>* If ordinary numeric measurements have been transformed to absolute deviations using <span style="font-family: 'Courier New', monospace;">subtractFromLocation</span>.<br> † If the value of <span style="font-family: 'Courier New', monospace;">kernel</span> is not <span style="font-family: 'Courier New', monospace;">“linear”</span>.</p>
510
-<p>If a desired selection or classification method is not already implemented, rules for writing functions to work with <strong>ClassifyR</strong> are outlined in the wrapper vignette. Please visit it for more information.</p>
623
+<p>* If ordinary numeric measurements have been transformed to absolute
624
+deviations using <span style="font-family: 'Courier New', monospace;">subtractFromLocation</span>.<br>
625
+† If the value of <span style="font-family: 'Courier New', monospace;">kernel</span> is
626
+not <span style="font-family: 'Courier New', monospace;">“linear”</span>.</p>
627
+<p>If a desired selection or classification method is not already
628
+implemented, rules for writing functions to work with
629
+<strong>ClassifyR</strong> are outlined in the wrapper vignette. Please
630
+visit it for more information.</p>
511 631
 </div>
512 632
 <div class="section level4">
513 633
 <h4 id="provided-meta-feature-methods">Provided Meta-feature Methods<a class="anchor" aria-label="anchor" href="#provided-meta-feature-methods"></a>
514 634
 </h4>
515
-<p>A number of methods are provided for users to enable classification in a feature-set-centric or interactor-centric way. The meta-feature creation functions should be used before cross-validation is done.</p>
635
+<p>A number of methods are provided for users to enable classification
636
+in a feature-set-centric or interactor-centric way. The meta-feature
637
+creation functions should be used before cross-validation is done.</p>
516 638
 <table class="table">
517 639
 <colgroup>
518 640
 <col width="9%">
... ...
@@ -529,25 +651,30 @@
529 651
 <tbody>
530 652
 <tr class="odd">
531 653
 <td><span style="font-family: 'Courier New', monospace;">edgesToHubNetworks</span></td>
532
-<td>Takes a two-column <span style="font-family: 'Courier New', monospace;">matrix</span> or <span style="font-family: 'Courier New', monospace;">DataFrame</span> and finds all nodes with at least a minimum number of interactions</td>
654
+<td>Takes a two-column <span style="font-family: 'Courier New', monospace;">matrix</span> or
655
+<span style="font-family: 'Courier New', monospace;">DataFrame</span>
656
+and finds all nodes with at least a minimum number of interactions</td>
533 657
 <td align="center">✔</td>
534 658
 <td align="center"></td>
535 659
 </tr>
536 660
 <tr class="even">
537 661
 <td><span style="font-family: 'Courier New', monospace;">featureSetSummary</span></td>
538
-<td><span style="white-space: nowrap">Considers sets of features and calculates their mean or median</span></td>
662
+<td><span style="white-space: nowrap">Considers sets of features and
663
+calculates their mean or median</span></td>
539 664
 <td align="center">✔</td>
540 665
 <td align="center"></td>
541 666
 </tr>
542 667
 <tr class="odd">
543 668
 <td><span style="font-family: 'Courier New', monospace;">pairsDifferencesSelection</span></td>
544
-<td>Finds a set of pairs of features whose measurement inequalities can be used for predicting with</td>
669
+<td>Finds a set of pairs of features whose measurement inequalities can
670
+be used for prediction</td>
545 671
 <td align="center"></td>
546 672
 <td align="center">✔</td>
547 673
 </tr>
548 674
 <tr class="even">
549 675
 <td><span style="font-family: 'Courier New', monospace;">kTSPclassifier</span></td>
550
-<td>Voting classifier that uses inequalities between pairs of features to vote for one of two classes</td>
676
+<td>Voting classifier that uses inequalities between pairs of features
677
+to vote for one of two classes</td>
551 678
 <td align="center"></td>
552 679
 <td align="center">✔</td>
553 680
 </tr>
... ...
@@ -556,14 +683,35 @@
556 683
 </div>
557 684
 </div>
558 685
 <div class="section level3">
559
-<h3 id="fine-grained-cross-validation-and-modelling-using-runtests">Fine-grained Cross-validation and Modelling Using <em>runTests</em><a class="anchor" aria-label="anchor" href="#fine-grained-cross-validation-and-modelling-using-runtests"></a>
686
+<h3 id="fine-grained-cross-validation-and-modelling-using-runtests">Fine-grained Cross-validation and Modelling Using
687
+<em>runTests</em><a class="anchor" aria-label="anchor" href="#fine-grained-cross-validation-and-modelling-using-runtests"></a>
560 688
 </h3>
561
-<p>For more control over the finer aspects of cross-validation of a single data set, <em>runTests</em> may be employed in place of <em>crossValidate</em>. For the variety of cross-validation, the parameters are specified by a <em>CrossValParams</em> object. The default setting is for 100 permutations and five folds and parameter tuning is done by resubstitution. It is also recommended to specify a <em>parallelParams</em> setting. On Linux and MacOS operating systems, it should be <em>MulticoreParam</em> and on Windows computers it should be <em>SnowParam</em>. Note that each of these have an option <em>RNGseed</em> and this <strong>needs to be set by the user</strong> because some classifiers or feature selection functions will have some element of randomisation. One example that works on all operating systems, but is best-suited to Windows is:</p>
562
-<div class="sourceCode" id="cb20"><pre class="downlit sourceCode r">
689
+<p>For more control over the finer aspects of cross-validation of a
690
+single data set, <em>runTests</em> may be employed in place of
691
+<em>crossValidate</em>. For the variety of cross-validation, the
692
+parameters are specified by a <em>CrossValParams</em> object. The
693
+default setting is for 100 permutations and five folds and parameter
694
+tuning is done by resubstitution. It is also recommended to specify a
695
+<em>parallelParams</em> setting. On Linux and MacOS operating systems,
696
+it should be <em>MulticoreParam</em> and on Windows computers it should
697
+be <em>SnowParam</em>. Note that each of these has an option
698
+<em>RNGseed</em> and this <strong>needs to be set by the user</strong>
699
+because some classifiers or feature selection functions will have some
700
+element of randomisation. One example that works on all operating
701
+systems, but is best-suited to Windows is:</p>
702
+<div class="sourceCode" id="cb18"><pre class="downlit sourceCode r">
563 703
 <code class="sourceCode R"><span><span class="va">CVparams</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/CrossValParams-class.html">CrossValParams</a></span><span class="op">(</span>parallelParams <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/pkg/BiocParallel/man/SnowParam-class.html" class="external-link">SnowParam</a></span><span class="op">(</span><span class="fl">16</span>, RNGseed <span class="op">=</span> <span class="fl">123</span><span class="op">)</span><span class="op">)</span></span>
564 704
 <span><span class="va">CVparams</span></span></code></pre></div>
565
-<p>For the actual operations to do to the data to build a model of it, each of the stages should be specified by an object of class <em>ModellingParams</em>. This controls how class imbalance is handled (default is to downsample to the smallest class), any transformation that needs to be done inside of cross-validation (i.e. involving a computed value from the training set), any feature selection and the training and prediction functions to be used. The default is to do an ordinary t-test (two groups) or ANOVA (three or more groups) and classification using diagonal LDA.</p>
566
-<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
705
+<p>For the actual operations to do to the data to build a model of it,
706
+each of the stages should be specified by an object of class
707
+<em>ModellingParams</em>. This controls how class imbalance is handled
708
+(default is to downsample to the smallest class), any transformation
709
+that needs to be done inside of cross-validation (i.e. involving a
710
+computed value from the training set), any feature selection and the
711
+training and prediction functions to be used. The default is to do an
712
+ordinary t-test (two groups) or ANOVA (three or more groups) and
713
+classification using diagonal LDA.</p>
714
+<div class="sourceCode" id="cb19"><pre class="downlit sourceCode r">
567 715
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span><span class="op">)</span></span></code></pre></div>
568 716
 <pre><code><span><span class="co">## An object of class "ModellingParams"</span></span>
569 717
 <span><span class="co">## Slot "balancing":</span></span>
... ...
@@ -589,20 +737,38 @@
589 737
 <div class="section level3">
590 738
 <h3 id="runtests-driver-function-of-cross-validated-classification">runTests Driver Function of Cross-validated Classification<a class="anchor" aria-label="anchor" href="#runtests-driver-function-of-cross-validated-classification"></a>
591 739
 </h3>
592
-<p><em>runTests</em> is the main function in <strong>ClassifyR</strong> which handles the sample splitting and parallelisation, if used, of cross-validation. To begin with, a simple classifier will be demonstrated. It uses a t-test or ANOVA ranking (depending on the number of classes) for feature ranking and DLDA for classification. This classifier relies on differences in means between classes. No parameters need to be specified, because this is the default classification of <em>runTests</em>. By default, the number of features is tuned by resubstitution on the training set.</p>
593
-<div class="sourceCode" id="cb23"><pre class="downlit sourceCode r">
740
+<p><em>runTests</em> is the main function in <strong>ClassifyR</strong>
741
+which handles the sample splitting and parallelisation, if used, of
742
+cross-validation. To begin with, a simple classifier will be
743
+demonstrated. It uses a t-test or ANOVA ranking (depending on the number
744
+of classes) for feature ranking and DLDA for classification. This
745
+classifier relies on differences in means between classes. No parameters
746
+need to be specified, because this is the default classification of
747
+<em>runTests</em>. By default, the number of features is tuned by
748
+resubstitution on the training set.</p>
749
+<div class="sourceCode" id="cb21"><pre class="downlit sourceCode r">
594 750
 <code class="sourceCode R"><span><span class="va">crossValParams</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/CrossValParams-class.html">CrossValParams</a></span><span class="op">(</span>permutations <span class="op">=</span> <span class="fl">5</span><span class="op">)</span></span>
595 751
 <span><span class="va">DMresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
596
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
597
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
598
-<p>Here, 5 permutations (non-default) and 5 folds cross-validation (default) is specified. For computers with more than 1 CPU, the number of cores to use can be given to <em>runTests</em> by using the argument <em>parallelParams</em>. The parameter <em>seed</em> is important to set for result reproducibility when doing a cross-validation such as this, because it employs randomisation to partition the samples into folds. Also, <em>RNGseed</em> is highly recommended to be set to the back-end specified to <em>BPPARAM</em> if doing parallel processing. The first seed mentioned does not work for parallel processes. For more details about <em>runTests</em> and the parameter classes used by it, consult the help pages of such functions.</p>
752
+<p>Here, 5 permutations (non-default) and 5-fold cross-validation
753
+(default) are specified. For computers with more than 1 CPU, the number
754
+of cores to use can be given to <em>runTests</em> by using the argument
755
+<em>parallelParams</em>. The parameter <em>seed</em> is important to set
756
+for result reproducibility when doing a cross-validation such as this,
757
+because it employs randomisation to partition the samples into folds.
758
+Also, <em>RNGseed</em> is highly recommended to be set to the back-end
759
+specified to <em>BPPARAM</em> if doing parallel processing. The first
760
+seed mentioned does not work for parallel processes. For more details
761
+about <em>runTests</em> and the parameter classes used by it, consult
762
+the help pages of such functions.</p>
599 763
 </div>
600 764
 </div>
601 765
 <div class="section level2">
602 766
 <h2 id="evaluation-of-a-classification">Evaluation of a Classification<a class="anchor" aria-label="anchor" href="#evaluation-of-a-classification"></a>
603 767
 </h2>
604
-<p>The most frequently selected gene can be identified using the <em>distribution</em> function and its relative abundance values for all samples can be displayed visually by <em>plotFeatureClasses</em>.</p>
605
-<div class="sourceCode" id="cb26"><pre class="downlit sourceCode r">
768
+<p>The most frequently selected gene can be identified using the
769
+<em>distribution</em> function and its relative abundance values for all
770
+samples can be displayed visually by <em>plotFeatureClasses</em>.</p>
771
+<div class="sourceCode" id="cb22"><pre class="downlit sourceCode r">
606 772
 <code class="sourceCode R"><span><span class="va">selectionPercentages</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/distribution.html">distribution</a></span><span class="op">(</span><span class="va">DMresults</span>, plot <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span></span>
607 773
 <span><span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">selectionPercentages</span><span class="op">)</span></span>
608 774
 <span><span class="va">sortedPercentages</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/utils/head.html" class="external-link">head</a></span><span class="op">(</span><span class="va">selectionPercentages</span><span class="op">[</span><span class="fu"><a href="https://rdrr.io/r/base/order.html" class="external-link">order</a></span><span class="op">(</span><span class="va">selectionPercentages</span>, decreasing <span class="op">=</span> <span class="cn">TRUE</span><span class="op">)</span><span class="op">]</span><span class="op">)</span></span>
... ...
@@ -616,15 +782,24 @@
616 782
 <span><span class="co">##   Please report the issue to the authors.</span></span></code></pre>
617 783
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-11-1.png" width="768"></p>
618 784
 <pre><code><span><span class="co">## allFeaturesText</span></span>
619
-<span><span class="co">##   ANKMY1 ARHGAP39 C10orf95 C19orf51  C2orf55 C6orf108 </span></span>
620
-<span><span class="co">##        8       64      100       80        4       12 </span></span>
785
+<span><span class="co">## ARHGAP39 C10orf95 C19orf51 C6orf108 C6orf154  C6orf27 </span></span>
786
+<span><span class="co">##       60       96       48        8       12        8 </span></span>
621 787
 <span><span class="co">## allFeaturesText</span></span>
622
-<span><span class="co">## C10orf95    CROCC    SSBP4   ZDHHC1  TMEM190 C19orf51 </span></span>
623
-<span><span class="co">##      100      100      100      100       84       80</span></span></code></pre>
624
-<p>The means of the abundance levels of C10orf95 are substantially different between the people with and without asthma. <em>plotFeatureClasses</em> can also plot categorical data, such as may be found in a clinical data table, as a bar chart.</p>
625
-<p>Classification error rates, as well as many other prediction performance measures, can be calculated with <em>calcCVperformance</em>. Next, the balanced accuracy rate is calculated considering all samples, each of which was in the test set once. The balanced accuracy rate is defined as the average rate of the correct classifications of each class.</p>
626
-<p>See the documentation of <em>calcCVperformance</em> for a list of performance metrics which may be calculated.</p>
627
-<div class="sourceCode" id="cb29"><pre class="downlit sourceCode r">
788
+<span><span class="co">##    SSBP4   ZDHHC1 C10orf95    CROCC  TMEM190    CTXN1 </span></span>
789
+<span><span class="co">##      100      100       96       96       76       72</span></span></code></pre>
790
+<p>The means of the abundance levels of SSBP4 are substantially
791
+different between the people with and without asthma.
792
+<em>plotFeatureClasses</em> can also plot categorical data, such as may
793
+be found in a clinical data table, as a bar chart.</p>
794
+<p>Classification error rates, as well as many other prediction
795
+performance measures, can be calculated with <em>calcCVperformance</em>.
796
+Next, the balanced accuracy rate is calculated considering all samples,
797
+each of which was in the test set once. The balanced accuracy rate is
798
+defined as the average rate of the correct classifications of each
799
+class.</p>
800
+<p>See the documentation of <em>calcCVperformance</em> for a list of
801
+performance metrics which may be calculated.</p>
802
+<div class="sourceCode" id="cb25"><pre class="downlit sourceCode r">
628 803
 <code class="sourceCode R"><span><span class="va">DMresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DMresults</span><span class="op">)</span></span>
629 804
 <span><span class="va">DMresults</span></span></code></pre></div>
630 805
 <pre><code><span><span class="co">## An object of class 'ClassifyResult'.</span></span>
... ...
@@ -636,25 +811,32 @@
636 811
 <span><span class="co">## Features: List of length 25 of feature identifiers.</span></span>
637 812
 <span><span class="co">## Predictions: A data frame of 950 rows.</span></span>
638 813
 <span><span class="co">## Performance Measures: Balanced Accuracy.</span></span></code></pre>
639
-<div class="sourceCode" id="cb31"><pre class="downlit sourceCode r">
814
+<div class="sourceCode" id="cb27"><pre class="downlit sourceCode r">
640 815
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/ClassifyResult-class.html">performance</a></span><span class="op">(</span><span class="va">DMresults</span><span class="op">)</span></span></code></pre></div>
641 816
 <pre><code><span><span class="co">## $`Balanced Accuracy`</span></span>
642 817
 <span><span class="co">##         1         2         3         4         5 </span></span>
643
-<span><span class="co">## 0.7850684 0.7931329 0.8011975 0.8047410 0.8077957</span></span></code></pre>
644
-<p>The error rate is about 20%. If only a vector of predictions and a vector of actual classes is available, such as from an old study which did not use <strong>ClassifyR</strong> for cross-validation, then <em>calcExternalPerformance</em> can be used on a pair of factor vectors which have the same length.</p>
818
+<span><span class="co">## 0.8047410 0.7997312 0.7926442 0.8047410 0.7931329</span></span></code></pre>
819
+<p>The error rate is about 20%. If only a vector of predictions and a
820
+vector of actual classes are available, such as from an old study which
821
+did not use <strong>ClassifyR</strong> for cross-validation, then
822
+<em>calcExternalPerformance</em> can be used on a pair of factor vectors
823
+which have the same length.</p>
645 824
 <div class="section level3">
646 825
 <h3 id="comparison-of-different-classifications">Comparison of Different Classifications<a class="anchor" aria-label="anchor" href="#comparison-of-different-classifications"></a>
647 826
 </h3>
648
-<p>The <em>samplesMetricMap</em> function allows the visual comparison of sample-wise error rate or accuracy measures from different <em>ClassifyResult</em> objects. Firstly, a classifier will be run that uses Kullback-Leibler divergence ranking and resubstitution error as a feature selection heuristic and a naive Bayes classifier for classification. This classification will use features that have either a change in location or in scale between classes.</p>
649
-<div class="sourceCode" id="cb33"><pre class="downlit sourceCode r">
827
+<p>The <em>samplesMetricMap</em> function allows the visual comparison
828
+of sample-wise error rate or accuracy measures from different
829
+<em>ClassifyResult</em> objects. Firstly, a classifier will be run that
830
+uses Kullback-Leibler divergence ranking and resubstitution error as a
831
+feature selection heuristic and a naive Bayes classifier for
832
+classification. This classification will use features that have either a
833
+change in location or in scale between classes.</p>
834
+<div class="sourceCode" id="cb29"><pre class="downlit sourceCode r">
650 835
 <code class="sourceCode R"><span><span class="va">modellingParamsDD</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span>selectParams <span class="op">=</span> <span class="fu"><a href="../reference/SelectParams-class.html">SelectParams</a></span><span class="op">(</span><span class="st">"KL"</span><span class="op">)</span>,</span>
651 836
 <span>                                     trainParams <span class="op">=</span> <span class="fu"><a href="../reference/TrainParams-class.html">TrainParams</a></span><span class="op">(</span><span class="st">"naiveBayes"</span><span class="op">)</span>,</span>
652 837
 <span>                                     predictParams <span class="op">=</span> <span class="cn">NULL</span><span class="op">)</span></span>
653
-<span><span class="va">DDresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">modellingParamsDD</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span></code></pre></div>
654
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
655
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
656
-<div class="sourceCode" id="cb36"><pre class="downlit sourceCode r">
657
-<code class="sourceCode R"><span><span class="va">DDresults</span></span></code></pre></div>
838
+<span><span class="va">DDresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">modellingParamsDD</span>, verbose <span class="op">=</span> <span class="fl">1</span><span class="op">)</span></span>
839
+<span><span class="va">DDresults</span></span></code></pre></div>
658 840
 <pre><code><span><span class="co">## An object of class 'ClassifyResult'.</span></span>
659 841
 <span><span class="co">## Characteristics:</span></span>
660 842
 <span><span class="co">##    characteristic                       value</span></span>
... ...
@@ -664,9 +846,15 @@
664 846
 <span><span class="co">## Features: List of length 25 of feature identifiers.</span></span>
665 847
 <span><span class="co">## Predictions: A data frame of 950 rows.</span></span>
666 848
 <span><span class="co">## Performance Measures: None calculated yet.</span></span></code></pre>
667
-<p>The naive Bayes kernel classifier by default uses the vertical distance between class densities but it can instead use the horizontal distance to the nearest non-zero density cross-over point to confidently classify samples in the tails of the densities.</p>
668
-<p>Now, the classification error for each sample is also calculated for both the differential means and differential distribution classifiers and both <em>ClassifyResult</em> objects generated so far are plotted with <em>samplesMetricMap</em>.</p>
669
-<div class="sourceCode" id="cb38"><pre class="downlit sourceCode r">
849
+<p>The naive Bayes kernel classifier by default uses the vertical
850
+distance between class densities but it can instead use the horizontal
851
+distance to the nearest non-zero density cross-over point to confidently
852
+classify samples in the tails of the densities.</p>
853
+<p>Now, the classification error for each sample is also calculated for
854
+both the differential means and differential distribution classifiers
855
+and both <em>ClassifyResult</em> objects generated so far are plotted
856
+with <em>samplesMetricMap</em>.</p>
857
+<div class="sourceCode" id="cb31"><pre class="downlit sourceCode r">
670 858
 <code class="sourceCode R"><span><span class="va">DMresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DMresults</span>, <span class="st">"Sample Error"</span><span class="op">)</span></span>
671 859
 <span><span class="va">DDresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/calcPerformance.html">calcCVperformance</a></span><span class="op">(</span><span class="va">DDresults</span>, <span class="st">"Sample Error"</span><span class="op">)</span></span>
672 860
 <span><span class="va">resultsList</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>Abundance <span class="op">=</span> <span class="va">DMresults</span>, Distribution <span class="op">=</span> <span class="va">DDresults</span><span class="op">)</span></span>
... ...
@@ -678,101 +866,178 @@
678 866
 <span><span class="co">##   z     cells    name                grob</span></span>
679 867
 <span><span class="co">## 1 1 (2-2,1-1) arrange      gtable[layout]</span></span>
680 868
 <span><span class="co">## 2 2 (1-1,1-1) arrange text[GRID.text.533]</span></span></code></pre>
681
-<p>The benefit of this plot is that it allows the easy identification of samples which are hard to classify and could be explained by considering additional information about them. Differential distribution class prediction appears to be biased to the majority class (No Asthma).</p>
682
-<p>More traditionally, the distribution of performance values of each complete cross-validation can be visualised by <em>performancePlot</em> by providing them as a list to the function. The default is to draw box plots, but violin plots could also be made. The default performance metric to plot is balanced accuracy. If it’s not already calculated for all classifications, as in this case for DD, it will be done automatically.</p>
683
-<div class="sourceCode" id="cb41"><pre class="downlit sourceCode r">
869
+<p>The benefit of this plot is that it allows the easy identification of
870
+samples which are hard to classify and could be explained by considering
871
+additional information about them. Differential distribution class
872
+prediction appears to be biased to the majority class (No Asthma).</p>
873
+<p>More traditionally, the distribution of performance values of each
874
+complete cross-validation can be visualised by <em>performancePlot</em>
875
+by providing them as a list to the function. The default is to draw box
876
+plots, but violin plots could also be made. The default performance
877
+metric to plot is balanced accuracy. If it’s not already calculated for
878
+all classifications, as in this case for DD, it will be done
879
+automatically.</p>
880
+<div class="sourceCode" id="cb34"><pre class="downlit sourceCode r">
684 881
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/performancePlot.html">performancePlot</a></span><span class="op">(</span><span class="va">resultsList</span><span class="op">)</span></span></code></pre></div>
685 882
 <pre><code><span><span class="co">## Warning in .local(results, ...): Balanced Accuracy not found in all elements of results. Calculating it now.</span></span></code></pre>
686 883
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-15-1.png" width="700"></p>
687
-<p>We can observe that the spread of balanced accuracy rates is small, but slightly wider for the differential distribution classifier.</p>
688
-<p>The features being ranked and selected in the feature selection stage can be compared within and between classifiers by the plotting functions <em>rankingPlot</em> and <em>selectionPlot</em>. Consider the task of visually representing how consistent the feature rankings of the top 100 different features were for the differential distribution classifier for all 5 folds in the 5 cross-validations.</p>
689
-<div class="sourceCode" id="cb43"><pre class="downlit sourceCode r">
884
+<p>We can observe that the spread of balanced accuracy rates is small,
885
+but slightly wider for the differential distribution classifier.</p>
886
+<p>The features being ranked and selected in the feature selection stage
887
+can be compared within and between classifiers by the plotting functions
888
+<em>rankingPlot</em> and <em>selectionPlot</em>. Consider the task of
889
+visually representing how consistent the feature rankings of the top 100
890
+different features were for the differential distribution classifier for
891
+all 5 folds in the 5 cross-validations.</p>
892
+<div class="sourceCode" id="cb36"><pre class="downlit sourceCode r">
690 893
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/rankingPlot.html">rankingPlot</a></span><span class="op">(</span><span class="va">DDresults</span>, topRanked <span class="op">=</span> <span class="fl">1</span><span class="op">:</span><span class="fl">100</span>, xLabelPositions <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">1</span>, <span class="fu"><a href="https://rdrr.io/r/base/seq.html" class="external-link">seq</a></span><span class="op">(</span><span class="fl">10</span>, <span class="fl">100</span>, <span class="fl">10</span><span class="op">)</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
691 894
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-16-1.png" width="700"></p>
692
-<p>The top-ranked features are fairly similar between all pairs of the 20 cross-validations.</p>
693
-<p>For a large cross-validation scheme, such as leave-2-out cross-validation, or when <em>results</em> contains many classifications, there are many feature set comparisons to make. Note that <em>rankingPlot</em> and <em>selectionPlot</em> have a <em>parallelParams</em> options which allows for the calculation of feature set overlaps to be done on multiple processors.</p>
895
+<p>The top-ranked features are fairly similar between all pairs of the
896
+20 cross-validations.</p>
897
+<p>For a large cross-validation scheme, such as leave-2-out
898
+cross-validation, or when <em>results</em> contains many
899
+classifications, there are many feature set comparisons to make. Note
900
+that <em>rankingPlot</em> and <em>selectionPlot</em> have a
901
+<em>parallelParams</em> option which allows for the calculation of
902
+feature set overlaps to be done on multiple processors.</p>
694 903
 </div>
695 904
 <div class="section level3">
696 905
 <h3 id="generating-a-roc-plot">Generating a ROC Plot<a class="anchor" aria-label="anchor" href="#generating-a-roc-plot"></a>
697 906
 </h3>
698
-<p>Some classifiers can output scores or probabilities representing how likely a sample is to be from one of the classes, instead of, or as well as, class labels. This enables different score thresholds to be tried, to generate pairs of false positive and false negative rates. The naive Bayes classifier used previously by default has its <em>returnType</em> parameter set to <em>“both”</em>, so class predictions and scores are both stored in the classification result. So does diagonal LDA. In this case, a data frame with class predictions and scores for each class is returned by the classifier to the cross-validation framework. Setting <em>returnType</em> to <em>“score”</em> for a classifier which has such an option is also sufficient to generate a ROC plot. Many existing classifiers in other R packages also have an option that allows a score or probability to be calculated.</p>
699
-<p>By default, scores from different iterations of prediction are merged and one line is drawn per classification. Alternatively, setting <em>mode = “average”</em> will consider each iteration of prediction separately, average them and also calculate and draw confidence intervals. The default interval is a 95% interval and is customisable by setting <em>interval</em>.</p>
700
-<div class="sourceCode" id="cb44"><pre class="downlit sourceCode r">
907
+<p>Some classifiers can output scores or probabilities representing how
908
+likely a sample is to be from one of the classes, instead of, or as well
909
+as, class labels. This enables different score thresholds to be tried,
910
+to generate pairs of false positive and false negative rates. The naive
911
+Bayes classifier used previously by default has its <em>returnType</em>
912
+parameter set to <em>“both”</em>, so class predictions and scores are
913
+both stored in the classification result. So does diagonal LDA. In this
914
+case, a data frame with class predictions and scores for each class is
915
+returned by the classifier to the cross-validation framework. Setting
916
+<em>returnType</em> to <em>“score”</em> for a classifier which has such
917
+an option is also sufficient to generate a ROC plot. Many existing
918
+classifiers in other R packages also have an option that allows a score
919
+or probability to be calculated.</p>
920
+<p>By default, scores from different iterations of prediction are merged
921
+and one line is drawn per classification. Alternatively, setting
922
+<em>mode = “average”</em> will consider each iteration of prediction
923
+separately, average them and also calculate and draw confidence
924
+intervals. The default interval is a 95% interval and is customisable by
925
+setting <em>interval</em>.</p>
926
+<div class="sourceCode" id="cb37"><pre class="downlit sourceCode r">
701 927
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/ROCplot.html">ROCplot</a></span><span class="op">(</span><span class="va">resultsList</span>, fontSizes <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">24</span>, <span class="fl">12</span>, <span class="fl">12</span>, <span class="fl">12</span>, <span class="fl">12</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
702 928
 <p><img src="ClassifyR_files/figure-html/unnamed-chunk-17-1.png" width="576"></p>
703
-<p>This ROC plot shows the classifiability of the asthma data set is high. Some examples of functions which output scores are <em>fisherDiscriminant</em>, <em>DLDApredictInterface</em>, and <em>SVMpredictInterface</em>.</p>
929
+<p>This ROC plot shows the classifiability of the asthma data set is
930
+high. Some examples of functions which output scores are
931
+<em>fisherDiscriminant</em>, <em>DLDApredictInterface</em>, and
932
+<em>SVMpredictInterface</em>.</p>
704 933
 </div>
705 934
 </div>
706 935
 <div class="section level2">
707 936
 <h2 id="other-use-cases">Other Use Cases<a class="anchor" aria-label="anchor" href="#other-use-cases"></a>
708 937
 </h2>
709
-<p>Apart from cross-validation of one data set, ClassifyR can be used in a couple of other ways.</p>
938
+<p>Apart from cross-validation of one data set, ClassifyR can be used in
939
+a couple of other ways.</p>
710 940
 <div class="section level3">
711 941
 <h3 id="using-an-independent-test-set">Using an Independent Test Set<a class="anchor" aria-label="anchor" href="#using-an-independent-test-set"></a>
712 942
 </h3>
713
-<p>Sometimes, cross-validation is unnecessary. This happens when studies have large sample sizes and are designed such that a large number of samples is prespecified to form a test set. The classifier is only trained on the training sample set, and makes predictions only on the test sample set. This can be achieved by using the function <em>runTest</em> directly. See its documentation for required inputs.</p>
943
+<p>Sometimes, cross-validation is unnecessary. This happens when studies
944
+have large sample sizes and are designed such that a large number of
945
+samples is prespecified to form a test set. The classifier is only
946
+trained on the training sample set, and makes predictions only on the
947
+test sample set. This can be achieved by using the function
948
+<em>runTest</em> directly. See its documentation for required
949
+inputs.</p>
714 950
 </div>
715 951
 <div class="section level3">
716 952
 <h3 id="cross-validating-selected-features-on-a-different-data-set">Cross-validating Selected Features on a Different Data Set<a class="anchor" aria-label="anchor" href="#cross-validating-selected-features-on-a-different-data-set"></a>
717 953
 </h3>
718
-<p>Once a cross-validated classification is complete, the usefulness of the features selected may be explored in another dataset. <em>previousSelection</em> is a function which takes an existing <em>ClassifyResult</em> object and returns the features selected at the equivalent iteration which is currently being processed. This is necessary, because the models trained on one data set are not directly transferrable to a new dataset; the classifier training (e.g. choosing thresholds, fitting model coefficients) is redone. Of course, the features in the new dataset should have the same naming system as the ones in the old dataset.</p>
954
+<p>Once a cross-validated classification is complete, the usefulness of
955
+the features selected may be explored in another dataset.
956
+<em>previousSelection</em> is a function which takes an existing
957
+<em>ClassifyResult</em> object and returns the features selected at the
958
+equivalent iteration which is currently being processed. This is
959
+necessary, because the models trained on one data set are not directly
960
+transferrable to a new dataset; the classifier training (e.g. choosing
961
+thresholds, fitting model coefficients) is redone. Of course, the
962
+features in the new dataset should have the same naming system as the
963
+ones in the old dataset.</p>
719 964
 </div>
720 965
 <div class="section level3">
721 966
 <h3 id="parameter-tuning">Parameter Tuning<a class="anchor" aria-label="anchor" href="#parameter-tuning"></a>
722 967
 </h3>
723
-<p>Some feature ranking methods or classifiers allow the choosing of tuning parameters, which controls some aspect of their model learning. An example of doing parameter tuning with a linear SVM is presented. This particular SVM has a single tuning parameter, the cost. Higher values of this parameter penalise misclassifications more. Moreover, feature selection happens by using a feature ranking function and then trying a range of top-ranked features to see which gives the best performance, the range being specified by a list element named <em>nFeatures</em> and the performance type (e.g. Balanced Accuracy) specified by a list element named <em>performanceType</em>. Therefore, some kind of parameter tuning always happens, even if the feature ranking or classifier function does not have any explicit tuning parameters.</p>
724
-<p>Tuning is achieved in ClassifyR by providing a variable called <em>tuneParams</em> to the SelectParams or TrainParams constructor. <em>tuneParams</em> is a named list, with the names being the names of the tuning variables, except for one which is named <em>“performanceType”</em> and specifies the performance metric to use for picking the parameter values. Any of the non-sample-specific performance metrics which <em>calcCVperformance</em> calculates can be optimised.</p>
725
-<div class="sourceCode" id="cb45"><pre class="downlit sourceCode r">
968
+<p>Some feature ranking methods or classifiers allow the choosing of
969
+tuning parameters, which control some aspect of their model learning.
970
+An example of doing parameter tuning with a linear SVM is presented.
971
+This particular SVM has a single tuning parameter, the cost. Higher
972
+values of this parameter penalise misclassifications more. Moreover,
973
+feature selection happens by using a feature ranking function and then
974
+trying a range of top-ranked features to see which gives the best
975
+performance, the range being specified by a list element named
976
+<em>nFeatures</em> and the performance type (e.g. Balanced Accuracy)
977
+specified by a list element named <em>performanceType</em>. Therefore,
978
+some kind of parameter tuning always happens, even if the feature
979
+ranking or classifier function does not have any explicit tuning
980
+parameters.</p>
981
+<p>Tuning is achieved in ClassifyR by providing a variable called
982
+<em>tuneParams</em> to the SelectParams or TrainParams constructor.
983
+<em>tuneParams</em> is a named list, with the names being the names of
984
+the tuning variables, except for one which is named
985
+<em>“performanceType”</em> and specifies the performance metric to use
986
+for picking the parameter values. Any of the non-sample-specific
987
+performance metrics which <em>calcCVperformance</em> calculates can be
988
+optimised.</p>
989
+<div class="sourceCode" id="cb38"><pre class="downlit sourceCode r">
726 990
 <code class="sourceCode R"><span><span class="va">tuneList</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/list.html" class="external-link">list</a></span><span class="op">(</span>cost <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0.01</span>, <span class="fl">0.1</span>, <span class="fl">1</span>, <span class="fl">10</span><span class="op">)</span><span class="op">)</span></span>
727 991
 <span><span class="va">SVMparams</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/ModellingParams-class.html">ModellingParams</a></span><span class="op">(</span>trainParams <span class="op">=</span> <span class="fu"><a href="../reference/TrainParams-class.html">TrainParams</a></span><span class="op">(</span><span class="st">"SVM"</span>, kernel <span class="op">=</span> <span class="st">"linear"</span>, tuneParams <span class="op">=</span> <span class="va">tuneList</span><span class="op">)</span>,</span>
728 992
 <span>                             predictParams <span class="op">=</span> <span class="fu"><a href="../reference/PredictParams-class.html">PredictParams</a></span><span class="op">(</span><span class="st">"SVM"</span><span class="op">)</span><span class="op">)</span></span>
729 993
 <span><span class="va">SVMresults</span> <span class="op">&lt;-</span> <span class="fu"><a href="../reference/runTests.html">runTests</a></span><span class="op">(</span><span class="va">measurements</span>, <span class="va">classes</span>, <span class="va">crossValParams</span>, <span class="va">SVMparams</span><span class="op">)</span></span></code></pre></div>
730
-<pre><code><span><span class="co">## Processing sample set 10.</span></span></code></pre>
731
-<pre><code><span><span class="co">## Processing sample set 20.</span></span></code></pre>
732
-<p>The index of chosen of the parameters, as well as all combinations of parameters and their associated performance metric, are stored for every validation, and can be accessed with the <em>tunedParameters</em> function.</p>
733
-<div class="sourceCode" id="cb48"><pre class="downlit sourceCode r">
994
+<p>The index of the chosen parameters, as well as all combinations of
995
+parameters and their associated performance metric, are stored for every
996
+validation, and can be accessed with the <em>tunedParameters</em>
997
+function.</p>
998
+<div class="sourceCode" id="cb39"><pre class="downlit sourceCode r">
734 999
 <code class="sourceCode R"><span><span class="fu"><a href="https://rdrr.io/r/base/length.html" class="external-link">length</a></span><span class="op">(</span><span class="fu"><a href="../reference/ClassifyResult-class.html">tunedParameters</a></span><span class="op">(</span><span class="va">SVMresults</span><span class="op">)</span><span class="op">)</span></span></code></pre></div>
735 1000
 <pre><code><span><span class="co">## [1] 25</span></span></code></pre>
736
-<div class="sourceCode" id="cb50"><pre class="downlit sourceCode r">
1001
+<div class="sourceCode" id="cb41"><pre class="downlit sourceCode r">
737 1002
 <code class="sourceCode R"><span><span class="fu"><a href="../reference/ClassifyResult-class.html">tunedParameters</a></span><span class="op">(</span><span class="va">SVMresults</span><span class="op">)</span><span class="op">[</span><span class="fl">1</span><span class="op">:</span><span class="fl">5</span><span class="op">]</span></span></code></pre></div>
738 1003
 <pre><code><span><span class="co">## [[1]]</span></span>
739 1004
 <span><span class="co">## [[1]]$tuneCombinations</span></span>
740 1005
 <span><span class="co">##    topN  cost Balanced Accuracy</span></span>
741
-<span><span class="co">## 1    10  0.01         0.8507719</span></span>
742
-<span><span class="co">## 2    20  0.01         0.8551553</span></span>
743
-<span><span class="co">## 3    30  0.01         0.8696398</span></span>
744
-<span><span class="co">## 4    40  0.01         0.9073756</span></span>
745
-<span><span class="co">## 5    50  0.01         0.8986087</span></span>
746
-<span><span class="co">## 6    60  0.01         0.8986087</span></span>
747
-<span><span class="co">## 7    70  0.01         0.8942253</span></span>
748
-<span><span class="co">## 8    80  0.01         0.9036592</span></span>
749
-<span><span class="co">## 9    90  0.01         0.9036592</span></span>
750
-<span><span class="co">## 10  100  0.01         0.8986087</span></span>
751
-<span><span class="co">## 11   10  0.10         0.8608729</span></span>
752
-<span><span class="co">## 12   20  0.10         0.8942253</span></span>
753
-<span><span class="co">## 13   30  0.10         0.8746903</span></span>
754
-<span><span class="co">## 14   40  0.10         0.9188107</span></span>
755
-<span><span class="co">## 15   50  0.10         0.9087097</span></span>
756
-<span><span class="co">## 16   60  0.10         0.9137602</span></span>
757
-<span><span class="co">## 17   70  0.10         0.9188107</span></span>
758
-<span><span class="co">## 18   80  0.10         0.9137602</span></span>
759
-<span><span class="co">## 19   90  0.10         0.9238613</span></span>
760
-<span><span class="co">## 20  100  0.10         0.9477797</span></span>
761
-<span><span class="co">## 21   10  1.00         0.8992758</span></span>
762
-<span><span class="co">## 22   20  1.00         0.8898418</span></span>
763
-<span><span class="co">## 23   30  1.00         0.9144273</span></span>
764
-<span><span class="co">## 24   40  1.00         0.9049933</span></span>
765
-<span><span class="co">## 25   50  1.00         0.9666476</span></span>
766
-<span><span class="co">## 26   60  1.00         0.9811321</span></span>
767
-<span><span class="co">## 27   70  1.00         0.9855155</span></span>
768
-<span><span class="co">## 28   80  1.00         1.0000000</span></span>
769
-<span><span class="co">## 29   90  1.00         1.0000000</span></span>
1006
+<span><span class="co">## 1    10  0.01         0.7746331</span></span>
1007
+<span><span class="co">## 2    20  0.01         0.7847341</span></span>
1008
+<span><span class="co">## 3    30  0.01         0.7746331</span></span>
1009
+<span><span class="co">## 4    40  0.01         0.8167524</span></span>
1010
+<span><span class="co">## 5    50  0.01         0.8022680</span></span>
1011
+<span><span class="co">## 6    60  0.01         0.8457214</span></span>
1012
+<span><span class="co">## 7    70  0.01         0.8551553</span></span>
1013
+<span><span class="co">## 8    80  0.01         0.8551553</span></span>
1014
+<span><span class="co">## 9    90  0.01         0.8645893</span></span>
1015
+<span><span class="co">## 10  100  0.01         0.8885077</span></span>
1016
+<span><span class="co">## 11   10  0.10         0.8218029</span></span>
1017
+<span><span class="co">## 12   20  0.10         0.8608729</span></span>
1018
+<span><span class="co">## 13   30  0.10         0.8797408</span></span>
1019
+<span><span class="co">## 14   40  0.10         0.9043263</span></span>
1020
+<span><span class="co">## 15   50  0.10         0.9238613</span></span>
1021
+<span><span class="co">## 16   60  0.10         0.9282447</span></span>
1022
+<span><span class="co">## 17   70  0.10         0.9231942</span></span>
1023
+<span><span class="co">## 18   80  0.10         0.9282447</span></span>
1024
+<span><span class="co">## 19   90  0.10         0.9137602</span></span>
1025
+<span><span class="co">## 20  100  0.10         0.9622642</span></span>
1026
+<span><span class="co">## 21   10  1.00         0.8709739</span></span>
1027
+<span><span class="co">## 22   20  1.00         0.8659234</span></span>
1028
+<span><span class="co">## 23   30  1.00         0.8716409</span></span>
1029
+<span><span class="co">## 24   40  1.00         0.8861254</span></span>
1030
+<span><span class="co">## 25   50  1.00         0.9383457</span></span>
1031
+<span><span class="co">## 26   60  1.00         0.9289118</span></span>
1032
+<span><span class="co">## 27   70  1.00         0.9615971</span></span>
1033
+<span><span class="co">## 28   80  1.00         0.9666476</span></span>
1034
+<span><span class="co">## 29   90  1.00         0.9760816</span></span>
770 1035
 <span><span class="co">## 30  100  1.00         1.0000000</span></span>
771
-<span><span class="co">## 31   10 10.00         0.9043263</span></span>
772
-<span><span class="co">## 32   20 10.00         0.8905089</span></span>
773
-<span><span class="co">## 33   30 10.00         0.9289118</span></span>
774
-<span><span class="co">## 34   40 10.00         0.9855155</span></span>
775
-<span><span class="co">## 35   50 10.00         1.0000000</span></span>
1036
+<span><span class="co">## 31   10 10.00         0.8652563</span></span>
1037
+<span><span class="co">## 32   20 10.00         0.8672575</span></span>
1038
+<span><span class="co">## 33   30 10.00         0.8955594</span></span>
1039
+<span><span class="co">## 34   40 10.00         0.9477797</span></span>
1040
+<span><span class="co">## 35   50 10.00         0.9949495</span></span>
776 1041
 <span><span class="co">## 36   60 10.00         1.0000000</span></span>
777 1042
 <span><span class="co">## 37   70 10.00         1.0000000</span></span>
778 1043
 <span><span class="co">## 38   80 10.00         1.0000000</span></span>
... ...
@@ -780,95 +1045,95 @@
780 1045
 <span><span class="co">## 40  100 10.00         1.0000000</span></span>
781 1046
 <span><span class="co">## </span></span>
782 1047
 <span><span class="co">## [[1]]$bestIndex</span></span>
783
-<span><span class="co">## [1] 28</span></span>
1048
+<span><span class="co">## [1] 30</span></span>
784 1049
 <span><span class="co">## </span></span>
785 1050
 <span><span class="co">## </span></span>
786 1051
 <span><span class="co">## [[2]]</span></span>
787 1052
 <span><span class="co">## [[2]]$tuneCombinations</span></span>
788 1053
 <span><span class="co">##    topN  cost Balanced Accuracy</span></span>
789
-<span><span class="co">## 1    10  0.01         0.8066514</span></span>
790
-<span><span class="co">## 2    20  0.01         0.7783495</span></span>
791
-<span><span class="co">## 3    30  0.01         0.7877835</span></span>
792
-<span><span class="co">## 4    40  0.01         0.7783495</span></span>
793
-<span><span class="co">## 5    50  0.01         0.8117019</span></span>
794
-<span><span class="co">## 6    60  0.01         0.8117019</span></span>
795
-<span><span class="co">## 7    70  0.01         0.8117019</span></span>
796
-<span><span class="co">## 8    80  0.01         0.8261864</span></span>
797
-<span><span class="co">## 9    90  0.01         0.8261864</span></span>
798
-<span><span class="co">## 10  100  0.01         0.8261864</span></span>
799
-<span><span class="co">## 11   10  0.10         0.7928340</span></span>
800
-<span><span class="co">## 12   20  0.10         0.8029350</span></span>
801
-<span><span class="co">## 13   30  0.10         0.8406709</span></span>
802
-<span><span class="co">## 14   40  0.10         0.8406709</span></span>
803
-<span><span class="co">## 15   50  0.10         0.8457214</span></span>
804
-<span><span class="co">## 16   60  0.10         0.8551553</span></span>
805
-<span><span class="co">## 17   70  0.10         0.9181437</span></span>
806
-<span><span class="co">## 18   80  0.10         0.9326282</span></span>
807
-<span><span class="co">## 19   90  0.10         0.9275777</span></span>
808
-<span><span class="co">## 20  100  0.10         0.9326282</span></span>
809
-<span><span class="co">## 21   10  1.00         0.7746331</span></span>
810
-<span><span class="co">## 22   20  1.00         0.8602058</span></span>
811
-<span><span class="co">## 23   30  1.00         0.8652563</span></span>
812
-<span><span class="co">## 24   40  1.00         0.9023251</span></span>
813
-<span><span class="co">## 25   50  1.00         0.9413951</span></span>
814
-<span><span class="co">## 26   60  1.00         0.9514961</span></span>
815
-<span><span class="co">## 27   70  1.00         0.9521631</span></span>
816
-<span><span class="co">## 28   80  1.00         0.9565466</span></span>
817
-<span><span class="co">## 29   90  1.00         0.9615971</span></span>
818
-<span><span class="co">## 30  100  1.00         0.9855155</span></span>
819
-<span><span class="co">## 31   10 10.00         0.7847341</span></span>
820
-<span><span class="co">## 32   20 10.00         0.8615399</span></span>
821
-<span><span class="co">## 33   30 10.00         0.8999428</span></span>
822
-<span><span class="co">## 34   40 10.00         0.9427292</span></span>
823
-<span><span class="co">## 35   50 10.00         0.9811321</span></span>
1054
+<span><span class="co">## 1    10  0.01         0.8218029</span></span>
1055
+<span><span class="co">## 2    20  0.01         0.8029350</span></span>
1056
+<span><span class="co">## 3    30  0.01         0.8180865</span></span>
1057
+<span><span class="co">## 4    40  0.01         0.8180865</span></span>
1058
+<span><span class="co">## 5    50  0.01         0.8268534</span></span>
1059
+<span><span class="co">## 6    60  0.01         0.8268534</span></span>
1060
+<span><span class="co">## 7    70  0.01         0.8268534</span></span>
1061
+<span><span class="co">## 8    80  0.01         0.8312369</span></span>
1062
+<span><span class="co">## 9    90  0.01         0.8652563</span></span>
1063
+<span><span class="co">## 10  100  0.01         0.8746903</span></span>
1064
+<span><span class="co">## 11   10  0.10         0.8073185</span></span>
1065
+<span><span class="co">## 12   20  0.10         0.8180865</span></span>
1066
+<span><span class="co">## 13   30  0.10         0.8942253</span></span>
1067
+<span><span class="co">## 14   40  0.10         0.8891748</span></span>
1068
+<span><span class="co">## 15   50  0.10         0.9420621</span></span>
1069
+<span><span class="co">## 16   60  0.10         0.9420621</span></span>
1070
+<span><span class="co">## 17   70  0.10         0.9420621</span></span>
1071
+<span><span class="co">## 18   80  0.10         0.9420621</span></span>
1072
+<span><span class="co">## 19   90  0.10         0.9420621</span></span>
1073
+<span><span class="co">## 20  100  0.10         0.9420621</span></span>
1074
+<span><span class="co">## 21   10  1.00         0.7985516</span></span>
1075
+<span><span class="co">## 22   20  1.00         0.8804079</span></span>
1076
+<span><span class="co">## 23   30  1.00         0.9087097</span></span>
1077
+<span><span class="co">## 24   40  1.00         0.9326282</span></span>
1078
+<span><span class="co">## 25   50  1.00         0.9710311</span></span>
1079
+<span><span class="co">## 26   60  1.00         0.9521631</span></span>
1080
+<span><span class="co">## 27   70  1.00         0.9760816</span></span>
1081
+<span><span class="co">## 28   80  1.00         0.9855155</span></span>
1082
+<span><span class="co">## 29   90  1.00         0.9760816</span></span>
1083
+<span><span class="co">## 30  100  1.00         0.9760816</span></span>
1084
+<span><span class="co">## 31   10 10.00         0.8275205</span></span>
1085
+<span><span class="co">## 32   20 10.00         0.8986087</span></span>
1086
+<span><span class="co">## 33   30 10.00         0.9477797</span></span>
1087
+<span><span class="co">## 34   40 10.00         0.9565466</span></span>
1088
+<span><span class="co">## 35   50 10.00         0.9760816</span></span>
824 1089
 <span><span class="co">## 36   60 10.00         0.9905660</span></span>
825
-<span><span class="co">## 37   70 10.00         0.9905660</span></span>
1090
+<span><span class="co">## 37   70 10.00         1.0000000</span></span>
826 1091
 <span><span class="co">## 38   80 10.00         1.0000000</span></span>
827 1092
 <span><span class="co">## 39   90 10.00         1.0000000</span></span>
828 1093
 <span><span class="co">## 40  100 10.00         1.0000000</span></span>
829 1094
 <span><span class="co">## </span></span>
830 1095
 <span><span class="co">## [[2]]$bestIndex</span></span>
831
-<span><span class="co">## [1] 38</span></span>
1096
+<span><span class="co">## [1] 37</span></span>
832 1097
 <span><span class="co">## </span></span>
833 1098
 <span><span class="co">## </span></span>
834 1099
 <span><span class="co">## [[3]]</span></span>
835 1100
 <span><span class="co">## [[3]]$tuneCombinations</span></span>
836 1101
 <span><span class="co">##    topN  cost Balanced Accuracy</span></span>
837
-<span><span class="co">## 1    10  0.01         0.7739661</span></span>
838
-<span><span class="co">## 2    20  0.01         0.8602058</span></span>
839
-<span><span class="co">## 3    30  0.01         0.8703068</span></span>
840
-<span><span class="co">## 4    40  0.01         0.8652563</span></span>
841
-<span><span class="co">## 5    50  0.01         0.8847913</span></span>
842
-<span><span class="co">## 6    60  0.01         0.8804079</span></span>
843
-<span><span class="co">## 7    70  0.01         0.8898418</span></span>
844
-<span><span class="co">## 8    80  0.01         0.8992758</span></span>
845
-<span><span class="co">## 9    90  0.01         0.8942253</span></span>
846
-<span><span class="co">## 10  100  0.01         0.9043263</span></span>
847
-<span><span class="co">## 11   10  0.10         0.8029350</span></span>
848
-<span><span class="co">## 12   20  0.10         0.9043263</span></span>
849
-<span><span class="co">## 13   30  0.10         0.9188107</span></span>
850
-<span><span class="co">## 14   40  0.10         0.9289118</span></span>
851
-<span><span class="co">## 15   50  0.10         0.8911759</span></span>
852
-<span><span class="co">## 16   60  0.10         0.9144273</span></span>
853
-<span><span class="co">## 17   70  0.10         0.9238613</span></span>
854
-<span><span class="co">## 18   80  0.10         0.9477797</span></span>
855
-<span><span class="co">## 19   90  0.10         0.9383457</span></span>
856
-<span><span class="co">## 20  100  0.10         0.9572136</span></span>
857
-<span><span class="co">## 21   10  1.00         0.8413379</span></span>
858
-<span><span class="co">## 22   20  1.00         0.9572136</span></span>
859
-<span><span class="co">## 23   30  1.00         0.9565466</span></span>
860
-<span><span class="co">## 24   40  1.00         0.9477797</span></span>
861
-<span><span class="co">## 25   50  1.00         0.9760816</span></span>
862
-<span><span class="co">## 26   60  1.00         0.9760816</span></span>
863
-<span><span class="co">## 27   70  1.00         0.9905660</span></span>
864
-<span><span class="co">## 28   80  1.00         0.9905660</span></span>
865
-<span><span class="co">## 29   90  1.00         0.9905660</span></span>
1102
+<span><span class="co">## 1    10  0.01         0.8218029</span></span>
1103
+<span><span class="co">## 2    20  0.01         0.8167524</span></span>
1104
+<span><span class="co">## 3    30  0.01         0.8211359</span></span>
1105
+<span><span class="co">## 4    40  0.01         0.8022680</span></span>
1106
+<span><span class="co">## 5    50  0.01         0.8117019</span></span>
1107
+<span><span class="co">## 6    60  0.01         0.8268534</span></span>
1108
+<span><span class="co">## 7    70  0.01         0.8362874</span></span>
1109
+<span><span class="co">## 8    80  0.01         0.8218029</span></span>
1110
+<span><span class="co">## 9    90  0.01         0.8413379</span></span>
1111
+<span><span class="co">## 10  100  0.01         0.8507719</span></span>
1112
+<span><span class="co">## 11   10  0.10         0.7978845</span></span>
1113
+<span><span class="co">## 12   20  0.10         0.8501048</span></span>
1114
+<span><span class="co">## 13   30  0.10         0.8841243</span></span>
1115
+<span><span class="co">## 14   40  0.10         0.8746903</span></span>
1116
+<span><span class="co">## 15   50  0.10         0.8928912</span></span>
1117
+<span><span class="co">## 16   60  0.10         0.9269106</span></span>
1118
+<span><span class="co">## 17   70  0.10         0.9225272</span></span>
1119
+<span><span class="co">## 18   80  0.10         0.9231942</span></span>
1120
+<span><span class="co">## 19   90  0.10         0.9521631</span></span>
1121
+<span><span class="co">## 20  100  0.10         0.9427292</span></span>
1122
+<span><span class="co">## 21   10  1.00         0.8224700</span></span>
1123
+<span><span class="co">## 22   20  1.00         0.8659234</span></span>
1124
+<span><span class="co">## 23   30  1.00         0.8948923</span></span>
1125
+<span><span class="co">## 24   40  1.00         0.9275777</span></span>
1126
+<span><span class="co">## 25   50  1.00         0.9514961</span></span>
1127
+<span><span class="co">## 26   60  1.00         0.9565466</span></span>
1128
+<span><span class="co">## 27   70  1.00         0.9716981</span></span>
1129
+<span><span class="co">## 28   80  1.00         0.9811321</span></span>
1130
+<span><span class="co">## 29   90  1.00         0.9949495</span></span>
866 1131
 <span><span class="co">## 30  100  1.00         1.0000000</span></span>
867
-<span><span class="co">## 31   10 10.00         0.8608729</span></span>
868
-<span><span class="co">## 32   20 10.00         0.9855155</span></span>
869
-<span><span class="co">## 33   30 10.00         0.9898990</span></span>
870
-<span><span class="co">## 34   40 10.00         1.0000000</span></span>
871
-<span><span class="co">## 35   50 10.00         1.0000000</span></span>
1132
+<span><span class="co">## 31   10 10.00         0.8281875</span></span>
1133
+<span><span class="co">## 32   20 10.00         0.8760244</span></span>
1134
+<span><span class="co">## 33   30 10.00         0.9471126</span></span>
1135
+<span><span class="co">## 34   40 10.00         0.9615971</span></span>
1136
+<span><span class="co">## 35   50 10.00         0.9905660</span></span>
872 1137
 <span><span class="co">## 36   60 10.00         1.0000000</span></span>
873 1138
 <span><span class="co">## 37   70 10.00         1.0000000</span></span>
874 1139
 <span><span class="co">## 38   80 10.00         1.0000000</span></span>
... ...
@@ -882,88 +1147,88 @@
882 1147
 <span><span class="co">## [[4]]</span></span>
883 1148
 <span><span class="co">## [[4]]$tuneCombinations</span></span>
884 1149
 <span><span class="co">##    topN  cost Balanced Accuracy</span></span>
885
-<span><span class="co">## 1    10  0.01         0.8073185</span></span>
886
-<span><span class="co">## 2    20  0.01         0.8167524</span></span>
887
-<span><span class="co">## 3    30  0.01         0.8073185</span></span>
888
-<span><span class="co">## 4    40  0.01         0.8073185</span></span>
889
-<span><span class="co">## 5    50  0.01         0.8463884</span></span>
890
-<span><span class="co">## 6    60  0.01         0.8463884</span></span>
891
-<span><span class="co">## 7    70  0.01         0.8420050</span></span>
892
-<span><span class="co">## 8    80  0.01         0.8275205</span></span>
893
-<span><span class="co">## 9    90  0.01         0.8703068</span></span>
894
-<span><span class="co">## 10  100  0.01         0.8746903</span></span>
895
-<span><span class="co">## 11   10  0.10         0.8079855</span></span>
896
-<span><span class="co">## 12   20  0.10         0.8268534</span></span>
897
-<span><span class="co">## 13   30  0.10         0.8224700</span></span>
898
-<span><span class="co">## 14   40  0.10         0.8325710</span></span>
899
-<span><span class="co">## 15   50  0.10         0.8942253</span></span>
900
-<span><span class="co">## 16   60  0.10         0.9036592</span></span>
901
-<span><span class="co">## 17   70  0.10         0.9130932</span></span>
902
-<span><span class="co">## 18   80  0.10         0.9181437</span></span>
903
-<span><span class="co">## 19   90  0.10         0.9188107</span></span>
904
-<span><span class="co">## 20  100  0.10         0.9188107</span></span>
905
-<span><span class="co">## 21   10  1.00         0.7891176</span></span>
906
-<span><span class="co">## 22   20  1.00         0.8137031</span></span>
907
-<span><span class="co">## 23   30  1.00         0.8608729</span></span>
908
-<span><span class="co">## 24   40  1.00         0.8665904</span></span>
909
-<span><span class="co">## 25   50  1.00         0.9565466</span></span>
910
-<span><span class="co">## 26   60  1.00         0.9471126</span></span>
911
-<span><span class="co">## 27   70  1.00         0.9565466</span></span>
912
-<span><span class="co">## 28   80  1.00         0.9754145</span></span>
913
-<span><span class="co">## 29   90  1.00         0.9754145</span></span>
914
-<span><span class="co">## 30  100  1.00         0.9848485</span></span>
915
-<span><span class="co">## 31   10 10.00         0.8036021</span></span>
916
-<span><span class="co">## 32   20 10.00         0.8376215</span></span>
917
-<span><span class="co">## 33   30 10.00         0.9194778</span></span>
918
-<span><span class="co">## 34   40 10.00         0.9150943</span></span>
1150
+<span><span class="co">## 1    10  0.01         0.7594816</span></span>
1151
+<span><span class="co">## 2    20  0.01         0.8174195</span></span>
1152
+<span><span class="co">## 3    30  0.01         0.8268534</span></span>
1153
+<span><span class="co">## 4    40  0.01         0.8362874</span></span>
1154
+<span><span class="co">## 5    50  0.01         0.8457214</span></span>
1155
+<span><span class="co">## 6    60  0.01         0.8457214</span></span>
1156
+<span><span class="co">## 7    70  0.01         0.8595388</span></span>
1157
+<span><span class="co">## 8    80  0.01         0.8784067</span></span>
1158
+<span><span class="co">## 9    90  0.01         0.8784067</span></span>
1159
+<span><span class="co">## 10  100  0.01         0.8689727</span></span>
1160
+<span><span class="co">## 11   10  0.10         0.7638651</span></span>
1161
+<span><span class="co">## 12   20  0.10         0.8602058</span></span>
1162
+<span><span class="co">## 13   30  0.10         0.8797408</span></span>
1163
+<span><span class="co">## 14   40  0.10         0.8948923</span></span>
1164
+<span><span class="co">## 15   50  0.10         0.8992758</span></span>
1165
+<span><span class="co">## 16   60  0.10         0.9043263</span></span>
1166
+<span><span class="co">## 17   70  0.10         0.9043263</span></span>
1167
+<span><span class="co">## 18   80  0.10         0.9326282</span></span>
1168
+<span><span class="co">## 19   90  0.10         0.9376787</span></span>
1169
+<span><span class="co">## 20  100  0.10         0.9615971</span></span>
1170
+<span><span class="co">## 21   10  1.00         0.7796836</span></span>
1171
+<span><span class="co">## 22   20  1.00         0.8898418</span></span>
1172
+<span><span class="co">## 23   30  1.00         0.8797408</span></span>
1173
+<span><span class="co">## 24   40  1.00         0.9093768</span></span>
1174
+<span><span class="co">## 25   50  1.00         0.9332952</span></span>
1175
+<span><span class="co">## 26   60  1.00         0.9427292</span></span>
1176
+<span><span class="co">## 27   70  1.00         0.9572136</span></span>
1177
+<span><span class="co">## 28   80  1.00         0.9572136</span></span>
1178
+<span><span class="co">## 29   90  1.00         0.9760816</span></span>
1179
+<span><span class="co">## 30  100  1.00         1.0000000</span></span>
1180
+<span><span class="co">## 31   10 10.00         0.8268534</span></span>
1181
+<span><span class="co">## 32   20 10.00         0.8615399</span></span>
1182
+<span><span class="co">## 33   30 10.00         0.9043263</span></span>
1183
+<span><span class="co">## 34   40 10.00         0.9477797</span></span>
919 1184
 <span><span class="co">## 35   50 10.00         0.9804650</span></span>
920
-<span><span class="co">## 36   60 10.00         0.9905660</span></span>
1185
+<span><span class="co">## 36   60 10.00         0.9949495</span></span>
921 1186
 <span><span class="co">## 37   70 10.00         1.0000000</span></span>
922 1187
 <span><span class="co">## 38   80 10.00         1.0000000</span></span>
923 1188
 <span><span class="co">## 39   90 10.00         1.0000000</span></span>
924 1189
 <span><span class="co">## 40  100 10.00         1.0000000</span></span>
925 1190
 <span><span class="co">## </span></span>
926 1191
 <span><span class="co">## [[4]]$bestIndex</span></span>
927
-<span><span class="co">## [1] 37</span></span>
1192
+<span><span class="co">## [1] 30</span></span>
928 1193
 <span><span class="co">## </span></span>
929 1194
 <span><span class="co">## </span></span>
930 1195
 <span><span class="co">## [[5]]</span></span>
931 1196
 <span><span class="co">## [[5]]$tuneCombinations</span></span>
932 1197
 <span><span class="co">##    topN  cost Balanced Accuracy</span></span>
933
-<span><span class="co">## 1    10  0.01         0.8053846</span></span>
934
-<span><span class="co">## 2    20  0.01         0.7857692</span></span>
935
-<span><span class="co">## 3    30  0.01         0.7907692</span></span>