Browse code

disable use of __pthread_get_minstack which is non public symbol (this may be a temporary fix)

Ben Bolstad authored on 24/11/2020 16:02:23
Showing1 changed files
... ...
@@ -54,8 +54,10 @@ struct loop_data{
54 54
 #ifdef __linux__
55 55
 #include <features.h>
56 56
 #ifdef __GLIBC__
57
-#ifdef __GLIBC_PREREQ && __GLIBC_PREREQ(2, 15)
58
-#define INFER_MIN_STACKSIZE 1
57
+#ifdef __GLIBC_PREREQ
58
+#if __GLIBC_PREREQ(2, 15)
59
+/*#define INFER_MIN_STACKSIZE 0  */     /* currently disabled */
60
+#endif
59 61
 #endif
60 62
 #endif
61 63
 #endif
Browse code

Merge branch 'master' of https://github.com/franciscodavid/preprocessCore into franciscodavid-master

Ben Bolstad authored on 01/02/2020 18:00:13
Showing0 changed files
Browse code

do not define mutex_R multiple times

Tom Callaway authored on 01/02/2020 15:24:36
Showing1 changed files
... ...
@@ -26,7 +26,7 @@
26 26
 #include "rlm_se.h"
27 27
 #include "psi_fns.h"
28 28
 #include "medianpolish.h"
29
-
29
+#include "common.h"
30 30
 
31 31
 
32 32
 
... ...
@@ -37,7 +37,6 @@
37 37
 #include <limits.h>
38 38
 #include <unistd.h>
39 39
 #define THREADS_ENV_VAR "R_THREADS"
40
-pthread_mutex_t mutex_R;
41 40
 struct loop_data{
42 41
   double *matrix;
43 42
   SEXP *R_return_value;
Browse code

Getting stacksize values that do not conflict with openblas threads

With the introduction of the USE_TLS option in openblas 0.3.4, threads created by packages loading openblas libraries are affected by a glibc bug described here:

https://sourceware.org/bugzilla/show_bug.cgi?id=11787

The ongoing discussion in openblas:

https://github.com/xianyi/OpenBLAS/issues/1936

Example workarounds used by other projects that inspired this one:

https://github.com/rust-lang/rust/issues/6233
https://github.com/rust-lang/rust/pull/11885
https://bugs.openjdk.java.net/browse/JDK-8225498

Francisco D. MorĂ³n-Duran authored on 29/07/2019 14:02:51
Showing1 changed files
... ...
@@ -51,6 +51,16 @@ struct loop_data{
51 51
   int start_row;
52 52
   int end_row;
53 53
 };
54
+
55
+#ifdef __linux__
56
+#include <features.h>
57
+#ifdef __GLIBC__
58
+#ifdef __GLIBC_PREREQ && __GLIBC_PREREQ(2, 15)
59
+#define INFER_MIN_STACKSIZE 1
60
+#endif
61
+#endif
62
+#endif
63
+
54 64
 #endif
55 65
 
56 66
 
... ...
@@ -165,11 +175,17 @@ SEXP R_sub_rcModelSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){
165 175
   double chunk_size_d, chunk_tot_d;
166 176
   char *nthreads;
167 177
   pthread_attr_t attr;
178
+  /* Initialize thread attribute */
179
+  pthread_attr_init(&attr);
168 180
   pthread_t *threads;
169 181
   struct loop_data *args;
170 182
   void *status; 
171 183
 #ifdef PTHREAD_STACK_MIN
172
-  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
184
+#ifdef INFER_MIN_STACKSIZE
185
+  size_t stacksize = __pthread_get_minstack(&attr) + sysconf(_SC_PAGE_SIZE);
186
+#else
187
+  size_t stacksize = PTHREAD_STACK_MIN + sysconf(_SC_PAGE_SIZE);
188
+#endif
173 189
 #else
174 190
   size_t stacksize = 0x8000;
175 191
 #endif
... ...
@@ -211,8 +227,7 @@ SEXP R_sub_rcModelSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){
211 227
   }
212 228
   threads = (pthread_t *) Calloc(num_threads, pthread_t);
213 229
 
214
-  /* Initialize and set thread detached attribute */
215
-  pthread_attr_init(&attr);
230
+  /* Set thread detached attribute */
216 231
   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
217 232
   pthread_attr_setstacksize (&attr, stacksize);
218 233
   
... ...
@@ -480,11 +495,17 @@ SEXP R_sub_rcModelSummarize_plm(SEXP RMatrix, SEXP R_rowIndexList, SEXP PsiCode,
480 495
   double chunk_size_d, chunk_tot_d;
481 496
   char *nthreads;
482 497
   pthread_attr_t attr;
498
+  /* Initialize thread attribute */
499
+  pthread_attr_init(&attr);
483 500
   pthread_t *threads;
484 501
   struct loop_data *args;
485 502
   void *status; 
486 503
 #ifdef PTHREAD_STACK_MIN
487
-  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
504
+#ifdef INFER_MIN_STACKSIZE
505
+  size_t stacksize = __pthread_get_minstack(&attr) + sysconf(_SC_PAGE_SIZE);
506
+#else
507
+  size_t stacksize = PTHREAD_STACK_MIN + sysconf(_SC_PAGE_SIZE);
508
+#endif
488 509
 #else
489 510
   size_t stacksize = 0x8000;
490 511
 #endif
... ...
@@ -532,8 +553,7 @@ SEXP R_sub_rcModelSummarize_plm(SEXP RMatrix, SEXP R_rowIndexList, SEXP PsiCode,
532 553
   }
533 554
   threads = (pthread_t *) Calloc(num_threads, pthread_t);
534 555
 
535
-  /* Initialize and set thread detached attribute */
536
-  pthread_attr_init(&attr);
556
+  /* Set thread detached attribute */
537 557
   pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
538 558
   pthread_attr_setstacksize (&attr, stacksize);
539 559
   
Browse code

fix non-void return warnings

Ben Bolstad authored on 04/09/2014 03:01:44
Showing1 changed files
... ...
@@ -137,7 +137,8 @@ static void *sub_rcModelSummarize_medianpolish_group(void *data){
137 137
     for (i=0; i < cols; i++)
138 138
         beta[i]+=intercept;
139 139
 
140
-  }
140
+  }  
141
+  return NULL;
141 142
 }
142 143
 #endif
143 144
 
... ...
@@ -451,6 +452,7 @@ static void *sub_rcModelSummarize_plm_group(void *data){
451 452
     Free(Ymat);
452 453
      
453 454
   }
455
+  return NULL;
454 456
 }
455 457
 #endif
456 458
 
Browse code

setting up git-svn bridge

Bioconductor Git-SVN Bridge authored on 31/08/2014 21:28:19
Showing1 changed files
1 1
new file mode 100644
... ...
@@ -0,0 +1,690 @@
1
+/*********************************************************************
2
+ **
3
+ ** file: R_subrcModel_interface.c
4
+ **
5
+ ** Aim: Code which provides .Call() interfaces to the subset of rows in 
6
+ ** a matrix rcModel fitting
7
+ **
8
+ ** Copyright (C) 2012 Ben Bolstad
9
+ **
10
+ ** created by: B. M. Bolstad <bmb@bmbolstad.com>
11
+ ** 
12
+ ** created on: Mar 7, 2012
13
+ **
14
+ ** History
15
+ ** Mar 7, 2012 - Initial version
16
+ **
17
+ *********************************************************************/
18
+
19
+
20
+#include <R.h>
21
+#include <Rdefines.h>
22
+#include <Rmath.h>
23
+#include <Rinternals.h>
24
+
25
+#include "rlm.h"
26
+#include "rlm_se.h"
27
+#include "psi_fns.h"
28
+#include "medianpolish.h"
29
+
30
+
31
+
32
+
33
+
34
+
35
+#ifdef USE_PTHREADS
36
+#include <pthread.h>
37
+#include <limits.h>
38
+#include <unistd.h>
39
+#define THREADS_ENV_VAR "R_THREADS"
40
+pthread_mutex_t mutex_R;
41
+struct loop_data{
42
+  double *matrix;
43
+  SEXP *R_return_value;
44
+  SEXP *R_rowIndexList;
45
+  SEXP *PsiCode;
46
+  SEXP *PsiK;
47
+  SEXP *Scales; 
48
+  int rows;
49
+  int cols;
50
+  int length_rowIndexList;
51
+  int start_row;
52
+  int end_row;
53
+};
54
+#endif
55
+
56
+
57
+
58
+
59
+#ifdef  USE_PTHREADS
60
+static void *sub_rcModelSummarize_medianpolish_group(void *data){
61
+  
62
+  struct loop_data *args = (struct loop_data *) data;
63
+  int *cur_rows;
64
+  double *buffer, *buffer2;
65
+  int i, j, k;
66
+  int ncur_rows;
67
+
68
+  
69
+  SEXP R_return_value_cur;
70
+  SEXP R_weights;
71
+  SEXP R_residuals;
72
+  SEXP R_beta;
73
+  SEXP R_SE;
74
+  
75
+  SEXP R_return_value_names;
76
+
77
+  double *beta;
78
+  double *residuals;
79
+  double *weights;
80
+  double *se;
81
+
82
+  double intercept;
83
+
84
+  int cols = args->cols;
85
+ 
86
+  for (j = args->start_row; j <= args->end_row;  j++){    
87
+    ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); 
88
+    cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j));
89
+    
90
+    pthread_mutex_lock(&mutex_R);
91
+    PROTECT(R_return_value_cur = allocVector(VECSXP,4));
92
+    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
93
+    /* PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));*/
94
+    PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols));
95
+    /*  PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); */
96
+
97
+    R_weights = R_NilValue;
98
+    R_SE = R_NilValue;
99
+
100
+    beta = NUMERIC_POINTER(R_beta);
101
+    residuals = NUMERIC_POINTER(R_residuals);
102
+    /*  weights = NUMERIC_POINTER(R_weights);
103
+        se = NUMERIC_POINTER(R_SE);
104
+    */
105
+
106
+    SET_VECTOR_ELT(R_return_value_cur,0,R_beta);
107
+    SET_VECTOR_ELT(R_return_value_cur,1,R_weights);
108
+    SET_VECTOR_ELT(R_return_value_cur,2,R_residuals);
109
+    SET_VECTOR_ELT(R_return_value_cur,3,R_SE);
110
+    UNPROTECT(2);
111
+
112
+    PROTECT(R_return_value_names= allocVector(STRSXP,4));
113
+    SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
114
+    SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
115
+    SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
116
+    SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
117
+    setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names);
118
+    UNPROTECT(1);
119
+
120
+    SET_VECTOR_ELT(*(args->R_return_value),j,R_return_value_cur); 
121
+    UNPROTECT(1);
122
+    pthread_mutex_unlock(&mutex_R);
123
+
124
+
125
+
126
+
127
+    for (k = 0; k < cols; k++){
128
+        for (i =0; i < ncur_rows; i++){
129
+     	    residuals[k*ncur_rows + i] = args->matrix[k*args->rows + cur_rows[i]];  
130
+        }
131
+    } 
132
+
133
+    memset(beta, 0, (ncur_rows+cols)*sizeof(double));
134
+
135
+    median_polish_fit_no_copy(residuals, ncur_rows, cols, &beta[cols], &beta[0], &intercept);
136
+
137
+    for (i=0; i < cols; i++)
138
+        beta[i]+=intercept;
139
+
140
+  }
141
+}
142
+#endif
143
+
144
+
145
+
146
+
147
+SEXP R_sub_rcModelSummarize_medianpolish(SEXP RMatrix, SEXP R_rowIndexList){
148
+
149
+  SEXP R_return_value;  
150
+  SEXP dim1;
151
+
152
+  double *matrix=NUMERIC_POINTER(RMatrix);
153
+  double *results, *buffer, *buffer2;
154
+  
155
+  int *cur_rows;
156
+
157
+  int rows, cols;
158
+  int length_rowIndexList = LENGTH(R_rowIndexList);
159
+  int ncur_rows;
160
+
161
+  int i,j;
162
+#ifdef USE_PTHREADS
163
+  int t, returnCode, chunk_size, num_threads = 1;
164
+  double chunk_size_d, chunk_tot_d;
165
+  char *nthreads;
166
+  pthread_attr_t attr;
167
+  pthread_t *threads;
168
+  struct loop_data *args;
169
+  void *status; 
170
+#ifdef PTHREAD_STACK_MIN
171
+  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
172
+#else
173
+  size_t stacksize = 0x8000;
174
+#endif
175
+#else
176
+
177
+  SEXP R_return_value_cur;
178
+
179
+  SEXP R_weights;
180
+  SEXP R_residuals;
181
+  SEXP R_beta;
182
+  SEXP R_SE;
183
+  
184
+  SEXP R_return_value_names;
185
+
186
+  double *beta;
187
+  double *residuals;
188
+  double *weights;
189
+  double *se;
190
+
191
+  double intercept;
192
+
193
+  int k;
194
+#endif
195
+
196
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
197
+  rows = INTEGER(dim1)[0];
198
+  cols = INTEGER(dim1)[1];
199
+  UNPROTECT(1);
200
+
201
+  PROTECT(R_return_value = allocVector(VECSXP,length_rowIndexList));
202
+  
203
+#ifdef  USE_PTHREADS
204
+  nthreads = getenv(THREADS_ENV_VAR);
205
+  if(nthreads != NULL){
206
+    num_threads = atoi(nthreads);
207
+    if(num_threads <= 0){
208
+      error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads);
209
+    }
210
+  }
211
+  threads = (pthread_t *) Calloc(num_threads, pthread_t);
212
+
213
+  /* Initialize and set thread detached attribute */
214
+  pthread_attr_init(&attr);
215
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
216
+  pthread_attr_setstacksize (&attr, stacksize);
217
+  
218
+  /* this code works out how many threads to use and allocates ranges of subColumns to each thread */
219
+  /* The aim is to try to be as fair as possible in dividing up the matrix */
220
+  /* A special cases to be aware of: 
221
+     1) Number of subColumns is less than the number of threads
222
+  */
223
+  
224
+  if (num_threads < length_rowIndexList){
225
+    chunk_size = length_rowIndexList/num_threads;
226
+    chunk_size_d = ((double) length_rowIndexList)/((double) num_threads);
227
+  } else {
228
+    chunk_size = 1;
229
+    chunk_size_d = 1;
230
+  }
231
+
232
+  if(chunk_size == 0){
233
+    chunk_size = 1;
234
+  }
235
+  args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data);
236
+
237
+  args[0].matrix = matrix;
238
+  args[0].R_return_value = &R_return_value;
239
+  args[0].R_rowIndexList = &R_rowIndexList;
240
+  args[0].rows = rows;  
241
+  args[0].cols = cols;
242
+  args[0].length_rowIndexList = length_rowIndexList;
243
+
244
+  pthread_mutex_init(&mutex_R, NULL);
245
+
246
+  t = 0; /* t = number of actual threads doing work */
247
+  chunk_tot_d = 0;
248
+  for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){
249
+     if(t != 0){
250
+       memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data));
251
+     }
252
+
253
+     args[t].start_row = i;     
254
+     /* take care of distribution of the remainder (when #chips%#threads != 0) */
255
+     chunk_tot_d += chunk_size_d;
256
+     // Add 0.00001 in case there was a rounding issue with the division
257
+     if(i+chunk_size < floor(chunk_tot_d+0.00001)){
258
+       args[t].end_row = i+chunk_size;
259
+       i++;
260
+     }
261
+     else{
262
+       args[t].end_row = i+chunk_size-1;
263
+     }
264
+     t++;
265
+  }
266
+
267
+  
268
+  for (i =0; i < t; i++){
269
+     returnCode = pthread_create(&threads[i], &attr, sub_rcModelSummarize_medianpolish_group, (void *) &(args[i]));
270
+     if (returnCode){
271
+         error("ERROR; return code from pthread_create() is %d\n", returnCode);
272
+     }
273
+  }
274
+  /* Wait for the other threads */
275
+  for(i = 0; i < t; i++){
276
+      returnCode = pthread_join(threads[i], &status);
277
+      if (returnCode){
278
+         error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", 
279
+               i, returnCode, *((int *) status));
280
+      }
281
+  }
282
+
283
+  pthread_attr_destroy(&attr);  
284
+  pthread_mutex_destroy(&mutex_R);
285
+  Free(threads);
286
+  Free(args);  
287
+#else     
288
+
289
+  for (j =0; j < length_rowIndexList; j++){    
290
+
291
+    ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); 
292
+    cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j));
293
+
294
+    PROTECT(R_return_value_cur = allocVector(VECSXP,4));
295
+    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
296
+    /* PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));*/
297
+    PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols));
298
+    /*  PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); */
299
+
300
+    R_weights = R_NilValue;
301
+    R_SE = R_NilValue;
302
+
303
+
304
+    SET_VECTOR_ELT(R_return_value_cur,0,R_beta);
305
+    SET_VECTOR_ELT(R_return_value_cur,1,R_weights);
306
+    SET_VECTOR_ELT(R_return_value_cur,2,R_residuals);
307
+    SET_VECTOR_ELT(R_return_value_cur,3,R_SE);
308
+
309
+    UNPROTECT(2);
310
+
311
+    beta = NUMERIC_POINTER(R_beta);
312
+    residuals = NUMERIC_POINTER(R_residuals);
313
+    /*  weights = NUMERIC_POINTER(R_weights);
314
+        se = NUMERIC_POINTER(R_SE);
315
+    */
316
+
317
+    for (k = 0; k < cols; k++){
318
+        for (i =0; i < ncur_rows; i++){
319
+     	    residuals[k*ncur_rows + i] = matrix[k*rows + cur_rows[i]];  
320
+        }
321
+    } 
322
+
323
+    memset(beta, 0, (ncur_rows+cols)*sizeof(double));
324
+
325
+    median_polish_fit_no_copy(residuals, ncur_rows, cols, &beta[cols], &beta[0], &intercept);
326
+
327
+    for (i=0; i < cols; i++)
328
+        beta[i]+=intercept;
329
+
330
+    PROTECT(R_return_value_names= allocVector(STRSXP,4));
331
+    SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
332
+    SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
333
+    SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
334
+    SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
335
+    setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names);
336
+    UNPROTECT(2);
337
+    SET_VECTOR_ELT(R_return_value,j,R_return_value_cur);
338
+  }
339
+#endif
340
+  UNPROTECT(1);
341
+  return R_return_value;
342
+}
343
+
344
+
345
+
346
+
347
+
348
+
349
+
350
+
351
+
352
+
353
+
354
+
355
+#ifdef  USE_PTHREADS
356
+static void *sub_rcModelSummarize_plm_group(void *data){
357
+  
358
+  struct loop_data *args = (struct loop_data *) data;
359
+  int *cur_rows;
360
+  double *buffer, *buffer2;
361
+  int i, j, k;
362
+  int ncur_rows;
363
+
364
+  
365
+  SEXP R_return_value_cur;
366
+  SEXP R_weights;
367
+  SEXP R_residuals;
368
+  SEXP R_beta;
369
+  SEXP R_SE;
370
+  SEXP R_scale;
371
+
372
+  SEXP R_return_value_names;
373
+
374
+  double *Ymat;
375
+
376
+  double *beta;
377
+  double *residuals;
378
+  double *weights;
379
+  double *se;
380
+  
381
+  double scale=-1.0;
382
+  double *scaleptr;
383
+
384
+  double residSE;
385
+
386
+  int cols = args->cols;
387
+ 
388
+  for (j = args->start_row; j <= args->end_row;  j++){    
389
+    ncur_rows = LENGTH(VECTOR_ELT(*(args->R_rowIndexList),j)); 
390
+    cur_rows = INTEGER_POINTER(VECTOR_ELT(*(args->R_rowIndexList),j));
391
+  
392
+    pthread_mutex_lock(&mutex_R);
393
+    PROTECT(R_return_value_cur = allocVector(VECSXP,5));
394
+    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
395
+    PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));
396
+    PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols));
397
+    PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); 
398
+    PROTECT(R_scale = allocVector(REALSXP,1));
399
+  
400
+    beta = NUMERIC_POINTER(R_beta);
401
+    residuals = NUMERIC_POINTER(R_residuals);
402
+    weights = NUMERIC_POINTER(R_weights);
403
+    se = NUMERIC_POINTER(R_SE);
404
+    scaleptr = NUMERIC_POINTER(R_scale);
405
+
406
+    SET_VECTOR_ELT(R_return_value_cur,0,R_beta);
407
+    SET_VECTOR_ELT(R_return_value_cur,1,R_weights);
408
+    SET_VECTOR_ELT(R_return_value_cur,2,R_residuals);
409
+    SET_VECTOR_ELT(R_return_value_cur,3,R_SE);
410
+    SET_VECTOR_ELT(R_return_value_cur,4,R_scale);
411
+    UNPROTECT(5); 
412
+   
413
+    PROTECT(R_return_value_names= allocVector(STRSXP,5));
414
+    SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
415
+    SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
416
+    SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
417
+    SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
418
+    SET_STRING_ELT(R_return_value_names,4,mkChar("Scale"));
419
+    setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names);
420
+    UNPROTECT(1);
421
+ 
422
+    SET_VECTOR_ELT(*(args->R_return_value),j,R_return_value_cur);
423
+    UNPROTECT(1);
424
+    pthread_mutex_unlock(&mutex_R);	
425
+  
426
+    if (isNull(*args->Scales)){
427
+      scaleptr[0] = -1.0;
428
+    } else if (length(*args->Scales) != cols) {
429
+      scaleptr[0] = NUMERIC_POINTER(*args->Scales)[0];
430
+    }
431
+
432
+
433
+    Ymat = Calloc(ncur_rows*cols,double);
434
+    
435
+    
436
+    for (k = 0; k < cols; k++){
437
+        for (i =0; i < ncur_rows; i++){
438
+     	    Ymat[k*ncur_rows + i] = args->matrix[k*args->rows + cur_rows[i]];  
439
+        }
440
+    } 
441
+
442
+    rlm_fit_anova_scale(Ymat, ncur_rows, cols, scaleptr, beta, residuals, weights, PsiFunc(asInteger(*args->PsiCode)),asReal(*args->PsiK), 20, 0);
443
+  
444
+    rlm_compute_se_anova(Ymat, ncur_rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 4, PsiFunc(asInteger(*args->PsiCode)),asReal(*args->PsiK));
445
+
446
+    beta[ncur_rows+cols -1] = 0.0;
447
+
448
+    for (i = cols; i < ncur_rows + cols -1; i++)
449
+       beta[ncur_rows+cols -1]-=beta[i];
450
+
451
+    Free(Ymat);
452
+     
453
+  }
454
+}
455
+#endif
456
+
457
+
458
+
459
+
460
+
461
+SEXP R_sub_rcModelSummarize_plm(SEXP RMatrix, SEXP R_rowIndexList, SEXP PsiCode, SEXP PsiK, SEXP Scales){
462
+
463
+  SEXP R_return_value;  
464
+  SEXP dim1;
465
+
466
+  double *matrix=NUMERIC_POINTER(RMatrix);
467
+  double *results, *buffer, *buffer2;
468
+  
469
+  int *cur_rows;
470
+
471
+  int rows, cols;
472
+  int length_rowIndexList = LENGTH(R_rowIndexList);
473
+  int ncur_rows;
474
+
475
+  int i,j;
476
+#ifdef USE_PTHREADS
477
+  int t, returnCode, chunk_size, num_threads = 1;
478
+  double chunk_size_d, chunk_tot_d;
479
+  char *nthreads;
480
+  pthread_attr_t attr;
481
+  pthread_t *threads;
482
+  struct loop_data *args;
483
+  void *status; 
484
+#ifdef PTHREAD_STACK_MIN
485
+  size_t stacksize = PTHREAD_STACK_MIN + 0x4000;
486
+#else
487
+  size_t stacksize = 0x8000;
488
+#endif
489
+#else
490
+
491
+  SEXP R_return_value_cur;
492
+
493
+  SEXP R_weights;
494
+  SEXP R_residuals;
495
+  SEXP R_beta;
496
+  SEXP R_SE;
497
+  SEXP R_scale;
498
+
499
+  SEXP R_return_value_names;
500
+
501
+  double *Ymat;
502
+
503
+  double *beta;
504
+  double *residuals;
505
+  double *weights;
506
+  double *se;
507
+
508
+  double scale=-1.0;
509
+  double *scaleptr;
510
+
511
+  double residSE;
512
+
513
+  int k;
514
+#endif
515
+
516
+  PROTECT(dim1 = getAttrib(RMatrix,R_DimSymbol));
517
+  rows = INTEGER(dim1)[0];
518
+  cols = INTEGER(dim1)[1];
519
+  UNPROTECT(1);
520
+
521
+  PROTECT(R_return_value = allocVector(VECSXP,length_rowIndexList));
522
+  
523
+#ifdef  USE_PTHREADS
524
+  nthreads = getenv(THREADS_ENV_VAR);
525
+  if(nthreads != NULL){
526
+    num_threads = atoi(nthreads);
527
+    if(num_threads <= 0){
528
+      error("The number of threads (enviroment variable %s) must be a positive integer, but the specified value was %s", THREADS_ENV_VAR, nthreads);
529
+    }
530
+  }
531
+  threads = (pthread_t *) Calloc(num_threads, pthread_t);
532
+
533
+  /* Initialize and set thread detached attribute */
534
+  pthread_attr_init(&attr);
535
+  pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
536
+  pthread_attr_setstacksize (&attr, stacksize);
537
+  
538
+  /* this code works out how many threads to use and allocates ranges of subColumns to each thread */
539
+  /* The aim is to try to be as fair as possible in dividing up the matrix */
540
+  /* A special cases to be aware of: 
541
+     1) Number of subColumns is less than the number of threads
542
+  */
543
+  
544
+  if (num_threads < length_rowIndexList){
545
+    chunk_size = length_rowIndexList/num_threads;
546
+    chunk_size_d = ((double) length_rowIndexList)/((double) num_threads);
547
+  } else {
548
+    chunk_size = 1;
549
+    chunk_size_d = 1;
550
+  }
551
+
552
+  if(chunk_size == 0){
553
+    chunk_size = 1;
554
+  }
555
+  args = (struct loop_data *) Calloc((length_rowIndexList < num_threads ? length_rowIndexList : num_threads), struct loop_data);
556
+
557
+  args[0].matrix = matrix;
558
+  args[0].R_return_value = &R_return_value;
559
+  args[0].R_rowIndexList = &R_rowIndexList;
560
+  args[0].PsiCode = &PsiCode;
561
+  args[0].PsiK = &PsiK;
562
+  args[0].Scales = &Scales;
563
+  args[0].rows = rows;  
564
+  args[0].cols = cols;
565
+  args[0].length_rowIndexList = length_rowIndexList;
566
+
567
+  pthread_mutex_init(&mutex_R, NULL);
568
+
569
+  t = 0; /* t = number of actual threads doing work */
570
+  chunk_tot_d = 0;
571
+  for (i=0; floor(chunk_tot_d+0.00001) < length_rowIndexList; i+=chunk_size){
572
+     if(t != 0){
573
+       memcpy(&(args[t]), &(args[0]), sizeof(struct loop_data));
574
+     }
575
+
576
+     args[t].start_row = i;     
577
+     /* take care of distribution of the remainder (when #chips%#threads != 0) */
578
+     chunk_tot_d += chunk_size_d;
579
+     // Add 0.00001 in case there was a rounding issue with the division
580
+     if(i+chunk_size < floor(chunk_tot_d+0.00001)){
581
+       args[t].end_row = i+chunk_size;
582
+       i++;
583
+     }
584
+     else{
585
+       args[t].end_row = i+chunk_size-1;
586
+     }
587
+     t++;
588
+  }
589
+
590
+  
591
+  for (i =0; i < t; i++){
592
+     returnCode = pthread_create(&threads[i], &attr, sub_rcModelSummarize_plm_group, (void *) &(args[i]));
593
+     if (returnCode){
594
+         error("ERROR; return code from pthread_create() is %d\n", returnCode);
595
+     }
596
+  }
597
+  /* Wait for the other threads */
598
+  for(i = 0; i < t; i++){
599
+      returnCode = pthread_join(threads[i], &status);
600
+      if (returnCode){
601
+         error("ERROR; return code from pthread_join(thread #%d) is %d, exit status for thread was %d\n", 
602
+               i, returnCode, *((int *) status));
603
+      }
604
+  }
605
+
606
+  pthread_attr_destroy(&attr);  
607
+  pthread_mutex_destroy(&mutex_R);
608
+  Free(threads);
609
+  Free(args);  
610
+#else     
611
+
612
+  for (j =0; j < length_rowIndexList; j++){    
613
+
614
+    ncur_rows = LENGTH(VECTOR_ELT(R_rowIndexList,j)); 
615
+    cur_rows = INTEGER_POINTER(VECTOR_ELT(R_rowIndexList,j));
616
+
617
+    PROTECT(R_return_value_cur = allocVector(VECSXP,5));
618
+    PROTECT(R_beta = allocVector(REALSXP, ncur_rows + cols));
619
+    PROTECT(R_weights = allocMatrix(REALSXP,ncur_rows,cols));
620
+    PROTECT(R_residuals = allocMatrix(REALSXP,ncur_rows,cols));
621
+    PROTECT(R_SE = allocVector(REALSXP,ncur_rows+cols)); 
622
+    PROTECT(R_scale = allocVector(REALSXP,1));
623
+
624
+    SET_VECTOR_ELT(R_return_value_cur,0,R_beta);
625
+    SET_VECTOR_ELT(R_return_value_cur,1,R_weights);
626
+    SET_VECTOR_ELT(R_return_value_cur,2,R_residuals);
627
+    SET_VECTOR_ELT(R_return_value_cur,3,R_SE);
628
+    SET_VECTOR_ELT(R_return_value_cur,4,R_scale);
629
+
630
+    UNPROTECT(5);
631
+
632
+    beta = NUMERIC_POINTER(R_beta);
633
+    residuals = NUMERIC_POINTER(R_residuals);
634
+    weights = NUMERIC_POINTER(R_weights);
635
+    se = NUMERIC_POINTER(R_SE);
636
+    
637
+
638
+   scaleptr = NUMERIC_POINTER(R_scale);
639
+
640
+
641
+    if (isNull(Scales)){
642
+      scaleptr[0] = -1.0;
643
+    } else if (length(Scales) != cols) {
644
+      scaleptr[0] = NUMERIC_POINTER(Scales)[0];
645
+    }
646
+
647
+
648
+    Ymat = Calloc(ncur_rows*cols,double);
649
+    
650
+    
651
+    for (k = 0; k < cols; k++){
652
+        for (i =0; i < ncur_rows; i++){
653
+     	    Ymat[k*ncur_rows + i] = matrix[k*rows + cur_rows[i]];  
654
+        }
655
+    } 
656
+
657
+    rlm_fit_anova_scale(Ymat, ncur_rows, cols, scaleptr, beta, residuals, weights, PsiFunc(asInteger(PsiCode)),asReal(PsiK), 20, 0);
658
+  
659
+    rlm_compute_se_anova(Ymat, ncur_rows, cols, beta, residuals, weights,se, (double *)NULL, &residSE, 4, PsiFunc(asInteger(PsiCode)),asReal(PsiK));
660
+
661
+
662
+  
663
+
664
+     beta[ncur_rows+cols -1] = 0.0;
665
+  
666
+
667
+     for (i = cols; i < ncur_rows + cols -1; i++)
668
+        beta[ncur_rows+cols -1]-=beta[i];
669
+
670
+     Free(Ymat);
671
+     PROTECT(R_return_value_names= allocVector(STRSXP,5));
672
+     SET_STRING_ELT(R_return_value_names,0,mkChar("Estimates"));
673
+     SET_STRING_ELT(R_return_value_names,1,mkChar("Weights"));
674
+     SET_STRING_ELT(R_return_value_names,2,mkChar("Residuals"));
675
+     SET_STRING_ELT(R_return_value_names,3,mkChar("StdErrors"));
676
+     SET_STRING_ELT(R_return_value_names,4,mkChar("Scale"));
677
+     setAttrib(R_return_value_cur, R_NamesSymbol,R_return_value_names);
678
+     UNPROTECT(2);
679
+     SET_VECTOR_ELT(R_return_value,j,R_return_value_cur);
680
+  }
681
+#endif
682
+  UNPROTECT(1);
683
+  return R_return_value;
684
+}
685
+
686
+
687
+
688
+
689
+
690
+