Mercurial > repos > eschen42 > w4mkmeans

--- a/w4mkmeans.xml	Wed Aug 09 17:55:08 2017 -0400
+++ b/w4mkmeans.xml	Tue Feb 27 22:51:04 2018 -0500
@@ -1,9 +1,11 @@
-<tool id="w4mkmeans" name="w4mKmeans" version="0.98.3">
+<tool id="w4mkmeans" name="w4mKmeans" version="0.98.4">
   <description>Calculate K-means for W4M dataMatrix features or samples</description>

   <requirements>
-    <requirement type="package" version="3.3.2">r-base</requirement>
+    <requirement type="package" version="3.4.1">r-base</requirement>
     <requirement type="package" version="1.1_4">r-batch</requirement>
+    <requirement type="package" version="1.8.0">libssh2</requirement>
+    <requirement type="package" version="1.13.2">krb5</requirement>
   </requirements>

   <stdio>
@@ -27,18 +29,17 @@
       slots "\${GALAXY_SLOTS:-1}"
       variableMetadata_out '$variableMetadata_out'
       variable_metadata_path '$variableMetadata_in'
-    ; echo exit code $?
   ]]></command>

   <inputs>
     <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" />
     <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />
     <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />
-    <param name="categoricalPrefix" label="prefix for cluster names " type="text" value="k" help="[categorical_prefix] Some tools require non-numeric values to discern categorical data; e.g., enter 'k' here to prepend 'k' to cluster numbers in the output; default 'k'." />
+    <param name="categoricalPrefix" label="prefix for cluster names " type="text" value="c" help="[categorical_prefix] Some tools require non-numeric values to discern categorical data; e.g., enter 'k' here to prepend 'k' to cluster numbers in the output; default 'c'." />
     <param name="ksamples" label="K value(s) for samples" type="text" value = "0" help="[ksamples] Single K or comma-separated Ks for samples, or 0 for none." />
     <param name="kfeatures" label="K value(s) for features" type="text" value = "0" help="[kfeatures] Single K or comma-separated Ks for features (variables), or 0 for none." />
-    <param name="iter_max" label="Max number of iterations" type="text" value = "10" help="[iter_max] The maximum number of iterations allowed; default 10." />
-    <param name="nstart" label="Number of random sets" type="text" value = "1" help="[nstart] How many random sets should be chosen; default 1." />
+    <param name="iter_max" label="Max number of iterations" type="text" value = "20" help="[iter_max] The maximum number of iterations allowed; default 20." />
+    <param name="nstart" label="Number of random sets" type="text" value = "20" help="[nstart] How many random sets should be chosen; default 20." />
     <param name="algorithm" label="Algorithm for clustering" type="select" value = "Hartigan-Wong" help="[algorithm] K-means clustering algorithm, default 'Hartigan-Wong'; alternatives 'Lloyd', 'MacQueen'; 'Forgy' is a synonym for 'Lloyd', see stats::kmeans reference for further info.">
       <option value="Forgy">Forgy</option>
       <option value="Hartigan-Wong" selected="True">Hartigan-Wong</option>
@@ -48,9 +49,9 @@
   </inputs>

   <outputs>
-    <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>
-    <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>
-    <data name="scores_out" label="${tool.name}_${dataMatrix_in.name}.kmeans" format="tabular" ></data>
+    <data name="sampleMetadata_out" label="${sampleMetadata_in.name}.kmeans-smpl" format="tabular" ></data>
+    <data name="variableMetadata_out" label="${variableMetadata_in.name}.kmeans-vrbl" format="tabular" ></data>
+    <data name="scores_out" label="${dataMatrix_in.name}.kmeans-score" format="tabular" ></data>
   </outputs>

   <tests>
@@ -208,15 +209,15 @@

 **Input files**

-+-------------------+-------------------------------------------------------------------------------------------------------------------+
-| Input File        | Download from URL                                                                                                 |
-+===================+===================================================================================================================+
-| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_dataMatrix.tsv       |
-+-------------------+-------------------------------------------------------------------------------------------------------------------+
-| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv   |
-+-------------------+-------------------------------------------------------------------------------------------------------------------+
-| Feature metadata  | https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |
-+-------------------+-------------------------------------------------------------------------------------------------------------------+
++-------------------------------------------------------------------------------------------------------------------+
+| Download from URL                                                                                                 |
++===================================================================================================================+
+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_dataMatrix.tsv       |
++-------------------------------------------------------------------------------------------------------------------+
+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv   |
++-------------------------------------------------------------------------------------------------------------------+
+| https://raw.githubusercontent.com/HegemanLab/w4mkmeans_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |
++-------------------------------------------------------------------------------------------------------------------+

 **Other input parameters**

@@ -227,9 +228,9 @@
 +-----------------+---------------+
 | kfeatures       | 5,6,7         |
 +-----------------+---------------+
-| iter_max        | 10            |
+| iter_max        | 20            |
 +-----------------+---------------+
-| nstart          | 1             |
+| nstart          | 20            |
 +-----------------+---------------+
 | algorithm       | Hartigan-Wong |
 +-----------------+---------------+
@@ -238,6 +239,7 @@
 NEWS
 ----

+- February 2018, Version 0.98.4 - Renamed output datasets to append '.kmeans-smpl', '.kmeans-vrbl', or '.kmeans-score'; changed defaults to prefix 'c', iter_max 20, and nstart 20.
 - August 2017, Version 0.98.3 - Add (optional) prefix to category numbers for downstream tools that treat only non-numeric data as categorical.
 - August 2017, Version 0.98.1 - First release
--- a/w4mkmeans_routines.R	Wed Aug 09 17:55:08 2017 -0400
+++ b/w4mkmeans_routines.R	Tue Feb 27 22:51:04 2018 -0500
@@ -59,7 +59,7 @@
     stop("w4mkmeans: contract has been broken")
   }
   # extract parameters from 'env'
-  failure_action  <- env$log_print
+  log_action  <- env$log_print
   scores          <- c( "clusterOn\tk\ttotalSS\tbetweenSS\tproportion" )
   sampleMetadata  <- env$sampleMetadata
   featureMetadata <- env$variableMetadata
@@ -70,7 +70,7 @@
     i <- i[i > 0]         # eliminate non-positive integers
     i <- unique(sort(i))  # eliminate redundancy and disorder
     if (length(a)!=length(i)) {
-      failure_action("Some values for '", what, "' were skipped where not unique, not positive, or not convertible to an integer.")
+      log_action("Some values for '", what, "' were skipped where not unique, not positive, or not convertible to an integer.")
     }
     return (i)            # return results, if any
   }
@@ -78,13 +78,19 @@
   kfeatures       <- positive_ints(env$kfeatures, "kfeatures")

   myLapply <- parLapply
-  # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation)
-  # myLapply <- function(cl, ...) lapply(...)
   cl <- NULL
+  tryCatch(
+    expr = {
+      cl <- makePSOCKcluster(names = slots)
+    }
+    , error = function(e) {
+      log_action(sprintf("w4mkmeans: falling back to serial evaluation because makePSOCKcluster(names = %d) threw an exception", slots))
+      # mimic parLapply, but without parallelization (as a last resort)
+      myLapply <<- function(cl, ...) lapply(...)
+    }
+  )
   if ( identical(myLapply, parLapply) ) {
-    failure_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots))
-    failure_action(names(cl))
-    cl <- makePSOCKcluster(names = slots)
+    log_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots))
     # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."
     clusterExport(
       cl = cl
@@ -97,12 +103,12 @@
     final <- function(cl) {
       # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."
       if ( !is.null(cl) ) {
-        failure_action("w4mkmeans: stopping cluster used for parallel evaluation")
+        log_action("w4mkmeans: stopping cluster used for parallel evaluation")
         stopCluster(cl)
       }
     }
   } else {
-    failure_action("w4mkmeans: using sequential evaluation (1 slot)")
+    log_action("w4mkmeans: using sequential evaluation (one slot)")
     final <- function(cl) { }
   }