annotate w4mkmeans_routines.R @ 4:06385448ff7f draft

planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
author eschen42
date Wed, 09 Aug 2017 17:55:08 -0400
parents 330ee1d840db
children 6817b036b06e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
1 ##------------------------------------------------------------------------------------------------------
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
2 ## these are the batch-independent and file-structure-independent routines to support the w4mkmeans tool
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
3 ##------------------------------------------------------------------------------------------------------
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
4
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
5 library(parallel)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
6
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
# Build the usage / contract text for the w4mkmeans tool.
#
# Takes no arguments.
# Returns a character vector with one element per line of the message.  The
# caller decides how to emit it; w4mkmeans() prints each element when its 'env'
# argument is not an environment or lacks a required member.
#
# The member names and defaults quoted here must agree with what w4mkmeans()
# actually checks and assigns (e.g. it requires 'sampleMetadata' and defaults
# 'categorical_prefix' to 'k').
w4kmeans_usage <- function() {
  c(
    "w4mkmeans: bad input.",
    "# contract:",
    " required - caller will provide an environment comprising:",
    " log_print - a logging function with the signature function(x, ...) expecting strings as x and ...",
    " variableMetadata - the corresponding W4M data.frame having feature metadata",
    # key name must match the member w4mkmeans() looks up in 'env'
    " sampleMetadata - the corresponding W4M data.frame having sample metadata",
    " dataMatrix - the corresponding W4M matrix",
    " slots - the number of parallel slots for calculating kmeans",
    " optional - environment may comprise:",
    " kfeatures - an array of integers, the k's to apply for clustering by feature (default, empty array)",
    " ksamples - an array of integers, the k's to apply for clustering by sample (default, empty array)",
    " iter.max - the maximum number of iterations when calculating a cluster (default = 10)",
    " nstart - how many random sets of centers should be chosen (default = 1)",
    " algorithm - string from c('Hartigan-Wong', 'Lloyd', 'Forgy', 'MacQueen') (default = Hartigan-Wong)",
    # describes the prefix itself rather than repeating the 'algorithm' text
    " categorical_prefix - string prepended to each cluster number written to the metadata (default = k)",
    " ",
    " this routine will return a list comprising:",
    " variableMetadata - the input variableMetadata data.frame with updates, if any",
    " sampleMetadata - the input sampleMetadata data.frame with updates, if any",
    " scores - an array of strings, each representing a line of a tsv having the following header:",
    " clusterOn TAB k TAB totalSS TAB betweenSS TAB proportion"
  )
}
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
34
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
35 w4mkmeans <- function(env) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
36 # abort if 'env' is null or is not an environment
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
37 if ( is.null(env) || ! is.environment(env) ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
38 lapply(w4kmeans_usage(),print)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
39 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
40 # supply default arguments
4
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
41 if ( ! exists("iter.max" , env) ) env$iter.max <- 10
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
42 if ( ! exists("nstart" , env) ) env$nstart <- 1
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
43 if ( ! exists("algorithm" , env) ) env$algorithm <- 'Hartigan-Wong'
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
44 if ( ! exists("categorical_prefix", env) ) env$categorical_prefix <- 'k'
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
45 if ( ! exists("ksamples" , env) ) env$ksamples <- c()
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
46 if ( ! exists("kfeatures" , env) ) env$kfeatures <- c()
0
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
47 # check mandatory arguments
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
48 expected <- c(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
49 "log_print"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
50 , "variableMetadata"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
51 , "sampleMetadata"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
52 , "dataMatrix"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
53 , "slots"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
54 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
55 missing_from_env <- setdiff(expected, (ls(env)))
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
56 if ( length(missing_from_env) > 0 ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
57 print(paste(c('expected environment members not found: ', as.character(missing_from_env)), collapse = ", "))
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
58 lapply(w4kmeans_usage(),print)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
59 stop("w4mkmeans: contract has been broken")
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
60 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
61 # extract parameters from 'env'
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
62 failure_action <- env$log_print
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
63 scores <- c( "clusterOn\tk\ttotalSS\tbetweenSS\tproportion" )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
64 sampleMetadata <- env$sampleMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
65 featureMetadata <- env$variableMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
66 slots <- env$slots
4
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
67 positive_ints <- function(a, what) {
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
68 i <- as.integer(a) # may introduce NAs by coercion
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
69 i <- i[!is.na(i)] # eliminate NAs
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
70 i <- i[i > 0] # eliminate non-positive integers
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
71 i <- unique(sort(i)) # eliminate redundancy and disorder
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
72 if (length(a)!=length(i)) {
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
73 failure_action("Some values for '", what, "' were skipped where not unique, not positive, or not convertible to an integer.")
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
74 }
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
75 return (i) # return results, if any
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
76 }
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
77 ksamples <- positive_ints(env$ksamples , "ksamples")
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
78 kfeatures <- positive_ints(env$kfeatures, "kfeatures")
0
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
79
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
80 myLapply <- parLapply
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
81 # uncomment the next line to mimic parLapply, but without parallelization (for testing/experimentation)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
82 # myLapply <- function(cl, ...) lapply(...)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
83 cl <- NULL
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
84 if ( identical(myLapply, parLapply) ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
85 failure_action(sprintf("w4mkmeans: using parallel evaluation with %d slots", slots))
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
86 failure_action(names(cl))
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
87 cl <- makePSOCKcluster(names = slots)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
88 # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
89 clusterExport(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
90 cl = cl
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
91 , varlist = c(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
92 "tryCatchFunc"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
93 , "calc_kmeans_one_dimension_one_k"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
94 , "prepare.data.matrix"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
95 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
96 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
97 final <- function(cl) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
98 # from ?makePSOCKcluster: "It is good practice to shut down the workers by calling stopCluster."
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
99 if ( !is.null(cl) ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
100 failure_action("w4mkmeans: stopping cluster used for parallel evaluation")
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
101 stopCluster(cl)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
102 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
103 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
104 } else {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
105 failure_action("w4mkmeans: using sequential evaluation (1 slot)")
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
106 final <- function(cl) { }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
107 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
108
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
109 tryCatch(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
110 expr = {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
111 # These myLapply calls produce lists of lists of results:
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
112 # - The outer list has no keys and its members are accessed by index
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
113 # - The inner list has keys "clusters" and "scores"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
114
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
115 # for each $i in ksamples, append column 'k$i' to data frame sampleMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
116 ksamples_length <- length(ksamples)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
117 if ( ksamples_length > 0 ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
118 smpl_result_list <- myLapply(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
119 cl = cl
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
120 , ksamples
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
121 , calc_kmeans_one_dimension_one_k
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
122 , env = env
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
123 , dimension = "samples"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
124 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
125 for ( i in 1:ksamples_length ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
126 result <- smpl_result_list[[i]]
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
127 if (result$success) {
4
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
128 sampleMetadata[sprintf("k%d",ksamples[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters)
0
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
129 scores <- c(scores, result$value$scores)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
130 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
131 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
132 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
133
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
134 # for each $i in kfeatures, append column 'k$i' to data frame featureMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
135 kfeatures_length <- length(kfeatures)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
136 if ( kfeatures_length > 0 ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
137 feat_result_list <- myLapply(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
138 cl = cl
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
139 , kfeatures
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
140 , calc_kmeans_one_dimension_one_k
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
141 , env = env
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
142 , dimension = "features"
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
143 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
144 for ( i in 1:kfeatures_length ) {
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
145 result <- feat_result_list[[i]]
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
146 if (result$success) {
4
06385448ff7f planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit f600ce8a783df16e49272341dce0fc6bbc299b0a
eschen42
parents: 0
diff changeset
147 featureMetadata[sprintf("k%d",kfeatures[i])] <- sprintf("%s%d", env$categorical_prefix, result$value$clusters)
0
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
148 scores <- c(scores, result$value$scores)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
149 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
150 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
151 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
152
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
153 return (
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
154 list(
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
155 variableMetadata = featureMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
156 , sampleMetadata = sampleMetadata
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
157 , scores = scores
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
158 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
159 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
160 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
161 , finally = final(cl)
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
162 )
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
163 }
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
164
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
# calculate k-means for features or samples
#   - recall that the dataMatrix has features in rows and samples in columns
# return value:
#   the list produced by tryCatchFunc; per the notes in this file:
#     on success: list(success = TRUE, value = list(clusters = km$cluster, scores = scores), msg = "")
#     on failure: list(success = FALSE, value = NA, msg = "the error message")
#   where
#     clusters: the cluster assignment (km$cluster) returned by kmeans
#     scores:   one tab-separated string holding
#               dimension, k, km$totss, km$betweenss, and km$betweenss/km$totss
# arguments:
#   k:
#     numeric (coerced to integer), the number of clusters to make
#   env:
#     environment that must contain a function named log_print and from which
#     dataMatrix, iter.max, nstart, and algorithm are read
#   dimension:
#   - "samples":  produce clusters column to add to the sampleMetadata table
#     - this is the default case
#   - "features": produce clusters column to add to the variableMetadata table
# side effects:
#   calls set.seed(4567), i.e., it resets the random-number stream of the R session
#   (or worker) in which it runs
# errors:
#   stop() is called, before any clustering is attempted, when env, k, or dimension
#   is not as expected; failures during clustering are reported through the
#   'success' and 'msg' members of the returned list instead
calc_kmeans_one_dimension_one_k <- function(k, env, dimension = "samples") {
  # abort if environment is not as expected
  #   - is.environment(NULL) is FALSE, so no separate NULL test is needed
  if ( ! is.environment(env) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'env' is not an environment")
  }
  if ( ! exists("log_print", env) || ! is.function(env$log_print) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'env' - environment does not include log_print or it is not a function")
  }
  # abort if k is not as expected
  if ( ! is.numeric(k) ) {
    stop(sprintf("calc_kmeans_one_dimension_one_k - expected numeric argument 'k' but type is %s", typeof(k)))
  }
  k <- as.integer(k)
  # abort if dimension is not as expected
  #   - must be exactly one string; '%in%' never yields NA, so NA_character_ and
  #     zero-length or multi-element vectors are rejected here with a clear message
  #     rather than failing later in an 'if' condition
  if ( ! is.character(dimension)
       || length(dimension) != 1
       || ! ( dimension %in% c("features", "samples") ) ) {
    stop("calc_kmeans_one_dimension_one_k - argument 'dimension' is neither 'features' nor 'samples'")
  }
  dm        <- env$dataMatrix
  iter.max  <- env$iter.max
  nstart    <- env$nstart
  algorithm <- env$algorithm
  dim_features <- dimension == "features"
  # tryCatchFunc produces a list
  #   On success of expr(), tryCatchFunc produces
  #     list(success = TRUE, value = expr(), msg = "")
  #   On failure of expr(), tryCatchFunc produces
  #     list(success = FALSE, value = NA, msg = "the error message")
  result_list <- tryCatchFunc( expr = function() {
    # kmeans clusters the rows of the matrix that it is given
    #   - to calculate feature-clusters, the matrix is used as-is
    #   - to calculate sample-clusters, the matrix is transposed first
    #   NOTE(review): this matches features-in-rows for env$dataMatrix provided that
    #     prepare.data.matrix preserves orientation - confirm
    # the assignment below creates a copy local to this closure; env$dataMatrix is not modified
    if ( ! dim_features ) dm <- t(dm)
    dm <- prepare.data.matrix( x.matrix = dm, data.transformation = function(x) { x } )
    # need to set.seed to get reproducible results from kmeans
    set.seed(4567)
    # do the k-means clustering
    km <- kmeans( x = dm, centers = k, iter.max = iter.max, nstart = nstart, algorithm = algorithm )
    # one tab-separated record: dimension, k, total SS, between-cluster SS, and their ratio
    scores <-
      sprintf("%s\t%d\t%0.5e\t%0.5e\t%0.5f"
             , dimension
             , k
             , km$totss
             , km$betweenss
             , km$betweenss/km$totss
             )
    list(clusters = km$cluster, scores = scores)
  })
  return ( result_list )
}
330ee1d840db planemo upload for repository https://github.com/HegemanLab/w4mkmeans_galaxy_wrapper/tree/master commit ae8ee04dc44eedf9121644ce8577c622da2a5f8c-dirty
eschen42
parents:
diff changeset
228