annotate main.R @ 5:3a8864093eac draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
author recetox
date Fri, 18 Jun 2021 20:51:55 +0000
parents
children 27853894eeac
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
1 library(recetox.aplcms)
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
2
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Write the feature columns of a single sample to a parquet file.
#
# df:       object that as.data.frame() can convert; after conversion it must
#           contain the columns "mz", "pos", "sd1", "sd2" and "area".
# filename: destination handed to arrow::write_parquet().
#
# Returns whatever arrow::write_parquet() returns.
save_extracted_features <- function(df, filename) {
  feature_frame <- as.data.frame(df)
  # Only these five columns are persisted; any others are dropped.
  kept <- feature_frame[c("mz", "pos", "sd1", "sd2", "area")]
  arrow::write_parquet(kept, filename)
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
8
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Write a feature/sample table to a parquet file.
#
# df:       table that must contain the columns "feature", "mz", "rt",
#           "sample", "sample_rt" and "sample_intensity"; it is subset with
#           `[` as-is, without any prior conversion.
# filename: destination handed to arrow::write_parquet().
#
# Returns whatever arrow::write_parquet() returns.
save_feature_sample_table <- function(df, filename) {
  selected <- df[
    c("feature", "mz", "rt", "sample", "sample_rt", "sample_intensity")
  ]
  arrow::write_parquet(selected, filename)
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
13
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Column names (and their order) used when reading and writing the table of
# known features.
#
# Returns a character vector of 18 column names.
known_table_columns <- function() {
  compound_cols <- c("chemical_formula", "HMDB_ID", "KEGG_compound_ID")
  mass_cols <- c("mass", "ion.type", "m.z")
  detection_cols <- c("Number_profiles_processed", "Percent_found")
  mz_range_cols <- c("mz_min", "mz_max")
  rt_cols <- c("RT_mean", "RT_sd", "RT_min", "RT_max")
  intensity_cols <- c(
    "int_mean(log)", "int_sd(log)", "int_min(log)", "int_max(log)"
  )

  # Concatenation order defines the column order of the stored table.
  c(
    compound_cols,
    mass_cols,
    detection_cols,
    mz_range_cols,
    rt_cols,
    intensity_cols
  )
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
20
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Write the known-feature table to a parquet file.
#
# df:       table containing every column named by known_table_columns().
# filename: destination handed to arrow::write_parquet().
#
# Returns whatever arrow::write_parquet() returns.
save_known_table <- function(df, filename) {
  # Subsetting by name both restricts and orders the stored columns.
  arrow::write_parquet(df[known_table_columns()], filename)
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
25
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Read the known-feature table from a parquet file.
#
# filename: parquet file to read.
#
# Returns the result of arrow::read_parquet(), limited to the columns named by
# known_table_columns().
read_known_table <- function(filename) {
  arrow::read_parquet(
    file = filename,
    col_select = known_table_columns()
  )
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
29
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Write the pairing table as plain text with write.table() defaults.
#
# df:       two-column table; its own column names are ignored and the header
#           is written as "new" and "old".
# filename: destination file.
#
# Row names are not written.
save_pairing <- function(df, filename) {
  write.table(
    x = df,
    file = filename,
    row.names = FALSE,
    col.names = c("new", "old")
  )
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
33
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Save every extracted-feature table as "extracted/<sample>.parquet".
#
# dfs:       list of feature tables, one per sample; each element is passed to
#            save_extracted_features().
# filenames: sample file paths; only the base name without its extension is
#            reused for the output file name.
#
# Returns the result of mapply() over the individual save calls.
save_all_extracted_features <- function(dfs, filenames) {
  output_dir <- "extracted"
  stems <- tools::file_path_sans_ext(basename(filenames))
  targets <- file.path(output_dir, paste0(stems, ".parquet"))

  # dir.create() warns and returns FALSE when the directory already exists,
  # so only create it when missing; genuine creation failures still warn.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir)
  }

  mapply(save_extracted_features, dfs, targets)
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
41
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Save every corrected-feature table as "corrected/<sample>.parquet".
#
# dfs:       list of feature tables, one per sample; each element is passed to
#            save_extracted_features(), i.e. the same writer that is used for
#            the extracted features.
# filenames: sample file paths; only the base name without its extension is
#            reused for the output file name.
#
# Returns the result of mapply() over the individual save calls.
save_all_corrected_features <- function(dfs, filenames) {
  output_dir <- "corrected"
  stems <- tools::file_path_sans_ext(basename(filenames))
  targets <- file.path(output_dir, paste0(stems, ".parquet"))

  # dir.create() warns and returns FALSE when the directory already exists,
  # so only create it when missing; genuine creation failures still warn.
  if (!dir.exists(output_dir)) {
    dir.create(output_dir)
  }

  mapply(save_extracted_features, dfs, targets)
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
49
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Run unsupervised() on the samples and store all of its outputs.
#
# sample_files:   sample file paths; they are first reordered by
#                 sort_samples_by_acquisition_number().
# aligned_file:   parquet destination for the aligned feature/sample table.
# recovered_file: parquet destination for the recovered feature/sample table.
# ...:            forwarded unchanged to unsupervised().
#
# Per-sample tables are written below "extracted/" and "corrected/".
unsupervised_main <- function(sample_files,
                              aligned_file,
                              recovered_file,
                              ...) {
  ordered_samples <- sort_samples_by_acquisition_number(sample_files)
  result <- unsupervised(filenames = ordered_samples, ...)

  # Per-sample outputs.
  save_all_extracted_features(result$extracted_features, ordered_samples)
  save_all_corrected_features(result$corrected_features, ordered_samples)

  # Tables spanning all samples.
  save_feature_sample_table(
    result$aligned_feature_sample_table,
    aligned_file
  )
  save_feature_sample_table(
    result$recovered_feature_sample_table,
    recovered_file
  )
}
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
61
3a8864093eac "planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox_aplcms commit 831143b00343dbf66d738d5b5e7cbe9ee6f99594"
recetox
parents:
diff changeset
# Run hybrid() on the samples with a table of known features and store all of
# its outputs.
#
# sample_files:             sample file paths; they are first reordered by
#                           sort_samples_by_acquisition_number().
# known_table_file:         parquet file read with read_known_table().
# updated_known_table_file: parquet destination for the updated known table.
# pairing_file:             text destination for the feature/known-table
#                           pairing.
# aligned_file:             parquet destination for the aligned table.
# recovered_file:           parquet destination for the recovered table.
# ...:                      forwarded unchanged to hybrid().
#
# Per-sample tables are written below "extracted/" and "corrected/".
hybrid_main <- function(sample_files,
                        known_table_file,
                        updated_known_table_file,
                        pairing_file,
                        aligned_file,
                        recovered_file,
                        ...) {
  ordered_samples <- sort_samples_by_acquisition_number(sample_files)

  known_features <- read_known_table(known_table_file)
  result <- hybrid(
    filenames = ordered_samples,
    known_table = known_features,
    ...
  )

  # Outputs tied to the known table.
  save_known_table(result$updated_known_table, updated_known_table_file)
  save_pairing(result$features_known_table_pairing, pairing_file)

  # Per-sample outputs.
  save_all_extracted_features(result$extracted_features, ordered_samples)
  save_all_corrected_features(result$corrected_features, ordered_samples)

  # Tables spanning all samples.
  save_feature_sample_table(
    result$aligned_feature_sample_table,
    aligned_file
  )
  save_feature_sample_table(
    result$recovered_feature_sample_table,
    recovered_file
  )
}