Mercurial > repos > recetox > spec2vec_training
annotate spec2vec_training_wrapper.py @ 0:4e4d3c8efb22 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
author | recetox |
---|---|
date | Thu, 05 Jan 2023 10:08:01 +0000 |
parents | |
children |
rev | line source |
---|---|
0
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
2 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
3 import argparse |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
4 import sys |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
5 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
6 from matchms.importing import load_from_mgf, load_from_msp |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
7 from spec2vec import SpectrumDocument |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
8 from spec2vec.model_building import train_new_word2vec_model |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
9 from spec2vec.serialization import export_model |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
10 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
11 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def read_spectra(spectra_file, file_format):
    """Load spectra from a file using the matchms loader for the given format.

    Args:
        spectra_file: Path to the file containing the spectra.
        file_format: Either "mgf" or "msp".

    Returns:
        Whatever the selected matchms loader returns for ``spectra_file``.

    Raises:
        NotImplementedError: If ``file_format`` is neither "mgf" nor "msp".
    """
    # Reject unknown formats up front so the happy path stays flat.
    if file_format not in ("mgf", "msp"):
        raise NotImplementedError(f"Unsupported file format: {file_format}.")

    loader = load_from_mgf if file_format == "mgf" else load_from_msp
    return loader(spectra_file)
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
19 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
20 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def parse_checkpoints_input(checkpoints_input):
    """Parse a comma-separated string of epoch numbers into a list of integers.

    Spaces are stripped before splitting, duplicate values are removed, and
    the result is returned in ascending order.

    Args:
        checkpoints_input: String such as ``"10, 20, 30"``.

    Returns:
        Sorted list of the unique integer checkpoint values.

    Raises:
        ValueError: If any comma-separated entry is not a valid integer.
    """
    checkpoints_str = checkpoints_input.replace(" ", "").split(",")
    try:
        # Convert eagerly: a lazy map() would only raise once it is consumed,
        # i.e. outside this try block, bypassing the message below.
        checkpoints_int = {int(value) for value in checkpoints_str}
    except ValueError as err:
        raise ValueError("Checkpoint values must be integers.") from err
    # Sort so the output does not depend on set iteration order.
    return sorted(checkpoints_int)
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
28 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
29 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
def _str_to_bool(value):
    """Convert a command-line string into a boolean for argparse.

    ``type=bool`` would treat every non-empty string (including "False" and
    "0") as True, so boolean options need an explicit converter. The empty
    string maps to False, matching what ``bool("")`` returned previously.
    """
    normalized = str(value).strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(f"Expected a boolean value, got {value!r}.")


def main(argv):
    """Train a spec2vec model from a spectra file and export it.

    Parses the command-line options, loads the spectra, converts them to
    ``SpectrumDocument`` objects, trains a word2vec model and writes the model
    (and optionally a pickle of it) to the requested output paths.

    Args:
        argv: List of command-line arguments (without the program name). If
            None, argparse falls back to ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description="Train a spec2vec model.")

    # Input data
    parser.add_argument("--spectra_filename", type=str, help="Path to a file containing spectra.")
    parser.add_argument("--spectra_fileformat", type=str, help="Spectra file format.")

    # Training parameters
    parser.add_argument("--epochs", type=int, default=0, help="Number of epochs to train the model.")
    parser.add_argument("--checkpoints", type=str, default=None, help="Epochs after which to save the model.")

    # Hyperparameters
    parser.add_argument("--vector_size", type=int, default=100, help="Dimensionality of the feature vectors.")
    parser.add_argument("--alpha", type=float, default=0.025, help="The initial learning rate.")
    parser.add_argument("--window", type=int, default=5, help="The maximum distance between the current and predicted peak within a spectrum.")
    parser.add_argument("--min_count", type=int, default=5, help="Ignores all peaks with total frequency lower than this.")
    parser.add_argument("--sample", type=float, default=0.001, help="The threshold for configuring which higher-frequency peaks are randomly downsampled.")
    parser.add_argument("--seed", type=int, default=1, help="A seed for model reproducibility.")
    parser.add_argument("--min_alpha", type=float, default=0.0001, help="Learning rate will linearly drop to min_alpha as training progresses.")
    parser.add_argument("--sg", type=int, default=0, help="Training algorithm: 1 for skip-gram; otherwise CBOW.")
    parser.add_argument("--hs", type=int, default=0, help="If 1, hierarchical softmax will be used for model training. If set to 0, and negative is non-zero, negative sampling will be used.")
    parser.add_argument("--negative", type=int, default=5, help="If > 0, negative sampling will be used, the int for negative specifies how many “noise words” should be drawn (usually between 5-20). If set to 0, no negative sampling is used.")
    parser.add_argument("--ns_exponent", type=float, default=0.75, help="The exponent used to shape the negative sampling distribution.")
    parser.add_argument("--cbow_mean", type=int, default=1, help="If 0, use the sum of the context word vectors. If 1, use the mean. Only applies when cbow is used.")
    # Boolean options use an explicit converter; with type=bool the value
    # "False" would have been parsed as True.
    parser.add_argument("--sorted_vocab", type=_str_to_bool, default=True, help="If 1, sort the vocabulary by descending frequency before assigning word indexes.")
    parser.add_argument("--batch_words", type=int, default=10000, help="Target size (in words) for batches of examples passed to worker threads (and thus cython routines). Larger batches will be passed if individual texts are longer than 10000 words, but the standard cython code truncates to that maximum.")
    parser.add_argument("--shrink_windows", type=_str_to_bool, default=True, help="If 1, the input sentence will be truncated to the window size.")
    parser.add_argument("--max_vocab_size", type=int, default=None, help="Limits the RAM during vocabulary building; if there are more unique words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. Set to None for no limit (default).")
    parser.add_argument("--n_decimals", type=int, default=2, help="Rounds peak position to this number of decimals.")
    parser.add_argument("--n_workers", type=int, default=1, help="Number of worker nodes to train the model.")

    # Output files
    parser.add_argument("--model_filename_pickle", type=str, help="If specified, the model will also be saved as a pickle file.")
    parser.add_argument("--model_filename", type=str, help="Path to the output model json-file.")
    parser.add_argument("--weights_filename", type=str, help="Path to the output weights json-file.")

    # Parse the arguments handed to this function rather than silently
    # re-reading sys.argv.
    args = parser.parse_args(argv)

    # Load the spectra
    spectra = list(read_spectra(args.spectra_filename, args.spectra_fileformat))
    reference_documents = [SpectrumDocument(spectrum, n_decimals=args.n_decimals) for spectrum in spectra]

    # Process epoch arguments: an explicit checkpoint list takes precedence
    # over the plain epoch count.
    if args.checkpoints:
        iterations = parse_checkpoints_input(args.checkpoints)
    else:
        iterations = args.epochs

    # Train a model
    # NOTE(review): --min_alpha is described as the final learning rate but is
    # forwarded as `learning_rate_decay`; spec2vec appears to treat that value
    # as a per-epoch decrement. Confirm the intended mapping.
    model = train_new_word2vec_model(
        documents=reference_documents,
        iterations=iterations,
        filename="spec2vec",
        progress_logger=True,
        workers=args.n_workers,
        vector_size=args.vector_size,
        learning_rate_initial=args.alpha,
        learning_rate_decay=args.min_alpha,
        window=args.window,
        min_count=args.min_count,
        sample=args.sample,
        seed=args.seed,
        sg=args.sg,
        hs=args.hs,
        negative=args.negative,
        ns_exponent=args.ns_exponent,
        cbow_mean=args.cbow_mean,
        sorted_vocab=args.sorted_vocab,
        batch_words=args.batch_words,
        shrink_windows=args.shrink_windows,
        max_vocab_size=args.max_vocab_size)

    # Save the model
    if args.model_filename_pickle:
        model.save(args.model_filename_pickle)

    export_model(model, args.model_filename, args.weights_filename)
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
107 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
108 |
4e4d3c8efb22
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/spec2vec commit 2e4bdc2fd94445aa5a8d1882a3d092cca727e4b6
recetox
parents:
diff
changeset
|
# Run the training only when executed as a script, not when imported.
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(argv=cli_arguments)