Mercurial > repos > tduigou > sculpt_sequences
diff sculpt_sequences.py @ 1:a0cd867780ec draft default tip
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6b6ce806b5d016b3c7f20318180eff2dbe64395a-dirty
| author | tduigou |
|---|---|
| date | Thu, 17 Jul 2025 13:24:49 +0000 |
| parents | 0c7f75a2338b |
| children | |
line wrap: on
line diff
```diff
--- a/sculpt_sequences.py Mon Jul 07 13:11:42 2025 +0000
+++ b/sculpt_sequences.py Thu Jul 17 13:24:49 2025 +0000
@@ -1,4 +1,6 @@
 import argparse
+import json
+import sys
 import os
 import re
 import dnacauldron
@@ -175,16 +177,20 @@
     parser.add_argument("--outdir_unscul", required=True, help="unscul file dir")
     parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True,
                         help="Use file names as IDs (True/False)")
-    parser.add_argument("--avoid_patterns", required=True,
+    parser.add_argument("--avoid_patterns", required=False,
                         help="List of patterns to avoid (comma-separated, e.g., 'BsaI_site,BsmBI_site')")
-    parser.add_argument("--gc_constraints", required=True,
+    parser.add_argument("--gc_constraints", required=False,
                         help="GC content constraints as 'min;max;window' (space-separated, e.g., '0.3;0.7;100 0.1;0.3;100')")
-    parser.add_argument("--DnaOptimizationProblemClass", required=True,
+    parser.add_argument("--DnaOptimizationProblemClass", required=False,
                         help="the class to use for DnaOptimizationProblem")
-    parser.add_argument("--hairpin_constraints", required=True,
+    parser.add_argument("--hairpin_constraints", required=False,
                         help="Hairpin constraints as 'stem_size;window_size' (space-separated, e.g., '20;200 30;250')")
-    parser.add_argument("--kmer_size", required = True,
+    parser.add_argument("--kmer_size", required = False,
                         help="K-mer uniqueness size (e.g., '15')")
+    parser.add_argument("--json_params", required=False,
+                        help="JSON params for the tool")
+    parser.add_argument("--use_json_param", required=True,
+                        help="If use JSON as param source")

     return parser.parse_args()

@@ -206,19 +212,78 @@
     # 4. k-mer size: single value or list
     kmer_size = [int(k.strip()) for k in args.kmer_size.strip().split(',') if k.strip()] if args.kmer_size.strip() else []

-    return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size
+    # 5. DnaOptimizationProblemClass (as string)
+    DnaOptimizationProblemClass = args.DnaOptimizationProblemClass if args.DnaOptimizationProblemClass else None
+
+    return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, DnaOptimizationProblemClass

+def load_constraints_from_json(json_path):
+    with open(json_path, 'r') as f:
+        params = json.load(f)
+
+    def split_lines(val):
+        if isinstance(val, str):
+            return [line.strip() for line in val.strip().split('\n') if line.strip()]
+        return val
+
+    avoid_patterns = split_lines(params.get("avoid_patterns", ""))
+    hairpin_constraints = split_lines(params.get("hairpin_constraints", ""))
+    gc_constraints = split_lines(params.get("gc_constraints", ""))
+    kmer_size = [int(k.strip()) for k in str(params.get("kmer_size", "")).split(',') if k.strip()]
+    DnaOptimizationProblemClass = params.get("DnaOptimizationProblemClass", None)
+
+    return {
+        "avoid_patterns": avoid_patterns,
+        "hairpin_constraints": hairpin_constraints,
+        "gc_constraints": gc_constraints,
+        "kmer_size": kmer_size,
+        "DnaOptimizationProblemClass": DnaOptimizationProblemClass
+    }


 if __name__ == "__main__":

     args = parse_command_line_args()

-    avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, = extract_constraints_from_args(args)
+    avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, DnaOptimizationProblemClass = extract_constraints_from_args(args)
+
+    # Check if the flag --use_json_param is present and set to true
+    if "--use_json_param" in sys.argv:
+        use_json_index = sys.argv.index("--use_json_param") + 1
+        use_json = sys.argv[use_json_index].lower() == "true"
+    else:
+        use_json = False
+
+    # Now only check --json_params if use_json is True
+    if use_json:
+        if "--json_params" in sys.argv:
+            json_index = sys.argv.index("--json_params") + 1
+            json_file = sys.argv[json_index]
+            if json_file.lower() != "none":
+                json_constraints = load_constraints_from_json(json_file)
+                avoid_patterns = json_constraints["avoid_patterns"]
+                hairpin_constraints = json_constraints["hairpin_constraints"]
+                gc_constraints = json_constraints["gc_constraints"]
+                kmer_size = json_constraints["kmer_size"]
+                DnaOptimizationProblemClass = json_constraints["DnaOptimizationProblemClass"]
+
+    params = {
+        "files_to_sculpt": args.files_to_sculpt,
+        "file_name_mapping": args.file_name_mapping,
+        "outdir_unscul": args.outdir_unscul,
+        "outdir_scul": args.outdir_scul,
+        "use_file_names_as_id": args.use_file_names_as_id,
+        "avoid_patterns": avoid_patterns,
+        "hairpin_constraints": hairpin_constraints,
+        "gc_constraints": gc_constraints,
+        "kmer_size": kmer_size,
+        "DnaOptimizationProblemClass": DnaOptimizationProblemClass
+    }
+
     sculpt_sequances(
         args.files_to_sculpt, args.file_name_mapping, args.outdir_scul, args.outdir_unscul, args.use_file_names_as_id,
         avoid_patterns,
-        gc_constraints, args.DnaOptimizationProblemClass,
+        gc_constraints, DnaOptimizationProblemClass,
         kmer_size,
         hairpin_constraints
     )
```
