Mercurial > repos > tduigou > sculpt_sequences
comparison sculpt_sequences.py @ 1:a0cd867780ec draft default tip
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6b6ce806b5d016b3c7f20318180eff2dbe64395a-dirty
| author | tduigou |
|---|---|
| date | Thu, 17 Jul 2025 13:24:49 +0000 |
| parents | 0c7f75a2338b |
| children | |
comparison
equal
deleted
inserted
replaced
| 0:0c7f75a2338b | 1:a0cd867780ec |
|---|---|
| 1 import argparse | 1 import argparse |
| 2 import json | |
| 3 import sys | |
| 2 import os | 4 import os |
| 3 import re | 5 import re |
| 4 import dnacauldron | 6 import dnacauldron |
| 5 import dnachisel | 7 import dnachisel |
| 6 from Bio import SeqIO | 8 from Bio import SeqIO |
| 173 help='Mapping of Galaxy filenames to original filenames') | 175 help='Mapping of Galaxy filenames to original filenames') |
| 174 parser.add_argument("--outdir_scul", required=True, help="scul file dir") | 176 parser.add_argument("--outdir_scul", required=True, help="scul file dir") |
| 175 parser.add_argument("--outdir_unscul", required=True, help="unscul file dir") | 177 parser.add_argument("--outdir_unscul", required=True, help="unscul file dir") |
| 176 parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True, | 178 parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True, |
| 177 help="Use file names as IDs (True/False)") | 179 help="Use file names as IDs (True/False)") |
| 178 parser.add_argument("--avoid_patterns", required=True, | 180 parser.add_argument("--avoid_patterns", required=False, |
| 179 help="List of patterns to avoid (comma-separated, e.g., 'BsaI_site,BsmBI_site')") | 181 help="List of patterns to avoid (comma-separated, e.g., 'BsaI_site,BsmBI_site')") |
| 180 parser.add_argument("--gc_constraints", required=True, | 182 parser.add_argument("--gc_constraints", required=False, |
| 181 help="GC content constraints as 'min;max;window' (space-separated, e.g., '0.3;0.7;100 0.1;0.3;100')") | 183 help="GC content constraints as 'min;max;window' (space-separated, e.g., '0.3;0.7;100 0.1;0.3;100')") |
| 182 parser.add_argument("--DnaOptimizationProblemClass", required=True, | 184 parser.add_argument("--DnaOptimizationProblemClass", required=False, |
| 183 help="the class to use for DnaOptimizationProblem") | 185 help="the class to use for DnaOptimizationProblem") |
| 184 parser.add_argument("--hairpin_constraints", required=True, | 186 parser.add_argument("--hairpin_constraints", required=False, |
| 185 help="Hairpin constraints as 'stem_size;window_size' (space-separated, e.g., '20;200 30;250')") | 187 help="Hairpin constraints as 'stem_size;window_size' (space-separated, e.g., '20;200 30;250')") |
| 186 parser.add_argument("--kmer_size", required = True, | 188 parser.add_argument("--kmer_size", required = False, |
| 187 help="K-mer uniqueness size (e.g., '15')") | 189 help="K-mer uniqueness size (e.g., '15')") |
| 190 parser.add_argument("--json_params", required=False, | |
| 191 help="JSON params for the tool") | |
| 192 parser.add_argument("--use_json_param", required=True, | |
| 193 help="If use JSON as param source") | |
| 188 | 194 |
| 189 return parser.parse_args() | 195 return parser.parse_args() |
| 190 | 196 |
| 191 | 197 |
| 192 def extract_constraints_from_args(args): | 198 def extract_constraints_from_args(args): |
| 204 gc_constraints = re.split(split_pattern, args.gc_constraints.strip()) if args.gc_constraints.strip() else [] | 210 gc_constraints = re.split(split_pattern, args.gc_constraints.strip()) if args.gc_constraints.strip() else [] |
| 205 | 211 |
| 206 # 4. k-mer size: single value or list | 212 # 4. k-mer size: single value or list |
| 207 kmer_size = [int(k.strip()) for k in args.kmer_size.strip().split(',') if k.strip()] if args.kmer_size.strip() else [] | 213 kmer_size = [int(k.strip()) for k in args.kmer_size.strip().split(',') if k.strip()] if args.kmer_size.strip() else [] |
| 208 | 214 |
| 209 return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size | 215 # 5. DnaOptimizationProblemClass (as string) |
| 210 | 216 DnaOptimizationProblemClass = args.DnaOptimizationProblemClass if args.DnaOptimizationProblemClass else None |
| 211 | 217 |
| 218 return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, DnaOptimizationProblemClass | |
| 219 | |
| 220 | |
| 221 def load_constraints_from_json(json_path): | |
| 222 with open(json_path, 'r') as f: | |
| 223 params = json.load(f) | |
| 224 | |
| 225 def split_lines(val): | |
| 226 if isinstance(val, str): | |
| 227 return [line.strip() for line in val.strip().split('\n') if line.strip()] | |
| 228 return val | |
| 229 | |
| 230 avoid_patterns = split_lines(params.get("avoid_patterns", "")) | |
| 231 hairpin_constraints = split_lines(params.get("hairpin_constraints", "")) | |
| 232 gc_constraints = split_lines(params.get("gc_constraints", "")) | |
| 233 kmer_size = [int(k.strip()) for k in str(params.get("kmer_size", "")).split(',') if k.strip()] | |
| 234 DnaOptimizationProblemClass = params.get("DnaOptimizationProblemClass", None) | |
| 235 | |
| 236 return { | |
| 237 "avoid_patterns": avoid_patterns, | |
| 238 "hairpin_constraints": hairpin_constraints, | |
| 239 "gc_constraints": gc_constraints, | |
| 240 "kmer_size": kmer_size, | |
| 241 "DnaOptimizationProblemClass": DnaOptimizationProblemClass | |
| 242 } | |
| 212 | 243 |
| 213 if __name__ == "__main__": | 244 if __name__ == "__main__": |
| 214 args = parse_command_line_args() | 245 args = parse_command_line_args() |
| 215 | 246 |
| 216 avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, = extract_constraints_from_args(args) | 247 avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, DnaOptimizationProblemClass = extract_constraints_from_args(args) |
| 248 | |
| 249 # Check if the flag --use_json_param is present and set to true | |
| 250 if "--use_json_param" in sys.argv: | |
| 251 use_json_index = sys.argv.index("--use_json_param") + 1 | |
| 252 use_json = sys.argv[use_json_index].lower() == "true" | |
| 253 else: | |
| 254 use_json = False | |
| 255 | |
| 256 # Now only check --json_params if use_json is True | |
| 257 if use_json: | |
| 258 if "--json_params" in sys.argv: | |
| 259 json_index = sys.argv.index("--json_params") + 1 | |
| 260 json_file = sys.argv[json_index] | |
| 261 if json_file.lower() != "none": | |
| 262 json_constraints = load_constraints_from_json(json_file) | |
| 263 avoid_patterns = json_constraints["avoid_patterns"] | |
| 264 hairpin_constraints = json_constraints["hairpin_constraints"] | |
| 265 gc_constraints = json_constraints["gc_constraints"] | |
| 266 kmer_size = json_constraints["kmer_size"] | |
| 267 DnaOptimizationProblemClass = json_constraints["DnaOptimizationProblemClass"] | |
| 268 | |
| 269 params = { | |
| 270 "files_to_sculpt": args.files_to_sculpt, | |
| 271 "file_name_mapping": args.file_name_mapping, | |
| 272 "outdir_unscul": args.outdir_unscul, | |
| 273 "outdir_scul": args.outdir_scul, | |
| 274 "use_file_names_as_id": args.use_file_names_as_id, | |
| 275 "avoid_patterns": avoid_patterns, | |
| 276 "hairpin_constraints": hairpin_constraints, | |
| 277 "gc_constraints": gc_constraints, | |
| 278 "kmer_size": kmer_size, | |
| 279 "DnaOptimizationProblemClass": DnaOptimizationProblemClass | |
| 280 } | |
| 281 | |
| 217 | 282 |
| 218 sculpt_sequances( | 283 sculpt_sequances( |
| 219 args.files_to_sculpt, args.file_name_mapping, | 284 args.files_to_sculpt, args.file_name_mapping, |
| 220 args.outdir_scul, args.outdir_unscul, | 285 args.outdir_scul, args.outdir_unscul, |
| 221 args.use_file_names_as_id, avoid_patterns, | 286 args.use_file_names_as_id, avoid_patterns, |
| 222 gc_constraints, args.DnaOptimizationProblemClass, | 287 gc_constraints, DnaOptimizationProblemClass, |
| 223 kmer_size, hairpin_constraints | 288 kmer_size, hairpin_constraints |
| 224 ) | 289 ) |
