comparison sculpt_sequences.py @ 1:a0cd867780ec draft default tip

planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb commit 6b6ce806b5d016b3c7f20318180eff2dbe64395a-dirty
author tduigou
date Thu, 17 Jul 2025 13:24:49 +0000
parents 0c7f75a2338b
children
comparison
equal deleted inserted replaced
0:0c7f75a2338b 1:a0cd867780ec
1 import argparse 1 import argparse
2 import json
3 import sys
2 import os 4 import os
3 import re 5 import re
4 import dnacauldron 6 import dnacauldron
5 import dnachisel 7 import dnachisel
6 from Bio import SeqIO 8 from Bio import SeqIO
173 help='Mapping of Galaxy filenames to original filenames') 175 help='Mapping of Galaxy filenames to original filenames')
174 parser.add_argument("--outdir_scul", required=True, help="scul file dir") 176 parser.add_argument("--outdir_scul", required=True, help="scul file dir")
175 parser.add_argument("--outdir_unscul", required=True, help="unscul file dir") 177 parser.add_argument("--outdir_unscul", required=True, help="unscul file dir")
176 parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True, 178 parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True,
177 help="Use file names as IDs (True/False)") 179 help="Use file names as IDs (True/False)")
178 parser.add_argument("--avoid_patterns", required=True, 180 parser.add_argument("--avoid_patterns", required=False,
179 help="List of patterns to avoid (comma-separated, e.g., 'BsaI_site,BsmBI_site')") 181 help="List of patterns to avoid (comma-separated, e.g., 'BsaI_site,BsmBI_site')")
180 parser.add_argument("--gc_constraints", required=True, 182 parser.add_argument("--gc_constraints", required=False,
181 help="GC content constraints as 'min;max;window' (space-separated, e.g., '0.3;0.7;100 0.1;0.3;100')") 183 help="GC content constraints as 'min;max;window' (space-separated, e.g., '0.3;0.7;100 0.1;0.3;100')")
182 parser.add_argument("--DnaOptimizationProblemClass", required=True, 184 parser.add_argument("--DnaOptimizationProblemClass", required=False,
183 help="the class to use for DnaOptimizationProblem") 185 help="the class to use for DnaOptimizationProblem")
184 parser.add_argument("--hairpin_constraints", required=True, 186 parser.add_argument("--hairpin_constraints", required=False,
185 help="Hairpin constraints as 'stem_size;window_size' (space-separated, e.g., '20;200 30;250')") 187 help="Hairpin constraints as 'stem_size;window_size' (space-separated, e.g., '20;200 30;250')")
186 parser.add_argument("--kmer_size", required = True, 188 parser.add_argument("--kmer_size", required = False,
187 help="K-mer uniqueness size (e.g., '15')") 189 help="K-mer uniqueness size (e.g., '15')")
190 parser.add_argument("--json_params", required=False,
191 help="JSON params for the tool")
192 parser.add_argument("--use_json_param", required=True,
193 help="If use JSON as param source")
188 194
189 return parser.parse_args() 195 return parser.parse_args()
190 196
191 197
192 def extract_constraints_from_args(args): 198 def extract_constraints_from_args(args):
204 gc_constraints = re.split(split_pattern, args.gc_constraints.strip()) if args.gc_constraints.strip() else [] 210 gc_constraints = re.split(split_pattern, args.gc_constraints.strip()) if args.gc_constraints.strip() else []
205 211
206 # 4. k-mer size: single value or list 212 # 4. k-mer size: single value or list
207 kmer_size = [int(k.strip()) for k in args.kmer_size.strip().split(',') if k.strip()] if args.kmer_size.strip() else [] 213 kmer_size = [int(k.strip()) for k in args.kmer_size.strip().split(',') if k.strip()] if args.kmer_size.strip() else []
208 214
209 return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size 215 # 5. DnaOptimizationProblemClass (as string)
210 216 DnaOptimizationProblemClass = args.DnaOptimizationProblemClass if args.DnaOptimizationProblemClass else None
211 217
218 return avoid_patterns, hairpin_constraints, gc_constraints, kmer_size, DnaOptimizationProblemClass
219
220
def load_constraints_from_json(json_path):
    """Load sculpting constraints from a JSON parameter file.

    The JSON document is expected to be an object that may contain the keys
    ``avoid_patterns``, ``hairpin_constraints``, ``gc_constraints``,
    ``kmer_size`` and ``DnaOptimizationProblemClass``. Missing keys fall back
    to an empty list (or ``None`` for ``DnaOptimizationProblemClass``).

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A dict with the five keys listed above. The three ``*_patterns`` /
        ``*_constraints`` entries are lists of stripped, non-empty lines when
        the JSON value is a string, and are passed through unchanged when the
        JSON value is already a non-string (e.g. a list). ``kmer_size`` is a
        list of ints.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        ValueError: If a k-mer size entry is not an integer literal.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        params = json.load(f)

    def split_lines(val):
        # JSON ``null`` is treated like a missing key: no constraints.
        if val is None:
            return []
        if isinstance(val, str):
            return [line.strip() for line in val.strip().split('\n') if line.strip()]
        return val

    def parse_kmer_sizes(val):
        # Accept a comma-separated string ("15,20"), a bare number (15), a
        # JSON list ([15, 20]) or null. Going through str() unconditionally
        # would turn a list into "[15, 20]" and fail in int().
        if val is None:
            return []
        items = val if isinstance(val, (list, tuple)) else str(val).split(',')
        return [int(str(k).strip()) for k in items if str(k).strip()]

    avoid_patterns = split_lines(params.get("avoid_patterns", ""))
    hairpin_constraints = split_lines(params.get("hairpin_constraints", ""))
    gc_constraints = split_lines(params.get("gc_constraints", ""))
    kmer_size = parse_kmer_sizes(params.get("kmer_size", ""))
    DnaOptimizationProblemClass = params.get("DnaOptimizationProblemClass", None)

    return {
        "avoid_patterns": avoid_patterns,
        "hairpin_constraints": hairpin_constraints,
        "gc_constraints": gc_constraints,
        "kmer_size": kmer_size,
        "DnaOptimizationProblemClass": DnaOptimizationProblemClass
    }
212 243
if __name__ == "__main__":
    # Script entry point: parse the command line, resolve the constraint set
    # (from a JSON parameter file or from the individual CLI options) and run
    # the sculpting step.
    args = parse_command_line_args()

    # Use the values argparse already parsed instead of re-scanning sys.argv:
    # this also covers the "--opt=value" form and cannot index past the end
    # of the argument list.
    use_json = str(args.use_json_param).lower() == "true"
    json_file = args.json_params
    load_from_json = (
        use_json and bool(json_file) and json_file.lower() != "none"
    )

    if load_from_json:
        json_constraints = load_constraints_from_json(json_file)
        avoid_patterns = json_constraints["avoid_patterns"]
        hairpin_constraints = json_constraints["hairpin_constraints"]
        gc_constraints = json_constraints["gc_constraints"]
        kmer_size = json_constraints["kmer_size"]
        DnaOptimizationProblemClass = json_constraints["DnaOptimizationProblemClass"]
    else:
        # Only read the per-option CLI constraints when they are the actual
        # source: those options are optional, and
        # extract_constraints_from_args() calls .strip() on them, which fails
        # when they were omitted in favour of the JSON file.
        (avoid_patterns, hairpin_constraints, gc_constraints, kmer_size,
         DnaOptimizationProblemClass) = extract_constraints_from_args(args)

    # NOTE(review): this dict is not passed to sculpt_sequances() below and no
    # other use is visible here; confirm nothing else reads it before removing.
    params = {
        "files_to_sculpt": args.files_to_sculpt,
        "file_name_mapping": args.file_name_mapping,
        "outdir_unscul": args.outdir_unscul,
        "outdir_scul": args.outdir_scul,
        "use_file_names_as_id": args.use_file_names_as_id,
        "avoid_patterns": avoid_patterns,
        "hairpin_constraints": hairpin_constraints,
        "gc_constraints": gc_constraints,
        "kmer_size": kmer_size,
        "DnaOptimizationProblemClass": DnaOptimizationProblemClass
    }

    sculpt_sequances(
        args.files_to_sculpt, args.file_name_mapping,
        args.outdir_scul, args.outdir_unscul,
        args.use_file_names_as_id, avoid_patterns,
        gc_constraints, DnaOptimizationProblemClass,
        kmer_size, hairpin_constraints
    )