Mercurial > repos > galaxyp > fragpipe
annotate genericize_db.py @ 8:012191b79fda draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 6413a461059c4a421a7812a08f244c224cde8ee2
| author | galaxyp | 
|---|---|
| date | Fri, 17 Oct 2025 16:22:03 +0000 | 
| parents | 41990c43f371 | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python3 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
2 # | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
3 # Prefixes sequence headers in the input FASTA file that are not formatted according to the UniProt, NCBI, or ENSEMBL formats with '>generic|' to avoid being misinterpreted by Philosopher. | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
4 # | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
5 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
6 import re | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
7 import sys | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
8 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
# Command-line arguments: the first is the path of the database file to read,
# the second is the path the rewritten copy is written to.
input_db_file, output_db_file = sys.argv[1], sys.argv[2]
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
11 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
12 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
# Compiled once at import time because sub_header runs for every line of the
# input file. The pattern matches the leading '>' of a header whose identifier
# does not begin with one of the listed prefixes (the formats the file header
# describes as UniProt, NCBI, or ENSEMBL, plus headers already marked generic).
# NOTE(review): the 'generic' alternative has no trailing '|', so any
# identifier that merely begins with "generic" is left untouched - confirm
# this is intended.
_UNRECOGNIZED_HEADER = re.compile(
    r'^>(?!sp\||tr\||db\||AP_|NP_|YP_|XP_|WP_|ENSP|UniRef|nxp|generic)'
)


def sub_header(line):
    """Return *line* with '>generic|' in place of an unrecognised header's '>'.

    Args:
        line: One line of text from the input file, including any trailing
            newline.

    Returns:
        The line unchanged when it does not start with '>' or when the text
        right after '>' starts with one of the prefixes in the pattern;
        otherwise the same line with its leading '>' replaced by '>generic|'.
    """
    # Fast path: lines that do not start with '>' can never match the anchored
    # pattern, so skip the regex engine for them entirely.
    if not line.startswith('>'):
        return line
    # The pattern is anchored at the start of the string, so at most one
    # substitution can occur; count=1 just makes that explicit.
    return _UNRECOGNIZED_HEADER.sub('>generic|', line, count=1)
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
15 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
16 | 
| 
 
41990c43f371
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fragpipe commit 905cc2be18669cffe9ac6c46fcd08b6857a67f4f
 
galaxyp 
parents:  
diff
changeset
 | 
# Stream the input database to the output path line by line, passing every
# line through sub_header so that header lines are rewritten where needed and
# all other lines are copied through unchanged.
with open(input_db_file) as in_file:
    with open(output_db_file, 'w') as out_file:
        out_file.writelines(sub_header(line) for line in in_file)
