Mercurial > repos > iuc > hyphy_annotate
comparison scripts/strike-ambigs.bf @ 0:be1650bef38c draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/hyphy/ commit 2742ee3b4e90f65352845265d2f85c4263e0eabb"
| author | iuc |
|---|---|
| date | Tue, 20 Apr 2021 10:17:07 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:be1650bef38c |
|---|---|
| 1 RequireVersion ("2.5.20"); | |
| 2 | |
| 3 LoadFunctionLibrary ("libv3/tasks/alignments.bf"); | |
| 4 LoadFunctionLibrary ("libv3/tasks/trees.bf"); | |
| 5 LoadFunctionLibrary ("libv3/UtilityFunctions.bf"); | |
| 6 LoadFunctionLibrary ("libv3/IOFunctions.bf"); | |
| 7 LoadFunctionLibrary ("libv3/convenience/math.bf"); | |
| 8 | |
| 9 | |
| 10 // Analysis metadata: description, version, reference, authors, contact and input requirements. | |
| 11 filter.analysis_description = {terms.io.info : | |
| 12 " | |
| 13 Read an alignment of coding sequences and replace any ambiguous codons with ---. Write results to a new file in FASTA format, and report changed sequences to stdout | |
| 14 ", | |
| 15 terms.io.version : "0.1", | |
| 16 terms.io.reference : "TBD", | |
| 17 terms.io.authors : "Sergei L Kosakovsky Pond", | |
| 18 terms.io.contact : "spond@temple.edu", | |
| 19 terms.io.requirements : "An MSA" | |
| 20 }; | |
| 21 | |
| 22 // Pass the metadata above to the banner routine. | |
| 23 io.DisplayAnalysisBanner (filter.analysis_description); | |
| 24 // Turn off NORMALIZE_SEQUENCE_NAMES (presumably so sequence names are written out exactly as read -- confirm against HyPhy docs). | |
| 25 utility.SetEnvVariable ("NORMALIZE_SEQUENCE_NAMES", FALSE); | |
| 26 // Keyword arguments for the genetic code and the input alignment, declared ahead of the combined prompt below. | |
| 27 KeywordArgument ("code", "Which genetic code should be used", "Universal"); | |
| 28 KeywordArgument ("alignment", "An in-frame codon alignment in one of the formats supported by HyPhy"); | |
| 29 | |
| 30 filter.in = alignments.PromptForGeneticCodeAndAlignment ("filter.dataset", "filter.input"); | |
| 31 // Output path for the filtered FASTA file; the keyword description uses the same wording as the prompt that follows it. | |
| 32 KeywordArgument ("output", ".fasta for filtered data", None); | |
| 33 filter.out = io.PromptUserForFilePath(".fasta for filtered data"); | |
| 34 fprintf (filter.out, CLEAR_FILE, KEEP_OPEN); // start from an empty file and keep it open for the per-sequence writes | |
| 35 // Two-argument GetDataInfo fills filter.site_patterns from filter.input; the loop below treats each entry as the pattern index of one site (confirm against HyPhy docs). | |
| 36 GetDataInfo (filter.site_patterns, filter.input); | |
| 37 // Maps a pattern index to the collection of site indices that share that pattern. | |
| 38 filter.patter2site = {}; | |
| 39 | |
| 40 // v is the entry value (pattern index); i+j is used as the site index, which presumably relies on one of i, j always being 0 -- confirm. | |
| 41 for (i,j,v; in; filter.site_patterns) { | |
| 42 index = i+j; | |
| 43 if (filter.patter2site / v == FALSE ) { // first time this pattern index is seen | |
| 44 filter.patter2site [v] = {}; | |
| 45 } | |
| 46 filter.patter2site [v] + index; // add the site index to this pattern's collection | |
| 47 } | |
| 48 // With GET_DATA_INFO_RETURNS_ONLY_THE_INDEX set, the four-argument GetDataInfo call in the loop is used as a single index tested with >= 0 (confirm exact semantics against HyPhy docs). | |
| 49 GET_DATA_INFO_RETURNS_ONLY_THE_INDEX = TRUE; | |
| 50 COUNT_GAPS_IN_FREQUENCIES = FALSE; | |
| 51 filter.unique_patterns = utility.Array1D (filter.input.site_freqs); | |
| 52 // For each sequence: rebuild its codon string pattern by pattern, writing "---" wherever the pattern lookup is negative. | |
| 53 for (seq = 0; seq < filter.input.species; seq += 1) { | |
| 54 io.ReportProgressBar ("filter","Processing sequence " + (1+seq)); | |
| 55 codons = {1, filter.input.sites}; | |
| 56 codons [0] = ""; | |
| 57 GetString (seq_name, filter.input, seq); | |
| 58 GetDataInfo (seq_chars, filter.input, seq); | |
| 59 // Number of codon sites replaced with "---" in this sequence. | |
| 60 filter.ambigs = 0; | |
| 61 // One lookup per pattern fills every site listed in filter.patter2site [pattern]. | |
| 62 for (pattern = 0; pattern < filter.unique_patterns; pattern += 1) { | |
| 63 GetDataInfo (pattern_info, filter.input, seq, pattern); | |
| 64 if (pattern_info >= 0) { | |
| 65 codon_start = (filter.patter2site[pattern])[0] * 3; // first site carrying this pattern, converted to a character offset | |
| 66 codon = seq_chars [codon_start][codon_start+2]; // the three characters of that codon | |
| 67 } else { // NOTE(review): every negative lookup is counted, which may include codons that are already all gaps -- confirm | |
| 68 codon = "---"; | |
| 69 filter.ambigs += Abs (filter.patter2site [pattern]); | |
| 70 } | |
| 71 for (c; in; filter.patter2site [pattern] ) { | |
| 72 codons[c] = codon; | |
| 73 } | |
| 74 } | |
| 75 if (filter.ambigs > 0) { | |
| 76 fprintf (stdout, "\nStriking ", filter.ambigs, " codons that are incompletely resolved from " + seq_name + "\n"); | |
| 77 } | |
| 78 fprintf (filter.out,">",seq_name,"\n",Join ("", codons), "\n"); | |
| 79 } | |
| 80 | |
| 81 fprintf (filter.out,CLOSE_FILE); | |
| 82 | |
| 83 |
