Mercurial > repos > devteam > cufflinks
comparison mass.py @ 4:f842d03b75c2 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/cufflinks/cufflinks commit a0b0845a9d1b3e7ecdeacd1e606133617e3918bd"
| author | iuc |
|---|---|
| date | Tue, 16 Jun 2020 16:57:48 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:3afd198a1cf0 | 4:f842d03b75c2 |
|---|---|
| 1 import shutil | |
| 2 import sys | |
| 3 import tempfile | |
| 4 | |
| 5 | |
| 6 def parse_gff_attributes(attr_str): | |
| 7 """ | |
| 8 Parses a GFF/GTF attribute string and returns a dictionary of name-value | |
| 9 pairs. The general format for a GFF3 attributes string is | |
| 10 | |
| 11 name1=value1;name2=value2 | |
| 12 | |
| 13 The general format for a GTF attribute string is | |
| 14 | |
| 15 name1 "value1" ; name2 "value2" | |
| 16 | |
| 17 The general format for a GFF attribute string is a single string that | |
| 18 denotes the interval's group; in this case, method returns a dictionary | |
| 19 with a single key-value pair, and key name is 'group' | |
| 20 """ | |
| 21 attributes_list = attr_str.split(";") | |
| 22 attributes = {} | |
| 23 for name_value_pair in attributes_list: | |
| 24 # Try splitting by '=' (GFF3) first because spaces are allowed in GFF3 | |
| 25 # attribute; next, try double quotes for GTF. | |
| 26 pair = name_value_pair.strip().split("=") | |
| 27 if len(pair) == 1: | |
| 28 pair = name_value_pair.strip().split("\"") | |
| 29 if len(pair) == 1: | |
| 30 # Could not split for some reason -- raise exception? | |
| 31 continue | |
| 32 if pair == '': | |
| 33 continue | |
| 34 name = pair[0].strip() | |
| 35 if name == '': | |
| 36 continue | |
| 37 # Need to strip double quote from values | |
| 38 value = pair[1].strip(" \"") | |
| 39 attributes[name] = value | |
| 40 | |
| 41 if len(attributes) == 0: | |
| 42 # Could not split attributes string, so entire string must be | |
| 43 # 'group' attribute. This is the case for strictly GFF files. | |
| 44 attributes['group'] = attr_str | |
| 45 return attributes | |
| 46 | |
| 47 | |
| 48 def gff_attributes_to_str(attrs, gff_format): | |
| 49 """ | |
| 50 Convert GFF attributes to string. Supported formats are GFF3, GTF. | |
| 51 """ | |
| 52 if gff_format == 'GTF': | |
| 53 format_string = '%s "%s"' | |
| 54 # Convert group (GFF) and ID, parent (GFF3) attributes to transcript_id, gene_id | |
| 55 id_attr = None | |
| 56 if 'group' in attrs: | |
| 57 id_attr = 'group' | |
| 58 elif 'ID' in attrs: | |
| 59 id_attr = 'ID' | |
| 60 elif 'Parent' in attrs: | |
| 61 id_attr = 'Parent' | |
| 62 if id_attr: | |
| 63 attrs['transcript_id'] = attrs['gene_id'] = attrs[id_attr] | |
| 64 elif gff_format == 'GFF3': | |
| 65 format_string = '%s=%s' | |
| 66 attrs_strs = [] | |
| 67 for name, value in attrs.items(): | |
| 68 attrs_strs.append(format_string % (name, value)) | |
| 69 return " ; ".join(attrs_strs) | |
| 70 | |
| 71 | |
| 72 stderr = sys.argv[1] | |
| 73 global_model_file_name = sys.argv[2] | |
| 74 transcripts = sys.argv[3] | |
| 75 | |
| 76 # Read standard error to get total map/upper quartile mass. | |
| 77 total_map_mass = -1 | |
| 78 with open(stderr, 'r') as tmp_stderr2: | |
| 79 for line in tmp_stderr2: | |
| 80 if line.lower().find("map mass") >= 0 or line.lower().find("upper quartile") >= 0: | |
| 81 total_map_mass = float(line.split(":")[1].strip()) | |
| 82 break | |
| 83 | |
| 84 if global_model_file_name != "None": | |
| 85 # Global model is simply total map mass from original run. | |
| 86 with open(global_model_file_name, 'r') as global_model_file: | |
| 87 global_model_total_map_mass = float(global_model_file.readline()) | |
| 88 | |
| 89 # Ratio of global model's total map mass to original run's map mass is | |
| 90 # factor used to adjust FPKM. | |
| 91 fpkm_map_mass_ratio = total_map_mass / global_model_total_map_mass | |
| 92 | |
| 93 # Update FPKM values in transcripts.gtf file. | |
| 94 with open(transcripts, 'r') as transcripts_file: | |
| 95 with tempfile.NamedTemporaryFile(dir=".", delete=False) as new_transcripts_file: | |
| 96 for line in transcripts_file: | |
| 97 fields = line.split('\t') | |
| 98 attrs = parse_gff_attributes(fields[8]) | |
| 99 attrs["FPKM"] = str(float(attrs["FPKM"]) * fpkm_map_mass_ratio) | |
| 100 attrs["conf_lo"] = str(float(attrs["conf_lo"]) * fpkm_map_mass_ratio) | |
| 101 attrs["conf_hi"] = str(float(attrs["conf_hi"]) * fpkm_map_mass_ratio) | |
| 102 fields[8] = gff_attributes_to_str(attrs, "GTF") | |
| 103 new_transcripts_file.write("%s\n" % '\t'.join(fields)) | |
| 104 shutil.move(new_transcripts_file.name, transcripts) | |
| 105 | |
| 106 if total_map_mass > -1: | |
| 107 with open("global_model.txt", 'w') as f: | |
| 108 f.write("%f\n" % total_map_mass) |
