Mercurial > repos > greg > plant_tribes_gene_family_scaffold_loader
comparison gene_family_scaffold_loader.py @ 10:11a36e425c94 draft
Uploaded
author | greg |
---|---|
date | Thu, 27 Sep 2018 12:49:17 -0400 |
parents | 3841f7252b1d |
children | 2fac73ec6ee8 |
comparison
equal
deleted
inserted
replaced
9:1a5fecd8e5c1 | 10:11a36e425c94 |
---|---|
103 1. Parse all of the *.list files in the same directory to populate | 103 1. Parse all of the *.list files in the same directory to populate |
104 self.scaffold_genes_dict. | 104 self.scaffold_genes_dict. |
105 """ | 105 """ |
106 scaffold_id = os.path.basename(self.args.scaffold_path) | 106 scaffold_id = os.path.basename(self.args.scaffold_path) |
107 file_dir = os.path.join(self.args.scaffold_path, 'annot') | 107 file_dir = os.path.join(self.args.scaffold_path, 'annot') |
108 # The scaffol naming convention must follow this pattern: | 108 # The scaffold naming convention must follow this pattern: |
109 # <integer1>Gv<integer2>.<integer3> | 109 # <integer1>Gv<integer2>.<integer3> |
110 # where integer 1 is the number of genomes in the scaffold_id. For example: | 110 # where integer 1 is the number of genomes in the scaffold_id. For example: |
111 # 22Gv1.1 -> 22 genomes | 111 # 22Gv1.1 -> 22 genomes |
112 # 12Gv1.0 -> 12 genomes | 112 # 12Gv1.0 -> 12 genomes |
113 # 26Gv2.0 -> 26 genomes, etc. | 113 # 26Gv2.0 -> 26 genomes, etc. |
135 i = 0 | 135 i = 0 |
136 for i2, line in enumerate(fh): | 136 for i2, line in enumerate(fh): |
137 if i2 == 0: | 137 if i2 == 0: |
138 # Skip first line. | 138 # Skip first line. |
139 continue | 139 continue |
140 line = line.rstrip('\n') | |
140 num_genes = 0 | 141 num_genes = 0 |
141 num_species = 0 | 142 num_species = 0 |
142 items = line.split("\t") | 143 items = line.split("\t") |
143 orthogroup_id = int(items[0]) | 144 orthogroup_id = int(items[0]) |
144 # Zero based items 1 to num_genomes consists of the | 145 # Zero based items 1 to num_genomes consists of the |