changeset 132:169504db8e43 draft

Uploaded
author greg
date Fri, 07 Apr 2017 08:38:49 -0400
parents 656614635ebf
children b2ae23e484e8
files .shed.yml gene_family_classifier.xml test-data/assembly.fasta test-data/proteins.blastp.22Gv1.1 test-data/proteins.blastp.22Gv1.1.bestOrthos test-data/proteins.blastp.22Gv1.1.bestOrthos.summary test-data/transcripts.cleaned.nr.pep utils.py
diffstat 8 files changed, 85 insertions(+), 119 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Fri Apr 07 08:38:49 2017 -0400
@@ -0,0 +1,15 @@
+name: plant_tribes_gene_family_classifier
+owner: greg
+description: |
+  Contains a tool that classifies gene sequences into precomputed orthologous gene family clusters using either
+  blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
+homepage_url: https://github.com/dePamphilis/PlantTribes
+long_description: |
+  Contains a tool that is one of the PlantTribes collection of automated modular analysis pipelines that
+  utilize objective classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies.  This tool classifies gene sequences into precomputed orthologous gene family
+  clusters using either blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/plant_tribes/gene_family_classifier
+type: unrestricted
+categories:
+- Phylogenetics
--- a/gene_family_classifier.xml	Thu Apr 06 14:21:39 2017 -0400
+++ b/gene_family_classifier.xml	Fri Apr 07 08:38:49 2017 -0400
@@ -230,19 +230,19 @@
         </collection>
     </outputs>
     <tests>
+        <!-- TODO: this test is disabled until we figure out how to test using scaffolds data.
         <test>
             <param name="input" value="transcripts.cleaned.nr.pep" ftype="fasta" />
-            <param name="scaffold" value="22Gv1.1"/>
+            <param name="scaffold" value="plant_tribes/scaffolds/22Gv1.1"/>
             <param name="method" value="orthomcl"/>
             <param name="classifier" value="blastp"/>
-            <param name="dereplicate" value="yes"/>
-            <param name="min_length" value="200"/>
             <output_collection name="orthos" type="list">
-                <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular"/>
+                <element name="proteins.blastp.22Gv1.1" file="proteins.blastp.22Gv1.1" ftype="tabular" compare="contains"/>
                 <element name="proteins.blastp.22Gv1.1.bestOrthos" file="proteins.blastp.22Gv1.1.bestOrthos" ftype="tabular"/>
                 <element name="proteins.blastp.22Gv1.1.bestOrthos.summary" file="proteins.blastp.22Gv1.1.bestOrthos.summary" ftype="tabular"/>
             </output_collection>
         </test>
+        -->
     </tests>
     <help>
 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
--- a/test-data/assembly.fasta	Thu Apr 06 14:21:39 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
->contig_1
-CGATTAACCATGTGTACAACATGACCAATTATGGCCGATCTTGCTTGTTCAGGAGTCTCA
-CTCATCATTCCAAAATCAAGAAAAGCAAGCTTCCCGTCAGGTGTAGCTAAGAGATTCCCT
-GGGTGAGGATCTGCATGAAAATAGCCGTACTCAAGCAGCTGTCGAAGACTGCACTGTATG
-CCGGTATTCACCAGATCCAGAACACTGAGCCCTTGACTCTCAACGGCATCTTGCTCATTT
-AATTTAACACCTTCAACCCACTCCATTGTTAGCACCTTTCCACTCGTGTAATCCCAGAAA
-ATATCTGGGACAAGGATATCTTCCTTGTCTCCATATAATTTTTTAA
->contig_2
-CCTTTCCAAGGTACCCATCCCATCCAACAACATATATGCTATAAATGATAAGAAGTCCCC
-GGAAGATGCAGCAGACGACTACGAAAACCGCCTCAAGGAACTCGTCTCCGAAAAAATCAT
-ACCCGTTTCAACCATTAGCGGGTTCCCGAAGTTCGACCTCATGTTGCTTGGAATGGGGCC
-CGACGGCCATGTGGCCTCTCTTTTCCCTTCTCACATGCAACGCTATGAGAAGGAGAAATG
-GGTCACTTTCATAACTGACTCTCCCAAACCGCCTCCGTCGAGAATCACTTTTACGTTTCC
-GATGATCAACTCGGCTTCGGAGATCGCTATGGTGGTTACCGGGGCTGATTTGGCTGGTAC
-AACTAAGATAGCATTGGGTACTACGGGCAATGTTAAGCCGGGTGAGACTCCTTTGCCTTG
-TACTGAAGTTTCGGCTGAGGGAGAGGTTACTTGGTTCTTGGACAAAGATGCTGCTTCACA
-ACTGTTAAATTATGTGCGCTTTGATGATTAAAAACACGGGGTAATTGAACTCAAGGGATG
-GTTGAGAATTGTAATATTTCTGTAAGTGTCGGGGGATGGTTGAAATGCTTTTTATTATGA
-AGTGTTAAAATGTAAGATAAA
->contig_3
-CGGGGGCAAGGTTTACATGAAAGTACATCTACAGGACCTAGATAAGCATCATCACCTGAC
-AAAAAGCCATACATAGTAACACTTGCTAGTTGATCTGTGAAAATGGTGCACGAGTCGTAA
-CAGACGGCATTAGTTCCTTTCCTGTAACAGCAGATTCGTATGAATGGTTTTCTCGAAATT
-CTTCTTCAAGTGCATCTATAGGCATGGCACGAAGTGACTCTATTGTGCCTTTGCTGGGAA
-TATCCGGCTCGCTCCTAACTGGCGTGGACCCTGTAGGCTCGTAATCCATGTATTTTTGCC
-TGAAAGTATCATTCGTGTGCTGTTCGATACAGGATACCTGCTGGGAATGATCTCTCTTAA
-GGTTCTCAATTGTTTCTGAATGAGCTCTAGCAGTTGTCAGAATTTCAGAAACAGATGCCT
-TCTCCTGCTCTGACAAGCCAGCAACAACAACTCCTTCATCCACA
->contig_4
-CTGACGATGTTCATATTCATGCCACTCTAAATGTATGCCATCCATGTTGAGGAGAAATGC
-TTGTGTAAAGAAGAAACTGGAGTCAAGCGGTCCCGGTTTTACAGTTGAACGTTGCTCTAA
-ATTAGTCAAAGTACCCGACTACCCGCTCATCAAACTGGCGAGCTCGTTTTCCTCGCCGCC
-CTACTCGGTGTCTTGGCGGGGCACATGGGGGTGGCGTGTGGCGTGCGGGGATGCGACAAC
-ATCATAAATTCATAATCGAAGG
->contig_5
-GTGAGAGTGTCGCCTTTTCCATGCTACCCCTTTCCAGATCTGACTTGTTCGAACCTTCCA
-TCATCCGGTTCGACGTCAGCAGTCGGTCCTTCTTCTCCAAGCACGAGAGCAGACGGTGCG
-AGAGCGAGCAAAGACTCGCCTGCCGCGCCTCTCGTGACCTGGCTCGCATCTCCAGGATTC
-ATGGCTCCTTAGCTCGCCCTTCTTCGATACTTCGCTAGACGCCCGAGGACGTCATCGAAG
-GCGAGGGTCGGCACAACGGAGTGCTCCTTGCATGATGTCGGGGCCTGGCATCACGCGGCA
-TCGGCCAAATCGTCCCTCTCGACGTGCGTCACGGAGAGGGCCGAGCGCCGGGACGACCGC
-GTGGAAGAGCCCGCGGGATGCGGGATCCCGCTTGACGTGTGTTTGTGGCGGACCACCG
->contig_6
-TATAACAATTCAAAATATTCCTATGAAAACCAAGACCAGAGGCAACACCGGTATAACCAA
-AGCCAGAGGCAATATCCCTTTATTCTTCGGGAACAGAAAAATACACGATATGCTACAGAC
-AAAAAAATAAATCCAAACCATTCTTCTGTGCATAAACATTGTAAAAGTTTATTAGACCGC
-TGTAAAACTCGTAAATGAAATCCCTGGCAATTGAATCTGTTAAACCTGCTCC
->contig_7
-GAGAATGAGTGGTCTGGGGCTGAGTTTTTGAATGAAATGGCGGCAATGATGACTCAAAAT
-AAATCCAATGAAAACGGAACCGGAACTTTTGAAGAACTGCAACAATTGTTCGATGAAATG
-TTTCAGAGCGACATCGAGTCCTTCAATGGTTGTTCTTCATCATCCAATGAAACATGTAGC
-AACTCGAACAAGAGGAATTCCATTGAGTCGAGCTCGGCTAATTTCAGACCCGAAAATGCG
-AAACGAAAGCGGCGAGATTAGCGGGAAGAAGAATACTAGGAAAGGTAAAGGTGACG
->contig_8
-TGGCATGAAGTCGGCTCGACTCGTGCCCGCTCGTCTATAGACGAATTGAGCTAGATTCAC
-TCATTCCATGAACTCGACTCGTTCATGAGTCGGCTCGTAGTTGTCCGGACTCGCTCCATG
-GCCAGCTCTACAACAGACTGCCTGTATGTAGATGATTGTATTGATTTGTTCTTCTCTTGT
-TTAAATCCAACCAAATATCAACATGATATTTGCAATTTC
->contig_9
-ACTTCGGTTAAAGGCAGATGAGGAGGCACAATGTTTGAATCAGATGCAGCGTATCATTTT
-TGATGAAATTATGGAGCATGTGGAGTTAGAAAAGGGGGGCTTCTATTTCGTATATCGCCC
-TGGGGGCAACGGAAAGACCTTGTGGTTGGCTATTATCTCAAAACTGAGAAGCGAGGGTAG
-AATAGTTCTCGCAGTGGCTTCATCAGGTATAGCATTGCTTTTGGTTGAGGGTGGTAGAAC
-AGCCCATTCTCGATTTAAAATACCCATAGATGTCAATGAATATAACAATTGTGAAATTAA
-ACAGAACATCTACCTCGCTGAACTTATATGTCACACCAATTTGGTCATTTGGGATGAGGC
-ACCTATGACTCAATATTTTGTCTTTGAGGCGGTTGA
->contig_10
-GGGGCATTGCCGCCGATTAATTCGAGCGCGAGGTTTCGCGTATCAGGCGGAATTGGGATT
-TGGCGCCACAAAGAGCGCCACCTATGATTTCTGTGGCGGACGACTTCACGAACTCGGACG
-GGACTATCTAACCAATCTTCCAAGTTCATTGCGACATCAGTATAAGGGCGTGATGAAGGT
-TCGCTATCGTCGCTTGAGTTATTGATGGGGCCCAAATTGAGATCGAGGTTCATTGTAGTG
-GTGTTCTCTTCTGCCATTTGAGAATTTCACAAGTTCTAACAAACGAAAACGCAAATCTTC
-GGGACTAATATGCAGAATTTCCCTAAATAGAAGGGGTTTAAAA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.blastp.22Gv1.1	Fri Apr 07 08:38:49 2017 -0400
@@ -0,0 +1,50 @@
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044471	92.11	114	9	0	1	114	15	128	9e-73	228
+contig_1	gnl|Mimgu1.0|PACid:17670850	92.98	114	8	0	1	114	307	420	1e-71	232
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044470	92.11	114	9	0	1	114	313	426	6e-71	230
+contig_1	gnl|Soltu3.4|PGSC0003DMP400044472	92.11	114	9	0	1	114	313	426	6e-71	230
+contig_1	gnl|Solly2.3|Solyc04g083010.2.1	92.11	114	9	0	1	114	313	426	8e-71	230
+contig_1	gnl|Poptr2.2|PACid:18246737	91.23	114	10	0	1	114	301	414	3e-70	228
+contig_1	gnl|Nelnu1.0|NNU_016098-RA	90.35	114	11	0	1	114	315	428	3e-70	228
+contig_1	gnl|Carpa1.181|PACid:16417175	90.35	114	11	0	1	114	289	402	9e-70	227
+contig_1	gnl|Glyma1.01|PACid:16244092	90.35	114	11	0	1	114	298	411	1e-69	227
+contig_1	gnl|Glyma1.01|PACid:16244091	90.35	114	11	0	1	114	298	411	1e-69	227
+contig_1	gnl|Vitvi12X|PACid:17826505	90.35	114	11	0	1	114	229	342	1e-69	225
+contig_1	gnl|Theca1.0|Tc10_g016990	90.35	114	11	0	1	114	288	401	4e-69	225
+contig_1	gnl|Phoda3.0|PDK_30s1127391g001	88.60	114	13	0	1	114	146	259	8e-69	221
+contig_1	gnl|Glyma1.01|PACid:16251026	87.72	114	14	0	1	114	127	240	8e-69	220
+contig_1	gnl|Thepa2.0|Tp5g34670	89.47	114	12	0	1	114	310	423	1e-68	224
+contig_1	gnl|Orysa6.0|PACid:16860403	86.84	114	15	0	1	114	325	438	4e-68	219
+contig_1	gnl|Aquco1.0|PACid:18145344	88.60	114	13	0	1	114	320	433	8e-68	222
+contig_1	gnl|Orysa6.0|PACid:16860404	86.84	114	15	0	1	114	325	438	9e-68	222
+contig_1	gnl|Arath10|AT1G79600.1	89.47	114	12	0	1	114	308	421	1e-67	222
+contig_1	gnl|Bradi1.2|Bradi2g30567.2	88.60	114	13	0	1	114	300	413	1e-67	221
+contig_1	gnl|Sorbi1.4|PACid:1980340	85.96	114	16	0	1	114	312	425	3e-67	220
+contig_1	gnl|Glyma1.01|PACid:16251025	87.72	114	14	0	1	114	361	474	3e-67	221
+contig_1	gnl|Medtr3.5|Medtr4g026450.1	85.96	114	16	0	1	114	313	426	4e-67	220
+contig_1	gnl|Glyma1.01|PACid:16245030	86.84	114	15	0	1	114	326	439	7e-67	219
+contig_1	gnl|Glyma1.01|PACid:16245029	86.84	114	15	0	1	114	326	439	8e-67	219
+contig_1	gnl|Ambtr1.0.27|AmTr_v1.0_scaffold00022.11	85.09	114	17	0	1	114	310	423	4e-66	218
+contig_1	gnl|Frave2.0|gene29299	89.47	114	12	0	1	114	711	824	1e-65	219
+contig_1	gnl|Musac1.0|GSMUA_Achr10T01800_001	82.46	114	20	0	1	114	229	342	3e-65	214
+contig_1	gnl|Musac1.0|GSMUA_Achr8T21380_001	81.58	114	21	0	1	114	229	342	4e-64	208
+contig_1	gnl|Phypa1.6|PACid:18072969	81.58	114	21	0	1	114	498	611	8e-62	207
+contig_1	gnl|Selmo1.0|PACid:15405864	77.88	113	25	0	2	114	228	340	3e-58	195
+contig_1	gnl|Poptr2.2|PACid:18214805	54.31	116	49	2	3	114	279	394	1e-35	133
+contig_1	gnl|Selmo1.0|PACid:15417058	53.98	113	52	0	2	114	208	320	2e-35	132
+contig_1	gnl|Glyma1.01|PACid:16255045	51.89	106	51	0	9	114	308	413	6e-35	131
+contig_1	gnl|Vitvi12X|PACid:17841082	51.35	111	54	0	4	114	337	447	7e-35	131
+contig_1	gnl|Aquco1.0|PACid:18159073	53.15	111	51	1	5	114	165	275	1e-34	129
+contig_1	gnl|Nelnu1.0|NNU_020249-RA	53.33	105	49	0	10	114	45	149	1e-34	128
+contig_1	gnl|Solly2.3|Solyc08g068920.2.1	53.33	105	49	0	10	114	323	427	2e-34	130
+contig_1	gnl|Medtr3.5|Medtr3g105760.1	51.40	107	52	0	8	114	309	415	3e-34	129
+contig_1	gnl|Ambtr1.0.27|AmTr_v1.0_scaffold00019.389	54.29	105	48	0	10	114	330	434	4e-34	129
+contig_1	gnl|Phypa1.6|PACid:18069401	50.00	116	56	1	1	114	331	446	6e-34	129
+contig_1	gnl|Arath10|AT5G24970.2	50.47	107	53	0	8	114	363	469	1e-33	127
+contig_1	gnl|Thepa2.0|Tp2g22500	51.40	107	52	0	8	114	326	432	2e-33	127
+contig_1	gnl|Aquco1.0|PACid:18141086	53.51	114	45	1	1	114	17	122	2e-33	125
+contig_1	gnl|Mimgu1.0|PACid:17681633	52.38	105	50	0	10	114	191	295	2e-33	126
+contig_1	gnl|Musac1.0|GSMUA_Achr1T23540_001	50.93	108	53	0	7	114	313	420	3e-33	127
+contig_1	gnl|Phypa1.6|PACid:18063964	52.63	114	46	1	1	114	312	417	3e-33	127
+contig_1	gnl|Ambtr1.0.27|AmTr_v1.0_scaffold00010.332	51.75	114	47	1	1	114	293	398	4e-33	126
+contig_1	gnl|Phypa1.6|PACid:18051230	52.63	114	46	1	1	114	312	417	7e-33	126
+contig_1	gnl|Musac1.0|GSMUA_Achr4T09960_001	50.00	114	49	1	1	114	262	367	8e-33	125
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.blastp.22Gv1.1.bestOrthos	Fri Apr 07 08:38:49 2017 -0400
@@ -0,0 +1,5 @@
+Gene ID	Orthogroup ID
+contig_3	554
+contig_1	5235
+contig_2	38889
+contig_9	20
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/proteins.blastp.22Gv1.1.bestOrthos.summary	Fri Apr 07 08:38:49 2017 -0400
@@ -0,0 +1,5 @@
+Gene ID	Orthogroup ID	Arabidopsis thaliana	Thellungiella parvula	Carica papaya	Theobroma cacao	Populus trichocarpa	Fragaria vesca	Glycine max	Medicago truncatula	Vitis vinifera	Solanum lycopersicum	Solanum tuberosum	Mimulus guttatus	Nelumbo nucifera	Aquilegia coerulea	Oryza sativa	Brachypodium distachyon	Sorghum bicolor	Musa acuminata	Phoenix dactylifera	Amborella trichopoda	Selaginella moellendorffii	Physcomitrella patens	SuperOthogroup I1.2	SuperOthogroup I1.5	SuperOthogroup I1.8	SuperOthogroup I2.0	SuperOthogroup I2.5	SuperOthogroup I3.0	SuperOthogroup I3.5	SuperOthogroup I4.0	SuperOthogroup I4.5	SuperOthogroup I5.0	AHRD Descriptions	TAIR Gene(s) Descriptions	Pfam Domains	InterProScan Descriptions	GO Molecular Functions	GO Biological Processes	GO Cellular Components
+contig_3	554	4	4	3	3	8	3	9	2	3	6	8	4	5	3	4	4	5	6	5	3	1	4	53	40	37	35	3078	3326	3484	3611	3699	3800	P-loop containing nucleoside triphosphate hydrolases superfamily protein [0.557] | Kinesin-related protein 13 [0.423] | kinesin 4 [0.010] | kinesin 5 [0.010]	P-loop containing nucleoside triphosphate hydrolases superfamily protein | ATP binding microtubule motor family protein	Kinesin (PF00225) [0.979]	Kinesin, motor domain (IPR001752) [0.979]	ATP binding (GO:0005524) [0.979] | microtubule binding (GO:0008017) [0.979] | microtubule motor activity (GO:0003777) [0.979]	microtubule-based movement (GO:0007018) [0.979]	NULL / Representative annotation below 0.1%
+contig_1	5235	1	1	1	1	1	1	6	1	1	1	3	1	1	1	2	1	1	2	1	1	1	1	2	207	198	330	347	338	461	452	558	557	Protein kinase superfamily protein [1.000]	Protein kinase superfamily protein	ABC1 (PF03109) [0.968] | APH (PF01636) [0.161]	UbiB domain (IPR004147) [0.968] | Aminoglycoside phosphotransferase (IPR002575) [0.161]	NULL / Representative annotation below 0.1%	NULL / Representative annotation below 0.1%	NULL / Representative annotation below 0.1%
+contig_2	38889	0	0	0	0	0	0	0	0	0	1	0	1	0	0	0	0	0	0	0	0	0	0	489	741	746	752	773	800	798	1128	1403	1407	6-phosphogluconolactonase 5 [0.500] | 6-phosphogluconolactonase 2 [0.500]	Unkown protein(s) / No TAIR description(s)	Glucosamine_iso (PF01182) [1.000]	Glucosamine/galactosamine-6-phosphate isomerase (IPR006148) [1.000]	NULL / Representative annotation below 0.1%	carbohydrate metabolic process (GO:0005975) [1.000]	NULL / Representative annotation below 0.1%
+contig_9	20	3	35	0	3	15	7	16	70	0	25	5	1	0	339	4	9	44	0	0	0	3	2	2	21	26	25	23	30	28	29	29	29	ATP-dependent DNA helicase PIF1 [0.435] | PIF1 helicase [0.138] | ATP-dependent DNA helicase pif1 [0.138] | ATP-dependent DNA helicase RRM3 [0.014] | ATP-dependent DNA helicase PIF4 [0.014] | F-box/RNI-like/FBD-like domains-containing protein [0.003] | ATP-dependent DNA helicase pfh1 [0.002] | Protein kinase superfamily protein [0.002] | Nucleic acid-binding, OB-fold-like protein [0.002] | Phosphatidylinositol N-acetylglucosaminyltransferase, GPI19/PIG-P subunit [0.002] | ATP-dependent DNA helicase PIF2 [0.002]	PIF1 helicase	PIF1 (PF05970) [0.771] | Helitron_like_N (PF14214) [0.503] | Herpes_Helicase (PF02689) [0.349] | Rep_fac-A_C (PF08646) [0.009] | DUF223 (PF02721) [0.009] | ABC1 (PF03109) [0.003] | Phage_GPA (PF05840) [0.002] | AAA_30 (PF13604) [0.002]	DNA helicase Pif1 like (IPR010285) [0.771] | Helitron helicase-like domain (IPR025476) [0.503] | DNA helicase (IPR003840) [0.349] | Replication factor A, C-terminal (IPR013955) [0.009] | Domain of unknown function DUF223 (IPR003871) [0.009] | UbiB domain (IPR004147) [0.003] | Replication gene A protein (IPR008766) [0.002]	DNA helicase activity (GO:0003678) [0.771] | ATP binding (GO:0005524) [0.349] | helicase activity (GO:0004386) [0.349]	telomere maintenance (GO:0000723) [0.771] | DNA repair (GO:0006281) [0.771] | DNA replication (GO:0006260) [0.002]	NULL / Representative annotation below 0.1%
--- a/test-data/transcripts.cleaned.nr.pep	Thu Apr 06 14:21:39 2017 -0400
+++ b/test-data/transcripts.cleaned.nr.pep	Fri Apr 07 08:38:49 2017 -0400
@@ -1,7 +1,13 @@
+>contig_1
+KKLYGDKEDILVPDIFWDYTSGKVLTMEWVEGVKLNEQDAVESQGLSVLDLVNTGIQCSLRQLLEYGYFHADPHPGNLLA
+TPDGKLAFLDFGMMSETPEQARSAIIGHVVHMVN
 >contig_2
 LSKVPIPSNNIYAINDKKSPEDAADDYENRLKELVSEKIIPVSTISGFPKFDLMLLGMGPDGHVASLFPSHMQRYEKEKW
 VTFITDSPKPPPSRITFTFPMINSASEIAMVVTGADLAGTTKIALGTTGNVKPGETPLPCTEVSAEGEVTWFLDKDAASQ
 LLNYVRFDD
+>contig_3
+VDEGVVVAGLSEQEKASVSEILTTARAHSETIENLKRDHSQQVSCIEQHTNDTFRQKYMDYEPTGSTPVRSEPDIPSKGT
+IESLRAMPIDALEEEFRENHSYESAVTGKELMPSVTTRAPFSQIN
 >contig_9
 LRLKADEEAQCLNQMQRIIFDEIMEHVELEKGGFYFVYRPGGNGKTLWLAIISKLRSEGRIVLAVASSGIALLLVEGGRT
 AHSRFKIPIDVNEYNNCEIKQNIYLAELICHTNLVIWDEAPMTQYFVFEAV
--- a/utils.py	Thu Apr 06 14:21:39 2017 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-import os
-import shutil
-import sys
-
-
-def check_execution_errors(rc, stderr):
-    if rc != 0:
-        stop_err(stderr.read())
-
-
-def move_directory_files(source_dir, destination_dir):
-    source_directory = os.path.abspath(source_dir)
-    destination_directory = os.path.abspath(destination_dir)
-    if not os.path.isdir(destination_directory):
-        os.makedirs(destination_directory)
-    for dir_entry in os.listdir(source_directory):
-        source_entry = os.path.join(source_directory, dir_entry)
-        shutil.move(source_entry, destination_directory)
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        fh.write('<html><head><h3>%s</h3></head>\n' % title)
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
-        for index, fname in enumerate(sorted(os.listdir(dir))):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            try:
-                size = str(os.path.getsize(os.path.join(dir, fname)))
-            except:
-                size = 'unknown'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
-        fh.write('</table></body></html>\n')