changeset 2:2f4298673519 draft

Uploaded
author davidvanzessen
date Wed, 10 Sep 2014 10:33:29 -0400
parents 856b5b718d21
children a0b27058dcac
files HS12RSS.txt HS23RSS.txt gene_identification.py logo.pm mutation_analysis.r seqlogo template.eps template.pm wrapper.sh
diffstat 9 files changed, 2842 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HS12RSS.txt	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,113 @@
+Gene	Sequence
+IGKV2D-18	CACAGTGGTACAACCCTGAACAGAAACC
+IGKV1D-39	CACAGTGTTACAAGTCATAACATAAACC
+TRBJ2-5	CACGGCCCCCGAGCCCCGCACAAAAACC
+TRBJ2-6	CACAGCCCGGGGACTCCCCGCAAAAACC
+IGKV1-37	CACAGTGTTACAAGGCATAACATAAACC
+TRBJ2-1	CACAGTGGGAAGGGGCTGCCCAGAATTC
+TRBJ2-2	CACAGCCCTGGGGACCCTGGCGCAAACC
+IGKV1-33	CACAGTGTAACAAGTCATAACATAAATC
+IGKV3-11	CACAGTGATTCCACATGAAACAAAAACC
+IGKV1D-33	CACAGTGTAACAAGTCATAACATAAATC
+IGKV1-39	CACAGTGTTACAAGTCGTAACATAAACC
+IGHD3-22	CACAGTGTCACAGAGTCCATCAAAAACT
+TRAJ31	CACAGCACTTCCTGCCTTTACTGAAACC
+TRAJ32	CACAGTCCTTCAGAGCCTTACACTAACC
+TRAJ33	CACAGACACAAAAACCTTAACAAAAACC
+TRAJ34	CACAGTGATACTGAGATCTACAAAAACC
+TRAJ36	CACAGTGTTTCTGCCCAGTACAAAAACA
+TRAJ37	CACTCTAATGCTGTACTTTACAAAAACT
+TRAJ38	CACAGTCATAGAAAGCTTTACCAAAACC
+TRAJ39	CACAGTGATCTTCGGCTGAGCAAAAACC
+TRBJ1-2	CACATAAGAATATAGCCACTCTAAAAGG
+IGHD2-2	CACAGTGACACAGCCCCATTCCCAAAGC
+IGHD1-1	CACCGTGACTCGGGGCTGTTCAGAATCT
+IGKV1-27	CACTGTGATACAAGCCCGAACATAAACC
+TRBJ1-6	CACAGCTGCAGAGGCTTAGATAAAACCC
+TRBJ1-5	CACAGTGCATCATGAGTGTGGCAAACCC
+TRBJ1-4	CACAACATTAAAGACTGGAAGGAAAACC
+TRBJ1-3	CACAGCCTCCCAGGGCCACTTCAAAACC
+IGKV1-6	CACAGTGTTACACACCCGAACAGAAACC
+TRBJ2-4	CACAGCCCCGAGACGCGGCACAGAAACT
+IGKV1-5	CACAGTGTTACACACCCGAACATAAACC
+TRAJ29	CACAGTGATTTCCTCCATAACAAAAACC
+TRAJ28	CACAGAGTTTCCTTTCTTTGCAAAAACC
+TRAJ27	CACAGTCTTTAGTGCTATTGCAATAACC
+TRAJ26	CACAGTGTTTCTGGGCTCAGCAAAAACC
+TRAJ24	CACTGTGACAAACACCTCTACAAAATGG
+TRAJ23	CACTGTGTTACATATCCTGTCAAAAACA
+TRAJ22	CACTATGATTTGCTCAACAACAAAACCA
+TRAJ21	CACCATGTTTATTGGCATTACAAAAAAT
+IGKV1-8	CACAGTGTTACACACCCAAACAAAAACC
+TRBJ2-7	CACGGAGGTGCACCCCCGCATGCAAACC
+IGKV1-9	CACAGTGTTACAAACCTGAACATAAACC
+IGKV3D-11	CACAGTGATTCCACATGAAACAAAAACC
+TRBJ2-3	CACAGCCTGGAGGCCCAGGACAAAAACC
+IGKV1-17	CACAGTGTTACACACCCGAACATAAAAC
+IGKV1D-17	CACAGTGTTACACACCCGAACATAAACC
+IGKV1D-16	CACAGTGTTACACACCCAAACATAAACC
+TRAJ58	CACTGTGCTGAGGGGCTTTGCAAAAACC
+TRAJ59	CACAGGAGCTGCTGCCTTTACATAAACT
+TRAJ52	CACTGCACTGGAGGCCTTTACAAGAACC
+TRAJ53	CACAGCCACAGAAGGCTTTACAGAAACA
+TRAJ50	CACAGCCATCCAAACCTTTACAATAACT
+TRAJ56	CACAATGACACGAGGATCTACAAAAACT
+TRAJ57	CACCCCCACAGACTGCTTTACAAATACT
+TRAJ54	CACCACAGCACGAGGCTTTACAGAAACT
+IGKV3-15	CACAGTGATTCAACATGAAACAAAAACC
+TRDD2	CACAATGAAACACATCAGTATAAAAACC
+IGKV2-40	CACAGTGGTACAGCCCTGAACAGAAACC
+IGKV6D-21	CACTGTGTTACAACCCAGAACAAAAACT
+IGKV3-20	CACAGTGATTCCACAGGAAACCAAACCT
+TRDJ3	CACATTATGACAGTGCCTCACAGGTAAC
+IGHD3-3	CACAGTGACACAGACCTCACCCCAAACC
+TRAJ49	CACTGTGATAGGAAGCTCAACAAAAACC
+TRAJ48	CACAGTGTTCTAAGTCATTGCAAAAACC
+TRAJ41	CACAGTGCGTTCTCCCTAAACAAAAAAC
+TRAJ40	CACAGTGTTATGTGTCTCTACATAAACC
+TRAJ43	CACAGTAATACATGCTCTAACAAAAACC
+TRAJ42	CACAGTCCTATGGGGCTTTACAATAATC
+TRAJ45	CACTCTGGGCCAACCCTTTACATAAACT
+TRAJ44	CACTGTGAGATGCTTCATAACAGAAACC
+TRAJ47	CACAGCGTCAAACTCCTCTACAAAAACA
+IGLJ2	CACTGTGACACAGGCTCATACAAAAACC
+IGLJ1	CACAGTGACTGAGGCTCAGACCAAAACC
+IGLJ7	CACAGTGACACAGCCCCACACACAAACC
+IGLJ6	CACTGTGATATAACCCTGCACACAAACC
+LCK	CACACACACACACACACAAGCCAAAACC
+IGKV4-1	CACAGTGCTTCAGCCTCGAACACAAACC
+TRAJ4	CACAATCAGATGGTGCTTTACAAGAACT
+HPRT_12	CACACACACACACACACACACAAATACA
+IGKV3D-20	CACAGTGATTCAGCTTGAAACAAAAACC
+TRAJ5	CACCCTGGTACATCACAGTACAAAATCC
+TRAJ6	CACAGTGAGGACAGCCTTTGATAAAACC
+TRAJ3	CACTGTGGGTAAGGTCTTTGAGATAACC
+IGKV2D-29	CACAGAGGTACAGACCAATACAGAAACC
+IGKV2D-28	CACAGTGGTACAACCCCTAACAGAAACC
+TAL2	CACTGTGACTTACTGGAGATATAAAAAT
+IGKV2D-26	CACAGTGCTACAGCCCTGAGCACAAACC
+TRAJ61	CACAGGAGTGGGCACCTTTACAAAAACC
+TRBD1	CACAATGTTACAGCTTTGTACAAAAACA
+IGKV1D-8	CACAGTGTTACACACCCGAACAAAAACC
+LMO2	CACAGTATTGTCTTACCCAGCAATAATT
+IGKV2D-30	CACAGTGGTACAGCCCTGAACAAAAACC
+IGKV1/OR2-108	CACAGTGTTACAAGCCATAACAAAAACC
+TRBD2 	CACAATGTTACACCATGATACAAAAATG
+IGKV2-24	CACAGTGGTACAGCTCTGAACAAAAACC
+IGKV2-28	CACAGTGGTACAACCCCTAACAGAAACC
+IGKV2-29	CACAGTGGTACAGACCAATACAGAAACC
+TRAJ16	CACAGTGATCTATTGTACCACAAAAACC
+TRAJ17	CACAATGATTTAAGGCCCAGCAAAAACC
+TRAJ15	CACAGTGAAATGAGGCCCTGCAAATACC
+TRAJ12	CACAGTGTTTCTTAGTCAGTCAAAAACA
+TRAJ13	CACTGTAATGCCTGCCTTTACAAAATGA
+TRAJ10	CACAGTGTTTGATGCCTCACAATAAACT
+TRAJ11	CACTATAGCAAATCCCCATACAAAAATG
+TRAJ18	CACAATGCTGGTCCCCTTTACATGAACC
+IGKV1D-43	CACAGTGTTACACACCCAAACAAAAACC
+IGKV2-30	CACAGTGGTACAGCCCTGAACAAAAACC
+TRAJ30	CACTGTGATTGGGACCATAACAAAAACT
+IGHD3-16	CACAGTGACACAGACCTCACTTCAAACC
+IGHD4-4	CACAGTGTCACAGAGTCCATCAAAAACC
+IGHD3-10	CACAGTGACACAGACCTCATTCTAAACC
+IGHD7-27	CACAGTGGTTCTCAGCTCAGCCAAAACC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/HS23RSS.txt	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,174 @@
+Gene	Sequence
+IGLV2-8	CACAGTGTTTTAAGTCAATGAGGAAGTAAGACCAAAACC
+IGLV4-3	CACAGTGACACAGATGAAGGGGAAGTGAGGCCAAAACTC
+TRBV21-1	CACAGTGCCGAATGTTAGCCCTTCTTAGAACACAAACTC
+IGHV1-17	CACAGTGCGAAAACCCACATCCTGAGAGTGTCAGAAACC
+IGHV1-14	CACAGTGTGAAAACCCACATCCTGAGAGAGTCAGAAATC
+IGHV3-15	CACAGTGAGGGGAGGTCAGTGTGAGCCCGGACACAAACC
+IGHV1-18	CACAGTGTGAAAACCCACATCCTGAGGGTTTCAGAAACC
+IGHV3-11	CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC
+IGLV2-5	CACAGTGGTCCAAGTTCTTGGGGAACTGAGACGAAAACC
+IGHV3-13	CACAGTGAGGGGAAGTCAGTATGAGCCCAGACACAAACC
+TRAV2	CACAGAGGCAGGGAACCCATGAAGAGCTGAACAGAAACA
+TRAV3	CACACTGATAGGGGCTGCAGGGGGAGCAGAACACAAACT
+TRAV4	CACAGTGAGACAGATGGGCCTGCACCTGTGCCGTTTTCC
+TRAV5	CACATTGCTTCTCAGGCACCTGTATCCTGTACCCAAACC
+TRBV6-4	CACAGTGCTGCACAGCCATCTCCTCTCTGTACATAAATG
+IGHV3-19	CACTGTGAGAGGACGGAAGTGTGAGCCCAGACACAAACC
+TRBV6-6	CACAGCGCTACAAGACCATCTCCTCTCTGCACATAAAGG
+TRBV18	CACATTGATGCAGAGCCACATCCTCTCAGTCCACAAACA
+TRBV6-1	CACAGCGCTGCATGGCCGTCTCCTCTCTGCACATAAAGG
+TRBV6-2	CACAGTGCTGCACGGCTGTCTCCTCTCTGCACAGAAAGG
+TRBV6-3	CACAGTGCTGCACGGCTGTCTCCTCTCTGCACAGAAAGG
+TRBV12-4	CACAGCGCTGCAGAATCACCCCTTTCCTGTGCAGAAACC
+TRAV27	CACAGTGCTCTTGAGGCACCTGCTGCCTGCACCCAAACC
+TRBV5-4	CACAGCCCGGCAGAGTCTCTGACATTCTATACATAAACT
+TRBV5-3	CACAGCCCTGCAGAGTCACTGGAACTCTGTGCACTAATC
+TRGV3	CACAGTGATTCAGACCTGTCCTACACCACACTGAAAATC
+TRBV5-1	CACAGCCCTACAAAGCCAACCACATTCTGTGCACAAACC
+IGHV3-9	CACAGTGAGGGGAAGTCAGCGAGAGCCCAGACAAAAACC
+TRGV8	CACAGTGATTCAGACCTGTGCTACACCACACTGAAAATC
+TRBV5-8	CACAGCCCAGCAGAATCACTGACATTCTGTATATAAACT
+TRDV2	CACCCTGCTGCAGCTCTACTTCTGAGCAGCTCAAAAACC
+IGKJ1	CACAGTGAGAGCTCTCCATTGTCTTGCTGAACAGAAACC
+IGKJ2	CACAATGGTTCCTCTTAACTTCCCTCCTATACAAAAACT
+IGKJ4	CACAGTGAGGGATCTCACCCTTTCCCCTCAACAAAAACC
+TRAV26	CACACTGGGACAGATGGGGCTGCACCTGTGCAATATCTC
+TRAV25	CACAGTGCTCCCCAAGCACCTGCTGCCTGTCTCCAAATC
+TRAV24	CACAGTGCTGTTCAGGCACCTGCAGCCCATACGCAAACC
+IGHV1-24	CACAGTGTGAAAACCCACATCCTGAGAGCGTCAGAAACC
+TRAV41	CACAGTGCTCCCCAGGCACCTGGAGCCCGTACCTAAACT
+IGLV5-52	CACAGTGCTCCAGACCCATGAGGAAGTAAGACAAAACCC
+IGLV10-54	CACAGTGCCTCAGGCCAGTGGGGAAGTGAGATAAAAACT
+IGHV3-64	CACAGTGAGGAGAAGTTAATGTGGGACCATGCAGAAACC
+IGHV3-66	CACAGTGAGGGGAGGCCATTGTGCGCCCAGACACAAACC
+TRBV7-9	CACAGCATGGCACAGTCGCCTCCTTCCTGCTCACAAACC
+TRBV7-8	CACAGCATGGCACAGTTGCCTCCTTCCTGTTCACAAACC
+IGLV1-40	CACAGTGCTCAGGCGTGTGTGTGAACTGAGACAAGAACC
+IGLV3-21	CACGGTGACACAGGCAGATGAGGAAGTGAGACAAAAACA
+IGHV7-4-1	CACAGTGTGGAAACCCACATCCTGAGAGTGTCAAAAACC
+IGLV3-27	CACAGTGACACAGGCAGATGGAGAAATGAGACACAAACC
+TRBV4-1	CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC
+IGLV1-41	CACAGTGCTCCAGGCCAATGGGGAACTGAGACAAGAACC
+TRAV13-1	CACATTGCTTCCCAGGCACCTGCTACCCGTACACAAACC
+TRAV13-2	CACATTGCTTTCCAGGCATCTGTAACCATCACCCAAACC
+TRBV5-6	CACAGCCCATCAGAGTCACTGACGTTCTGTATATAAACT
+IGLV3-25	CACAGTGACACAAGCAGACAGGGAAGTGAGACATAAACC
+TRBV10-2	CACAGTGCTGCACAGCTGCCTCCTCTCTGCACGGAAATG
+TRBV10-3	CACAGTGCTGCATGGCTGCCTCCTCTCTGCACGTAAACA
+TRBV10-1	CACAGTGCTGCACAGCTGCCTCCTCTCTGCACATAAAGG
+IGLV6-57	CACAGTGCTCCAGACCCATGGGGAAGTGAGACAGAAACT
+TRBV6-5	CACAGCGCTACAAGGCCGTCTCCTCTCTGCACATAAAGG
+IGHV3-73	CACAGTGAGGGGAGGTCAGTGTGAGCCCGGACACAAACT
+IGHV3-72	CACAGCGAGGGGAGGTCAGTGTGAGCCCGGACACAAACC
+IGLV1-47	CACAGTGCTCCAGGCCCAGGGGGAACTGAGACAAGAACC
+TRAV9-2	CACAGTGACAGGGACTGCAGGGGGAGCTGAGCACAAACT
+IGLV7-46	CACAGTGACAGACCCATGAGAGGAACCAAGACATAAACC
+TRBV6-7	CACAGCGCTGCAAGGCTGTCTCCTCTCTGCACATAAAGG
+TRBV3-1	CACAGCCCTGCAAAGTCACTGCATCCCTGTGCACAAACC
+IGLV7-43	CACAGTGACAGACTCATAAGAGGAACCAAGACATAAACC
+IGHV1-8	CACAGTGTGAAAAACCACATCCTCAGAGAGTCAGAAACC
+IGLV1-51	CACAGTGCTCCAGCCCAATGGGGAACTGAGACAAGAACC
+IGLV1-50	CACAGTGCTCCAGGCCCAGGGGGAGGTGAGAGAAGAACC
+IGHV1-3	CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACC
+IGHV1-2	CACAGTGTGAAAACCCACATCCTGAGGGTGTCAGAAACC
+IGHV4-39	CACAGTGTGGAAACCCACATCCCGAGGGAGTCAGAAACC
+TRBV2	CACAGCCTTGCAAAGACAACTCCAGCCTGTGCAAAATCC
+TRDD3	CACAGTGCTACAAAACCTACAGAGACCTGTACAAAAACT
+TRDD2	CACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACA
+TRAV29/DV5	CACAGTGCTCTCCAGGCACCTGCAGCCCGTACTCAAACC
+IGLV3-10	CACAGTGACACTGGCAGATGGGGAAGTGAGACACAAACC
+TRBV27	CACAGTGTTGCACAGCCAGCTGCTCTCTGCACAAAAACA
+TRBV5-5	CACAGCCCGGCAGAATCACTGACATTCTGTATATAAACT
+TRGV10	CACATACTAGAACTGTTGAAACAACATGCACAAAATCCC
+TRGV11	CACAGTGTTCGAGTTGTCAAGATAAGGTACACAGAAACT
+TRGV5	CACAGTGATTCAGACCTGTCCTACACCACACTGAAAATC
+TRBV30	CACACTGAGCTGGGTGGGGCAGACATCTGTGCAAAAACC
+IGHV3-48	CACAGTGAGGGGAGGTCAGTGTGACACCAGACACAAACC
+IGHV3-49	CACAGTGAGGGGAGGTCAATGTGAGCCCAGACACAAACC
+TRGV2	CACAGTGATTCAGATCCGCCCTACACCACACTGAAAATC
+IGLV	CACAGTGCTGCAGGCACATGGGGAACCGAGACAAAAACC
+IGHV3-43	CACAGTGAGGGGAAGTCAGCGAGAGCCCAGACAAAAACC
+IGHV3-41	CACAGTGAGAGGAAGTCCGTGTGAGCCCAGACACAAACC
+IGHV3-7	CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC
+HPRT_23	CACTGTAGGTTCATCTTTATGGAGATGCTCATGGCCTCA
+TRGV9	CACAGCAGCAGACAGTTTGAGCCATCCCATTCAATAAAT
+IGLV4-60	CACAGTGATACAGGCAGATGAGGAAGTGGGACAAAATCC
+TRBV28	CACAGCGCAGCACAGCCGCATCCTCTCTGCACAAAAAGA
+TRBV25-1	CACAGTGCTACATGGATACCGACACTCCGCACAGAAAGG
+IGLV4-69	CACAGTGACACAGGCAGATGAGGAAGTGGGACAGAAACC
+IGHV4-28	CACAGTGAGGGGAGGTGAGTGTGAGCCCAGACACAAACC
+TRBV3-2	CACAGCCTTACAGAGCCACTGCATCCCTGTGCACAAACC
+IGHV6-1	CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC
+TRAV19	CACAGTGAGATGGGTGCCTGTGGGAGCCCTACAAAAACC
+TRAV10	CACTGTGCTCCACAGGCACTTGAAGCCAGTATGCAAACC
+TRAV17	CACAGTGTTCCCCAGGAACCTGCAGCCTCTACGCAAACC
+TRBV4-2	CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC
+TRBV4-3	CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC
+IGHV3-53	CACAGTGAGGGGAGGTCAGTGTGAGCCCAGACACAAACC
+IGHV3-52	CACAGTGAGAGGCACAGTGAGGGGAGGTCAGTGTGAGCC
+TRBV7-3	CACAGCATGACACAATCGCCTCCTTCCTGCTCATAAACC
+TRBV7-2	CACAGCATGGCACAGTCGCCTCCTTCCTGCTCATAAACC
+TRBV7-7	CACAGCATGGCACAGTCGCCTCCTTCCTGTTCACAAACC
+TRBV7-6	CACAGTGTGGCATAGTCGCCTCCTTCCTGTTCACAAACC
+IGHV2-5	CACAAAGACACAGCCCAGGGCACCTCCTGTACAAAAACC
+IGLV3-13	CACAGTGACACAGGCAGATGGGGAAGTGAGACACAAATC
+IGLV3-16	CACAGTGACACAAGGAGACAGGGAAGTGAAACATAAACC
+IGLV3-19	CACAGTGACACAGACAGATGGGGAAGTGAGACAGAAACC
+IGHJ3	CACAGGGACACAGTCCGTTCCTAGACCCAGACACAAACC
+IGHV1-45	CACAGTGTGAAAACCCACATCCTGAGACCGTCAGAAACC
+IGHV1-46	CACAGTGTGAGAAACCACATCCTCAGAGTGTCAGAAACC
+IGLV8-61	CACAGTGATTTAAACCTATGAGGAAGTGCAACTAAAACC
+IGHV4-59	CACAGTGAGGGGAGGTGAGTGTGAGCCCAGACAAAAACC
+TRAV1-1	CACAGTGACTATGAGGCCTCCTTAACTGTGCCAAAATTC
+TRAV1-2	CACGGTGACTATGAGGCCTCTTTAGCTGCACCAAAATTC
+TRGV7	CACAGTGATTCACACCTGCCCTACACCACACTGAAAATC
+IGHV3-20	CACAGTGAGGGGAAGCCAGTGAGAGCCCAGACACAAACG
+IGHV3-21	CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC
+IGHV3-23	CACAGTGAGGGAAGTCATTGTGAGCCCAGACACAAACCT
+IGLV2-23	CACAGTGGTCCAAGTTCATGGGGAACTGAGACCAAAACC
+TRBD1	CACAATGATTCAACTCTACGGGAAACCTTTACAAAAACC
+TRGV4	CACAGTGATTCAGATCCGCCCTACACCACACTGAAAACC
+TRAV12-1	CACAGTGCTCCCCAGACACCTGCAGTCTGTACCCAAACC
+TRAV12-3	CACAGTGCTCCCCAGACACTTGCAGTCTGTACCCAAACC
+TRAV12-2	CACAGTGCTCCCCAGACACCTGCAGTCTGTACCCAAACC
+IGLV3-6	CACAGTGACACAGGCAGATTAGGAAGTGAGACACAAACC
+IGLV3-1	CACAGTGACACAGGCAGATGCGGAAGTGAGACAGAAACC
+IGHV1-58	CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACG
+TRBD2 	CACGATGATTCAGGTAGAGGAGGTGCTTTTACAAAAAAC
+IGLV3-9	CACAGTGACACAGGCAGATGAAGAAGTGAGACACAAACC
+TRBV6-8	CACAGCGCTACAAGGTTGTCTCCTCTCTGCACATAAAGG
+TRBV9	CACAGCCCTGCATGAGCATCAGCCTTCTGTGCAATAACA
+IGHV3-35	CACTGTGAGAGGTCGGAAGTGTGAGCCCAGACACAAACC
+IGHV3-33	CACAGTGAGGGGAGGTCATTGTGCGCCCAGACACAAACC
+IGLV2-14	CACAGTGGTCCAAGTTCCTGGGGAACTGAGACCAAAACC
+IGHV3-30	CACAGTGAGGGGAAGTCATTGTGCGCCCAGACACAAACC
+TRAV8-1	CACAGTGTCTGGGACTGCAAAGGGAGCTGAACACAAACT
+IGLV2-18	CACAGAGGTCCAAGTTCCTGGGGAACTGAGACCAAAACC
+TRAV14/DV4	CACAGTGACAGAACTGTCGGAGGGAGGTGTACAAAAGCC
+IGHV2-26	CACAGAGACACAGCCCAGGATGCCTCCTGTACAAGAACC
+TRBV16	CACAGTGTTAAATATTAGCTAATCTTAGGACACAGACTC
+TRBV15	CACAGAGCTGCAGTGCTTCCTGCTCTCTGTTCATAAACC
+TRBV14	CACAGTGCTTCACAGTCGTGCCCTTGCTGTGCAAAACCA
+TRBV13	CACAGCATGGCACAGTCGCCTCCTTCCTGCTCACAAACC
+TRBV11-3	CACAGTGTAGCAGAGACACTTCCCTCCTGTGCAGAAAAC
+TRBV19	CACAGTGAAGCACGGATGTCGCCTCTCTGTGCATAAATG
+TRBV11-2	CACAGTGTAGCAGAGACACTTCCCTCCTGTGCAGAAAAC
+TRAV38-1	CACAATGAGATGAGCAGCAGGTAGAGGCTTACAGAAATC
+TRBV12-5	CACAGCGCTGCAGAATCACCTGCTCCCTGTGCAGAAACC
+TRBV12-2	CACAGCGCTGCAGAATCTCCCCCTCCCTGTGCAGAAACT
+TRBV12-3	CACAGCGCTGCAGAATCACCCCTTTCCTGTGCAGAAAAC
+IGHV1-69	CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACC
+LMO1_23	CACAGTGGCTCACCACCCCACACAGCCCTCACTCTGGCA
+IGLV5-45	CACAGTGACACACACAGATGGGGAAGTGGGACAAAAACC
+IGLV5-37	CACAGTGACACACACAGATGGGAAAGTGGGACAAAAACC
+TRAV23/DV6	CACAGTGCTCCCCAGGCACCTGAAGCCTGTACCCAAACC
+TRDV1	CACAGTGTTTGAAGTGATAGTAAAAGCAAAACAAAAACC
+TRBV20-1	CACAGCGCCAGGAGGGGATCAGACACCGCGGCAAGAACC
+IGHV2-10	CACAGAGACAGAGCCCAGGGTGCCTCTTGTACAAGACCC
+TRBV29-1	CACAGTGCGGGGCACAGATCAAAGATCTGAGCAAGAACC
+TRBV23-1	CACAGCACTGAAATGTCAGTTCCTCTTAGCACACAAACT
+TRAV34	CACAGCGATCTTCAGGCCTCTATCAGCTGTCTCCAAACC
+TRAV30	CACAGTGATACCCAGGCCTCCAAGACCTGTACTCAAACC
+TAL1_23	CACAGCCTCGCGCATTTCTGTATATTGCGTAAGGAAAAG
+TRAV39	CACAGTGCTCCCCTGACGCCACCAGTCTGTACCCAAACC
--- a/gene_identification.py	Mon Aug 18 04:42:59 2014 -0400
+++ b/gene_identification.py	Wed Sep 10 10:33:29 2014 -0400
@@ -1,6 +1,7 @@
 import re
 import argparse
-
+import time
+starttime= int(time.time() * 1000)
 
 parser = argparse.ArgumentParser()
 parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file")
@@ -255,6 +256,7 @@
 
 #print ca,cg,cm,(ca+cg+cm)
 
+print "Time: %i" % (int(time.time() * 1000) - starttime)
 
 
 
@@ -262,4 +264,3 @@
 
 
 
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/logo.pm	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,826 @@
+#!/usr/local/bin/perl -w
+
+=head1 NAME
+
+    logo.pm - organizes data in FASTA and CLUSTAL formats into height data.
+
+=head1 SYNOPSIS
+
+    Perl module
+
+=head1 DESCRIPTION
+
+    logo.pm: Takes in strings of aligned sequences and sorts them vertically
+             based on height as assigned by the following equations found in
+             Schneider and Stephens paper "Sequence Logos: A New Way to Display
+             Consensus Sequences":
+    
+                 height = f(b,l) * R(l)                            (1)
+
+             where f(b,l) is the frequency of base or amino acid "b" at position
+             "l". R(l) is amount of information present at position "l" and can
+             be quantified as follows:
+
+                 R(l) for amino acids   = log(20) - (H(l) + e(n))    (2a)
+                 R(l) for nucleic acids =    2    - (H(l) + e(n))    (2b)
+
+             where log is taken base 2, H(l) is the uncertainty at position "l", 
+             and e(n) is the error correction factor for small "n". H(l) is
+             computed as follows:
+
+                 H(l) = - (Sum f(b,l) * log[ f(b,l) ])             (3)
+
+             where again, log is taken base 2. f(b,l) is the frequency of base
+             "b" at position "l". The sum is taken over all amino acids or
+             bases, depending on which the data is.
+
+             Currently, logo.pm uses an approximation for e(n), given by:
+
+                 e(n) = (s-1) / (2 * ln2 * n)                      (4)
+
+             Where s is 4 for nucleotides, 20 for amino acids ; n is the number
+             of sequences in the alignment. e(n) also  gives the height of error
+             bars.
+
+=cut
+
+package logo;
+
+use strict;
+use Carp;
+
+################################################################################
+######                             SOME VARIABLES                         ######
+################################################################################
+
+my $DEBUG = 0;
+
+my $AA = 0;
+my $NA = 1;
+
+my %BASES = ("a" => "adenine",
+	     "t" => "thymine",
+	     "g" => "guanine",
+	     "c" => "cytosine",
+	     "u" => "uracil");
+
+# does not include B or Z
+my %AMINOACIDS = ("a" => "", "c" => "", "d" => "", "e" => "", "f" => "",
+		  "g" => "", "h" => "", "i" => "", "k" => "", "l" => "",
+		  "m" => "", "n" => "", "p" => "", "q" => "", "r" => "",
+		  "s" => "", "t" => "", "v" => "", "w" => "", "y" => "");
+
+my @data;
+my $kind;
+my ($seqs_r, $desc_r);
+
+my $CONFIDENCE_LIMIT = 0.90;
+
+################################################################################
+######                             SOME FUNCTIONS                         ######
+################################################################################
+
+=head1 APPENDIX
+
+=cut
+
+=head2 getHeightData()
+
+ Usage   : my ($height_data_r, $description_data_r, $kind) =
+              logo::getHeightData($input_data_r, $params);
+ Returns : * REFERENCE TO array of height data
+           * REFERENCE TO array of input description strings
+           * $AA if the data is amino acid, $NA otherise
+ Args    : $input_data_r : input data in CLUSTAL or FASTA formats
+         : $params       : hash of parameters
+
+ getHeightData is the entry point into the logo.pm module. $input_data_r is a
+ reference to  an array of strings containing FASTA or CLUSTAL data, where all
+ lines whose first character is "#" is considered a comment line.
+
+ $params is a hash of parameters with the following keys:
+   * smallsampletoggle : 0 to turn off small sample correction, otherwise
+                         small sample correction is on
+   * input_kind : 0 for amino acids, 1 for nucleic acids; if undefined,
+                  logo.pm will attempt to automatically detect whether the
+                  input consists of amino or nucleic acid data. If
+                  $input_kind is defined, only those residues  defined by
+                  $input_kind will be in the output -- all other residues are
+                  considered as spaces. For example, if $input_kind is $NA,
+                  the residue "I" or "i" are considered spaces, since "I" and
+                  "i" are not nucleic acid residues.
+   * stretch : stretch all characters so they are flush at the maximum number
+               of bits allowed
+
+ Sample use:
+
+  # get FASTA data
+  open (FASTA, "$fastafile");
+  my @inputdata = <FASTA>;
+  close (FASTA);
+
+   my %heightparams = (
+		       smallsamplecorrection => 0,
+		       input_kind => 0,
+		       stretch => 0
+		       );
+
+  # get height data
+  my ($heightdata_r, $desc_r, $kind) = logo::getHeightData(\@inputdata, \%heightparams);
+
+=cut
+
+# entry point into module
+sub getHeightData {
+
+    # $smallsampletoggle is toggle to turn off small sample correction (1 to turn off)
+    # $input_kind can be $AA or $NA or undef
+    my ($input_data_r, $params) = @_;
+
+    # gary 040119: adjust for formats (Unix is \n, Mac is \r, Windows is \r\n)
+    $input_data_r = normalizeData($input_data_r);
+
+    # gets sequences, sets $kind temporarily
+    my ($goodlength, $maxreslength, $badline, $validformat);
+    ($seqs_r, $desc_r, $maxreslength, $goodlength, $badline, $validformat) =
+	getSeqs($input_data_r, $params->{input_kind});
+
+#    for(my $i = 0; $i < scalar @$seqs_r ; $i++) {
+#	print STDERR ($desc_r->[$i] . "\n" . $seqs_r->[$i] . "\n");
+#    }
+#    print STDERR "maxreslength = $maxreslength\n";
+#
+#    exit(0);
+
+    if ($DEBUG) { print STDERR ("point 1\n");}
+
+    # check for valid format
+    if ((defined $validformat) && ($validformat == 1)) {
+#	print("returning\n");
+	return (undef, undef, undef, undef, undef, 1);
+    }
+
+    if ($DEBUG) { print STDERR ("point 2\n");}
+
+    # check for bad length
+    if (!$goodlength) {
+	return (undef, undef, undef, $goodlength, $badline);
+    }
+
+    # reset $kind if in $input_kind
+    if (defined $params->{input_kind} && isLegalKind($params->{input_kind}) ) {
+	$kind = $params->{input_kind};
+    }
+
+    # build data
+    buildData(@$seqs_r, $params->{smallsampletoggle}, $params->{stretch}, $maxreslength);
+
+    if ($DEBUG) { print STDERR ("point 3\n");}
+
+#    print STDERR ("data size = ", scalar @data, "\n");
+#    foreach (@data) {
+#	print STDERR ("$_\n");
+#    }
+#
+#    exit(0);
+#
+#    print STDERR ("return at 2\n");
+    return (\@data, $desc_r, $kind, $goodlength, $badline);
+}
+
+sub isLegalKind {
+    return ($_[0] =~ /^[01]$/);
+}
+
+################################################################################
+#
+# sub normalizeData($data_r) returns $data_r, with Mac/Unix/Windows newline
+# style normalized to standard Unix-style newline style
+#
+################################################################################
+sub normalizeData {
+    my ($data_r) = @_;
+
+    # check args
+    if (not defined $data_r) {
+	die "data_r must be defined\n";
+    }
+
+    my @normalized = ();
+    foreach my $pseudo_line (@$data_r) {
+	my @split_line = split(/[\r\n]+/, $pseudo_line);
+	push(@normalized, @split_line);
+    }
+
+    return \@normalized;
+}
+
+
+################################################################################
+#
+# sub getSeqs($data_r, $kind) returns 5 values:
+#
+# * array reference to sequence strings
+# * array reference to sequence names
+# * length of sequence
+# * 1 if all sequences have the same length, 0 else
+# * line number L where sequenceLength(L) != sequenceLength(other lines), else
+#   undef
+#
+################################################################################
+sub getSeqs {
+    my ($input_data_r, $kind) = @_;
+
+    unless( $input_data_r->[0] ){
+        return (undef, undef, undef, undef, undef, 1);
+    }
+
+    # skip all comment chars and lines of all spaces
+    while ( ($input_data_r->[0] =~ /^\s*\#/) || ($input_data_r->[0] =~ /^\s*$/) ) {
+	shift @$input_data_r;
+	if( !defined $input_data_r->[0])  {return (undef, undef, undef, undef, undef, 1);}
+    }
+
+    if (isFormat_FASTA($input_data_r)) {
+	return getSeqs_FASTA($input_data_r, $kind);
+
+    } elsif (isFormat_CLUSTAL($input_data_r)) {
+	return getSeqs_CLUSTAL($input_data_r, $kind);
+
+    } elsif (isFormat_FLAT($input_data_r)) {
+	return getSeqs_FLAT($input_data_r, $kind);
+
+    } else {
+	if ($DEBUG) {print STDERR ("format nothing\n");}
+	return (undef, undef, undef, undef, undef, 1);
+    }
+
+#    if ($_[0] =~ />/) {
+#	return getSeqs_FASTA(@_);
+#    } else {
+#	return getSeqs_CLUSTAL(@_);
+#    }
+}
+
+################################################################################
+#
+# sub isFormat_FASTA($data_r) returns 1 if $data_r is in FASTA format
+#
+################################################################################s
+sub isFormat_FASTA {
+    my ($input_data_r) = @_;
+
+    # check args
+    if (not defined $input_data_r) {
+	Carp::confess("logo::isFormat_FASTA : input_data_r must be defined\n");
+    }
+
+    if ($input_data_r->[0] =~ />/) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+################################################################################
+#
+# sub isFormat_CLUSTAL($data_r) returns 1 if $data_r is in CLUSTAL format
+#
+################################################################################
+sub isFormat_CLUSTAL {
+    my ($input_data_r) = @_;
+
+    # check args
+    if (not defined $input_data_r) {
+	Carp::confess("logo::isFormat_CLUSTAL : input_data_r must be " .
+		      "defined\n");
+    }
+
+    my $i=0;
+
+#    # skip spaces or just "*" and "." and ":"
+#    while ($input_data_r->[$i] =~ /^[\*\:\s]*$/) {
+#	$i++;
+#    }
+
+    # if it looks like CLUSTAL W (version) ... , then it must be clustal
+    if ($input_data_r->[$i] =~ /^\s*CLUSTAL/) {
+	return 1;
+    }
+
+    # CLUSTAL looks like: "name        seq"
+    if ($input_data_r->[$i] =~ /^\s*(\S+)\s+(\S+)\s*$/) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+################################################################################
+#
+# sub isFormat_FLAT($data_r) returns 1 if $data_r is in FLAT format
+#
+################################################################################
+sub isFormat_FLAT {
+    my ($input_data_r) = @_;
+
+    # check args
+    if (not defined $input_data_r) {
+	Carp::confess("logo::isFormat_FLAT : input_data_r must be defined\n");
+    }
+
+#    print("checking flat\n");
+#    print("first element = -->", $input_data_r->[0], "<--\n");
+
+    if ($input_data_r->[0] =~ /^[a-zA-Z\-]+\s*$/) {
+	return 1;
+    } else {
+	return 0;
+    }
+}
+
+################################################################################
+######                          FORMATTING FUNCTIONS                      ######
+################################################################################
+
+# the flat sequence format is as follows:
+# sequence1
+# sequence2
+# sequence3
+# ...
+# sequenceN
+sub getSeqs_FLAT {
+
+    if ($DEBUG) {print STDERR "DOING FLAT\n";}
+
+    my ($input_data_r, $input_kind) = @_;
+
+    my $linelength = 0;
+    my $seqCount = 0;
+    my $total_residues = 0;
+    my (@returnVal, @desc) = ();
+    my $prevlinelength = undef;
+    my $NA_count = 0;
+
+    foreach my $seq (@$input_data_r) {
+#	chomp $seq;
+	$seq =~ s/\s+$//;
+
+	my @chars = split(//,$seq);
+
+	my $char;
+	foreach (@chars) {
+	    $total_residues++;
+	    $linelength++;
+
+	    # set $char
+	    if (defined $input_kind) {
+		if ($input_kind == $AA) {
+		    $char = (isAA($_)) ? $_ : "-";
+		} else { # == $NA
+		    $char = (isNA($_)) ? $_ : "-";
+		}
+	    } else {
+		$char = $_;
+		if (isNA($char)) {
+		    $NA_count++;
+		}
+	    }
+
+	    $returnVal[$seqCount] .= $char;
+	}
+	$desc[$seqCount] = "no name";
+
+	if ($seqCount == 0) {
+	    $prevlinelength = $linelength;
+	} elsif ($prevlinelength != $linelength) {  # different number of residues, so complain
+	    return (undef, undef, undef, 0, $seq);  # 0 for not same length, $seq is name
+	}
+	$linelength=0;
+
+	$seqCount++;
+    }
+
+    # determine whether to use $NA or $AA
+    if (!defined $input_kind) {
+	if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) {
+	    $kind = $NA;
+	} else { 
+	    $kind = $AA;
+	}
+    }
+
+    return (\@returnVal, \@desc, $prevlinelength, 1, undef);
+}
+
+sub getSeqs_CLUSTAL {
+
+    if ($DEBUG) {print STDERR "DOING CLUSTAL\n";}
+
+    my ($input_data_r, $input_kind) = @_;
+
+    my @returnVal;
+    my @desc;
+    my $seqCount=0;
+    my $reslength = 0;
+    my ($name, $seq);
+
+#    my $input_kind = pop @_;
+#    my $CONFIDENCE_LIMIT = 0.90;
+    my $NA_count = 0;
+    my $total_residues = 0;
+    my ($prevlinelength, $linelength) = (0,0);
+
+#    foreach (@_) {
+    foreach (@$input_data_r) {
+#	chomp;
+
+	if ($DEBUG) {print STDERR ("line = $_\n")};
+
+	$_ =~ s/\s+$//;
+
+	# skip if it is a comment character -- first character is "#"
+	next if (/^\s*\#/);
+
+	# skil if it is a CLUSTAL W header line
+	next if (/^\s*CLUSTAL/);
+
+	# if spaces or just "*" and "." and ":"
+	if (/^[\*\.\:\s]*$/) {
+	    $seqCount=0;
+	    $prevlinelength=0;
+	    next;
+	}
+
+	($name,$seq) = (/^\s*(\S+)\s+(\S+)\s*$/);
+
+	if ($DEBUG) { print STDERR ("name, seq = $name, $seq\n"); }
+
+	# add new entry
+	if (!defined $desc[$seqCount]) {
+	    $desc[$seqCount] = $name;
+	    $returnVal[$seqCount] = "";
+	}
+    
+    if(!defined $seq) {return (undef, undef, undef, undef, undef, 1);} # Something has gone terrible wrong
+    
+	my @chars = split(//,$seq);
+	my $char;
+	foreach (@chars) {
+	    if ($seqCount == 0) {
+		$reslength++;     # all sequences have same residue length, so only count first one
+	    }
+
+	    $total_residues++;
+	    $linelength++;
+
+	    # set $char
+	    if (defined $input_kind) {
+		if ($input_kind == $AA) {
+		    $char = (isAA($_)) ? $_ : "-";
+		} else { # == $NA
+		    $char = (isNA($_)) ? $_ : "-";
+		}
+	    } else {
+		$char = $_;
+		if (isNA($char)) {
+		    $NA_count++;
+		}
+	    }
+	    
+	    $returnVal[$seqCount] .= $char;
+	}
+
+	if ($seqCount == 0) {
+	    $prevlinelength = $linelength;
+	} elsif ($prevlinelength != $linelength) {  # different number of residues, so complain
+	    return (undef, undef, undef, 0, $name);
+	}
+	$linelength=0;
+
+	$seqCount++;
+    }
+
+    # determine whether to use $NA or $AA
+    if (!defined $input_kind ) {
+	if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) {
+	    $kind = $NA;
+	} else { 
+	    $kind = $AA;
+	}
+    }
+
+    return (\@returnVal, \@desc, $reslength, 1, undef);
+}
+
+# if $input_kind is defined, residues that are not defined are set to space
+sub getSeqs_FASTA {
+
+    if ($DEBUG) {print STDERR "DOING FASTA\n";}
+
+    my ($input_data_r, $input_kind) = @_;
+
+    my @returnVal;
+    my @desc;
+    my $count=-1;
+    my $newElem=0;
+
+#    my $input_kind = pop @_;
+
+#    my $CONFIDENCE_LIMIT = 0.90;
+    my $NA_count = 0;
+    my $total_residues = 0;
+    my $reslength = 0;
+    my $maxreslength = 0;
+    
+    my ($goodlength, $currline, $prevline);
+
+
+#    # skip all lines that are all spaces
+#    while ($_[0] =~ /^\s*$/) {
+#	shift @_;
+#    }
+
+#    foreach (@_) {
+    foreach (@$input_data_r) {
+
+	$_ =~ s/\s+$//;
+
+	# skip if it is a comment character -- first character is "#"
+	next if (/^\s*\#/);
+
+	# skip all lines that are all spaces
+	next if (/^\s*$/);
+
+	$_ =~ s/\s+$//;  # cut trailing white space
+	$_ =~ s/^\s+//;  # cut leading white space
+	if (/>/) {
+	    $currline = $_;
+	    ($desc[scalar @desc]) = ($_ =~ />\s*(.+)$/);
+
+	    if (not $newElem) {		
+		$count++;
+		$newElem = 1;
+	    }
+	} else {
+	    if ($newElem) {
+		$maxreslength = $reslength if $maxreslength == 0;
+		if (($maxreslength != 0) && ($maxreslength != $reslength)) {
+		    return (undef, undef, undef, 0, $prevline);
+		}
+
+		$maxreslength = $reslength;
+		$reslength = 0;
+	    }
+
+	    my @chars = split(//,$_);
+	    my $char;
+	    foreach (@chars) {
+		$reslength++;
+		$total_residues++;
+
+		# set $char
+		if (defined $input_kind) {
+		    if ($input_kind == $AA) {
+			$char = (isAA($_)) ? $_ : "-";
+		    } else { # == $NA
+			$char = (isNA($_)) ? $_ : "-";
+		    }
+		} else {
+		    $char = ($_ =~ /[a-zA-Z]/) ? $_ : "-";  # if not alpha char, use space
+		    if (isNA($char) && !isSpace($char)) {
+			$NA_count++;
+		    }
+		}
+
+		if ($newElem) {
+		    $returnVal[$count] = $char;
+		} else {
+		    $returnVal[$count] .= $char;
+		}
+		$newElem = 0;
+	    }
+	    $prevline = $currline if $currline =~ />/;
+	}
+    }
+
+    # check if last is biggest
+    if (($maxreslength != 0) && ($maxreslength != $reslength)) {
+	return (undef, undef, undef, 0, $prevline);
+    }
+#    $maxreslength = ($reslength > $maxreslength) ? $reslength : $maxreslength;
+
+    # determine whether to use $NA or $AA
+    if (!defined $input_kind) {
+	if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) {
+	    $kind = $NA;
+	} else { 
+	    $kind = $AA;
+	}
+    }
+
+    return (\@returnVal, \@desc, $maxreslength || $reslength, 1, undef);  # 1 for good lengths
+}
+
+sub isSpace {
+    return $_[0] =~ /[ \-]/;
+}
+
+sub isAA {
+    return (defined $AMINOACIDS{lc $_[0]});
+}
+
+sub isNA {
+    return (defined $BASES{lc $_[0]});
+}
+
+################################################################################
+######                       DATA BUILDING FUNCTIONS                      ######
+################################################################################
+
+
+# arguments: takes reference to array and lines aligned sequences of bases or
+#            amino acids
+# returns: updated reference array to reflect contents of sequences sorted
+#          vertically by height as described by (1)
+sub buildData {
+    
+    my $currentx = 0;
+    my $h;
+    my $count=0;
+    my $maxreslength = pop (@_);
+    my $stretch = pop(@_);
+    my $smallsampletoggle = pop (@_);
+    my $totalsize = $#_+1;
+
+    while ($currentx < $maxreslength) {       #(length $_[0])) {
+	my $allspaces = 1;
+	my $spaceCount=0;
+
+	# get vertical sequence
+	my @vert=();
+	foreach (@_) {  # foreach sequence
+	    my $currentchar;
+
+	    # set currentchar, set to " " if $_ is not long enough
+	    if ($currentx >= (length $_)) {
+		$currentchar = " ";
+	    } else {
+		$currentchar = substr($_,$currentx,1);
+	    }
+
+	    # in all sequences, "-" is considered as a space
+	    # don't count " " and "-"
+	    if (($currentchar ne "-") && ($currentchar ne " ")) {
+		$vert[scalar @vert] = uc substr($_,$currentx,1);
+		$allspaces = 0;
+	    } else {
+		$spaceCount++;
+	    }
+	}
+
+	if ($allspaces) {
+	    # build @vert
+	    @vert = (" 0", ">0");
+
+	    # place in @data
+	    $data[scalar @data] = \@vert;
+
+	    $currentx++;
+	    next;
+	}
+
+	my $error;
+	if ($smallsampletoggle) {
+	    $error=getError($kind,$totalsize - $spaceCount);
+	} else {
+	    $error = 0;
+	}
+
+	# sort vertical sequence by amino acid or base
+	@vert = sort(@vert);
+	my $total = $#vert + 1;
+
+	# find H(l) -- must be done before collapsing
+	$h = getH(@vert);
+
+	# collect like terms
+	@vert = collapse(@vert);
+
+	# get R(l)
+	my $r;
+	if (!defined $stretch || !$stretch) {
+	    $r= getR($kind, $h, $error);
+	} else {
+	    $r = ($kind == $NA) ? 2 : (log(20) / log(2));
+	}
+
+	# place heights
+	my $count=0;
+	my $height;
+	my $elem;
+	foreach $elem (@vert) {
+	    $height = getHeight(substr($elem, 2) / $total,$r);
+	    $vert[$count] = substr($elem,0,1) . $height;
+	    $count++;
+	}
+
+	# sort by height
+	@vert = sort height_sort @vert;
+
+	# put in error bar size
+	$vert[$count] = ">$error";
+
+	# place in @data
+	$data[scalar @data] = \@vert;
+
+	$currentx++;
+    }
+}
+
+# uses error approximation given by:
+#             e :=  (s-1) / (2 * ln2 * ntrue);
+sub getError {
+    return ((($_[0] == $NA) ? 4 : 20) - 1) / (2 * log(2) * $_[1]);
+}
+
+sub height_sort {
+    my ($lettera, $heighta) = ($a =~ /^(.{1})(\S+)$/); #substr($a,1);
+    my ($letterb, $heightb) = ($b =~ /^(.{1})(\S+)$/); #substr($b,1);
+    
+    # compare by height first, then letter
+    if ($heighta <=> $heightb) {
+	return $heighta <=> $heightb;
+    } else {
+	return $letterb cmp $lettera;  #reversed for some reason...
+    }
+}
+
+sub collapse {
+    my @returnVal;
+    my $current = $_[0];
+    my $count=0;
+    my $freq;
+
+    foreach (@_) {
+        if ($current eq $_) {
+            $count++;
+        } else {
+	    $returnVal[scalar @returnVal] = "$current $count";
+
+            $current = $_;
+            $count=1;
+        }
+    }
+
+    # add last element
+    $returnVal[scalar @returnVal] = "$current $count";
+
+    return @returnVal;
+}
+
+# arguments : $_[0] : list of bases or amino acids
+sub getH {
+    my $h = 0;
+    my (@vert) = @_;  # vertical sequence (comparing multiple sequences)
+
+    my $current = $vert[0];
+    my $count=0;
+    my $freq;
+
+    foreach (@vert) {
+	if ($current eq $_) {
+	    $count++;
+	} else {
+	    $freq = $count / ($#vert + 1);
+	    $h += $freq * (log ($freq) / log(2));
+
+	    $current = $_;
+	    $count=1;
+	}
+    }
+
+    # add last element
+    $freq = $count / ($#vert + 1);
+    $h += $freq * (log ($freq) / log(2));
+
+    return -$h;
+}
+
+# arguments : $_[0] : $AA or $NA
+#             $_[1] : uncertainty = H(l)
+#             $_[2] : error correction factor for small number of sequences
+#                     = e(n) ; assummed to be 0 if not given
+sub getR {
+    my $max = ($_[0] == $NA) ? 2 : (log(20) / log(2));
+    my $e = (defined $_[2]) ? $_[2] : 0;
+    return ($max - ($_[1] + $e));
+}
+
+# arguments: $_[0] : frequency = f(b,l)
+#            $_[1] : R(l)
+sub getHeight {
+    return $_[0] * $_[1]
+}
+
+1;
--- a/mutation_analysis.r	Mon Aug 18 04:42:59 2014 -0400
+++ b/mutation_analysis.r	Wed Sep 10 10:33:29 2014 -0400
@@ -294,35 +294,15 @@
 cat(length(dat$Sequence.ID), file="n.txt")
 
 
-
-
-
-
-
-
-
+weblogo = dat[,c("Sequence.ID", "VGene")]
+weblogo$VGene = gsub("\\*.*", "", weblogo$VGene)
 
-
-
-
-
-
-
-
+rs12 = read.table("HS12RSS.txt", sep="\t", header=TRUE)
+rs23 = read.table("HS23RSS.txt", sep="\t", header=TRUE)
 
-
-
-
-
-
-
-
+result12 = merge(weblogo, rs12, by.x="VGene", by.y="Gene")
+result23 = merge(weblogo, rs23, by.x="VGene", by.y="Gene")
 
-
-
-
+write.table(x=result12$Sequence, file="weblogo_in_rs12.txt", sep=",",quote=F,row.names=F,col.names=F)
+write.table(x=result23$Sequence, file="weblogo_in_rs23.txt", sep=",",quote=F,row.names=F,col.names=F)
 
-
-
-
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seqlogo	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,356 @@
+#!/usr/bin/perl -w
+
+=head1 NAME
+
+  seqlogo - runs the logo creation script 
+
+=head1 SYNOPSIS
+
+  seqlogo [OPTION]...-f [FILENAME]
+
+=head1 DESCRIPTION
+
+    Creates a logo for the given input filename.
+
+    Available options:
+    -B <bar bits>              Number of bits in bar (real between 0, 1)
+    -T <tic bits>              Number of bits between tic marks
+    -C <chars per line>        Number of characters per line of logo
+    -d <box shrink factor>     Shrink factor of characters if option c is toggled
+    -E <error bar fraction>    Fraction of error bar to show (real between 0, 1 )
+    -f <input file>            Input filename
+    -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
+    -h <logo height>           Height of output logo (real > 0)
+    -k <kind of data>          0 for amino acid, 1 for nucleic acid ; if not
+                                 defined, a 90% certainty method is used to
+                                 determine whether the input data is amino acid or
+                                 nucleic acid
+    -l <sequence lower bound>  Lower bound of sequence (integer)
+    -m <sequence upper bound>  Upper bound of sequence (integer)
+    -o <output file>           Name of output file
+    -s <sequence start>        Sequence start number, defaults to 1 (int)
+    -t <titletext>             Text of title, enclosed in "" if more than one word
+    -w <logo width>            Width of output logo
+    -x <x-axis label>          Label for x-axis
+    -y <y-axis label>          Label for y-axis
+
+    Available toggles (no values associated)
+    -a       Toggle antialiasing
+    -b       Toggle bar ends
+    -c       Toggle color
+    -e       Toggle error bar
+    -M       Toggle small sample correction
+    -O       Toggle outlining of characters
+    -n       Toggle numbering of x-axis
+    -S       Toggle stretching of logos to entire length
+    -X       Toggle boxing of characters
+    -Y       Toggle y-axis
+
+=head1 EXAMPLE
+
+  The following command takes as input "input.fasta" and returns the logo in the
+  form "logo.eps". Antialiasing, bar ends, color, small sample correction,
+  numbering of x-axis, and y-axis labelling are turned on:
+
+    seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY
+
+=cut
+
+use vars qw($PATH);
+
+BEGIN {
+
+    use FindBin qw($Bin);
+    use lib "$Bin";
+
+    $PATH = $Bin;
+
+##    $PATH = "/h/gary/Seqlogo/Code/";
+#    $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code";
+#    unshift(@INC, $PATH);
+}
+
+use logo;
+use template;
+use Getopt::Std;
+use FileHandle;
+
+
+my $opts;
+$opts =
+  $opt_a || # antialiasing
+  $opt_b || # bar ends (0 or 1)
+  $opt_c || # color
+  $opt_e || # show error bar (0 or 1)
+  $opt_n || # numbering (0 or 1)
+  $opt_M || # small sample correction (0 or 1)
+  $opt_O || # outline (0 or 1)
+  $opt_S || # stretch
+  $opt_X || # box (0 for no box, 1 for box)
+  $opt_Y || # y axis
+
+  $opt_B || # bar bits (real)
+  $opt_T || # tics bits (real)
+  $opt_C || # chars per line
+  $opt_d || # box shrinking factor (<1)
+  $opt_E || # error bar fraction (real)
+  $opt_f || # input filename
+  $opt_F || # format (PNG, EPS, PDF, GIF)
+  $opt_h || # logo height (cm)
+  $opt_k || # 0 = amino acid, 1 = nucleic acid
+  $opt_l || # lower bound of sequence to put in logo
+  $opt_m || # max bound of sequence to put in logo
+  $opt_o || # output file
+  $opt_s || # start number for very beginning of sequence
+  $opt_t || # title text (string)
+  $opt_w || # logo width (cm)
+  $opt_x || # x axis label
+  $opt_y || # y axis label
+$opts;
+
+################################################################################
+#####                              USAGE                                   #####
+################################################################################
+
+sub usage {
+    my $usage = <<END
+
+usage: seqlogo -f <input filename> [OPTIONs with values]
+Creates a logo for the given input filename.
+
+Available options:
+  -B <bar bits>              Number of bits in bar (real # > 0)
+  -T <tic bits>              Number of bits between tic marks
+  -C <chars per line>        Number of characters per line of logo
+  -d <box shrink factor>     Shrink factor of characters if option c is toggled
+  -E <error bar fraction>    Fraction of error bar to show (real # > 0)
+  -f <input filename>        Input filename
+  -F <format>                Format of output (EPS, GIF, PDF, PNG), - for STDOUT
+  -h <logo height>           Height of output logo (real # > 0)
+  -k <kind of data>          0 for amino acid, 1 for nucleic acid
+  -l <sequence lower bound>  Lower bound of sequence (integer)
+  -m <sequence upper bound>  Upper bound of sequence (integer)
+  -o <output file>           Name of output file
+  -s <sequence start>        Sequence start number, defaults to 1 (int)
+  -t <titletext>             Text of title, enclosed in "" if more than one word
+  -w <logo width>            Width of output logo
+  -x <x-axis label>          Label for x-axis
+  -y <y-axis label>          Label for y-axis
+
+Available toggles (no values associated) bOenc
+  -a       Toggle antialiasing
+  -b       Toggle bar ends
+  -c       Toggle color
+  -e       Toggle error bar
+  -M       Toggle small sample correction
+  -O       Toggle outlining of characters
+  -p       Toggle fineprint
+  -n       Toggle numbering of x-axis
+  -S       Toggle stretching of logos to entire length
+  -X       Toggle boxing of characters
+  -Y       Toggle y-axis
+
+END
+    ;
+
+    return $usage;
+}
+
+################################################################################
+#####                         MAIN FUNCTION                                #####
+################################################################################
+
+# arguments : $_[0] : file name
+MAIN: {
+    init();
+
+    # feed data from file to make height data array reference
+    my @input = <INPUTFILE>;
+    close (INPUTFILE);
+    my %heightparams = (
+			smallsampletoggle => $opt_M,
+			input_kind => $opt_k,
+			stretch => $opt_S
+			);
+
+    my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) =
+	logo::getHeightData(\@input, \%heightparams);
+
+    # check for errors
+    if ((defined $validformat) && ($validformat == 1)) {
+	die("Error: Invalid input format does not conform to FASTA, " .
+	    "CLUSTAL, or Flat.\n");
+    }
+    if (!$goodlength) {
+	die("Error: Number of characters in each logo line is not " .
+	    "consistent, starting at: ", $badline, "\n");
+    }
+
+    my %input = (
+		 LOGO_HEIGHT => $opt_h,
+		 LOGO_WIDTH => $opt_w,
+		 COLORSCHEME => ($opt_c) ? "DEFAULT" : "BW",
+
+		 LOGOSTART => $opt_l,
+		 LOGOEND => $opt_m,
+		 START_NUM => $opt_s,
+
+		 TITLETEXT => $opt_t,
+		 YAXIS_LABEL => $opt_y,
+		 XAXIS_LABEL => $opt_x,
+
+		 BOXSHRINK => $opt_d,
+		 CHARSPERLINE => $opt_C,
+		 BARBITS => $opt_B,
+		 TICBITS => $opt_T,
+		 RES => "96",
+		 "FORMAT" => (uc $opt_F),
+
+                 # toggles
+		 ANTIALIAS => $opt_a,
+	         ERRBAR => $opt_e,
+		 FINEPRINT => $opt_p,
+		 NUMBERING => $opt_n,
+		 OUTLINE => $opt_O,
+		 SHOWENDS => $opt_b,
+		 SHOWINGBOX => $opt_X,
+                 YAXIS => $opt_Y
+		 );
+
+    template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o, $PATH);
+}
+
+
+################################################################################
+#####                      FUNCTINOS FOR INIT                              #####
+################################################################################
+
+# all ints
+sub isInt {
+    return ($_[0] =~ /^[\+\-]?\d+$/) ? 1 : 0;
+}
+
+# all reals
+sub isReal {
+    return ($_[0] =~ /^[\+\-]?\d*.\d*?$/) ? 1 : 0;
+}
+
+sub isZeroOrOne {
+    return ($_[0] == 0 || $_[0] == 1) ? 1 : 0;
+}
+
+sub init {
+
+#  if (not defined $PATH) {
+#      die ("PATH must be defined\n");
+#  } elsif (not -e $PATH) {
+#      die ("PATH ($PATH) must exist\n");
+#  } elsif (not -d $PATH) {
+#      die ("PATH ($PATH) must be a directory\n");
+#  }
+
+  &getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY');
+
+    if (defined $opt_B &&
+	(!isReal($opt_B) || $opt_B < 0) ) {
+	printf("\noption B must be a positive real, but is $opt_B, $!\n");
+	die &usage();
+    }
+    if (defined $opt_d && 
+	( !isReal($opt_d) || $opt_d < 0 || $opt_d > 1) ) {
+        print("\noption d must be a real between 0 and 1, but is $opt_d, $!\n");
+        die &usage();
+    }
+    if (defined $opt_E &&
+	(!isReal($opt_E) || $opt_E < 0 || $opt_E > 1) ) {
+        print("\noption E must be a real between 0 and 1, but is $opt_E, $!\n");
+        die &usage();
+    }
+    if (defined $opt_f) {
+        open (INPUTFILE, "$opt_f") or die "Couldn't open input filename $opt_f: $!\n";
+    } else {
+	print("\ninput file not specified, terminating...\n");
+        die &usage();
+    }
+    if (defined $opt_h && 
+	(!isReal($opt_h) || $opt_h < 0) ) {
+        print("\noption h must be a positive real, but is $opt_h, $!\n");
+        die &usage();
+    }
+    if (defined $opt_w && 
+	(!isReal($opt_w) || $opt_w < 0) ) {
+        print("\noption w must be a positive real, but is $opt_w, $!\n");
+        die &usage();
+    }
+    if (defined $opt_k &&
+	(!isZeroOrOne($opt_k)) ) {
+	print("\noption k must be 0 or 1, but is $opt_k, $!\n");
+	die &usage();
+    }
+
+    #toggles
+    if (!defined $opt_a) {
+	$opt_a = 0;
+    }
+    if (!defined $opt_b) {
+	$opt_b = 0;
+    }
+    if (!defined $opt_c) {
+	$opt_c = 0;
+    }
+    if (!defined $opt_e) {
+	$opt_e = 0;
+    }
+    if (!defined $opt_n) {
+	$opt_n = 0;
+    }
+    if (!defined $opt_M) {
+	$opt_M = 0;
+    }
+    if (!defined $opt_O) {
+	$opt_O = 0;
+    }
+    if (!defined $opt_p) {
+	$opt_p = 0;
+    }
+    if (!defined $opt_S) {
+	$opt_S = 0;
+    }
+    if (!defined $opt_X) {
+        $opt_X = 0;
+    };
+    if (!defined $opt_Y) {
+        $opt_Y = 0;
+    };
+
+    if (!defined $opt_F) {
+	$opt_F = "EPS";  # default to EPS
+    }
+    if (!defined $opt_o) {
+	$opt_o = "-";    # for standard out
+    } else {
+#	$opt_o =~ s/\.\S*$//;         # remove extension if there is one
+	$opt_o .= "." . (lc $opt_F);  # make file name
+    }
+
+    if (defined $opt_C &&
+	(!isInt($opt_C) || $opt_C < 0) ) {
+        printf("\noption C must be a postive integer, but is $opt_C, $!\n");
+        die &usage();
+    }
+
+    if (defined $opt_l && !isInt($opt_l)) {
+	printf("\noption l must be an integer, but is $opt_l, $!\n");
+	die &usage();
+    }
+
+    if (defined $opt_m && !isInt($opt_m)) {
+	printf("\noption m must be an integer, but is $opt_m, $!\n");
+	die &usage();
+    }
+
+    if (defined $opt_s && !isInt($opt_s)) {
+        printf("\noption s must be an integer, but is $opt_s, $!\n");
+        die &usage();
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/template.eps	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,674 @@
+%!PS-Adobe-3.0 EPSF-3.0
+%%Title: Sequence Logo : {$TITLE}
+%%Creator: {$CREATOR}
+%%CreationDate: {$CREATIONDATE}
+%%BoundingBox:   0  0  {$LOGOWIDTHPOINTS}  {$LOGOHEIGHTPOINTS} 
+%%Pages: 0
+%%DocumentFonts: 
+%%EndComments
+
+{$DESC}
+
+
+% ---- CONSTANTS ----
+/cmfactor 72 2.54 div def % defines points -> cm conversion
+/cm {cmfactor mul} bind def % defines centimeters
+
+
+% ---- VARIABLES ----
+
+{$COLORDEF}
+
+/logoWidth {$LOGOWIDTH} cm def
+/logoHeight {$LOGOLINEHEIGHT} cm def
+/logoTitle ({$TITLE}) def
+
+/yaxis {$YAXIS} def
+/yaxisLabel ({$YAXIS_LABEL}) def
+/yaxisBits  {$BARBITS} def % bits
+/yaxisTicBits {$TICBITS} def
+
+
+/xaxis {$NUMBERING} def
+/xaxisLabel ({$XAXIS_LABEL}) def
+/showEnds ({$SHOWENDS}) def % d: DNA, p: PROTEIN, -: none
+
+/showFineprint true def
+/fineprint ({$FINEPRINT}) def
+
+/charsPerLine {$CHARSPERLINE} def
+/logoLines {$LOGOLINES} def
+
+/showingBox ({$SHOWINGBOX}) def    %n s f
+/shrinking {$SHRINKBOOLEAN} def
+/shrink  {$SHRINKFACTOR} def
+/outline {$OUTLINE} def
+
+/IbeamFraction  {$ERRORBARFRACTION} def
+/IbeamGray      0.50 def
+/IbeamLineWidth 0.5 def
+
+/fontsize       12 def
+/titleFontsize  14 def
+/smallFontsize   6 def
+
+/defaultColor {$DEFAULT_COLOR} def 
+
+{$COLORDICT}
+
+% Standard DNA/RNA color scheme
+% /colorDict << 
+%   (G)  orange
+%   (T)  red   
+%   (C)  blue  
+%   (A)  green 
+%   (U)  red   
+% >> def
+
+% Standard Amino Acid colors
+%/colorDict << 
+%  (G)  green  
+%  (S)  green  
+%  (T)  green  
+%  (Y)  green  
+%  (C)  green  
+%  (N)  purple 
+%  (Q)  purple 
+%  (K)  blue   
+%  (R)  blue   
+%  (H)  blue   
+%  (D)  red    
+%  (E)  red    
+%  (P)  black  
+%  (A)  black  
+%  (W)  black  
+%  (F)  black  
+%  (L)  black  
+%  (I)  black  
+%  (M)  black  
+%  (V)  black  
+%>> def
+
+
+
+% ---- DERIVED PARAMETERS ----
+
+/leftMargin
+  fontsize 3.5 mul
+
+def 
+
+/bottomMargin
+  fontsize 0.75 mul
+
+  % Add extra room for axis
+  xaxis {fontsize 1.75 mul add } if
+  xaxisLabel () eq {} {fontsize 0.75 mul add} ifelse
+def
+
+
+/topMargin 
+  logoTitle () eq { 10 }{titleFontsize 4 add} ifelse
+def
+
+/rightMargin 
+  %Add extra room if showing ends
+  showEnds (-) eq { fontsize}{fontsize 1.5 mul} ifelse
+def
+
+/yaxisHeight 
+  logoHeight 
+  bottomMargin sub  
+  topMargin sub
+def
+
+/ticWidth fontsize 2 div def
+
+/pointsPerBit yaxisHeight yaxisBits div  def
+
+/isBoxed 
+  showingBox (s) eq
+  showingBox (f) eq or { 
+    true
+  } {
+    false
+  } ifelse
+def
+
+/stackMargin 1 def
+
+% Do not add space aroung characters if characters are boxed
+/charRightMargin 
+  isBoxed { 0.0 } {stackMargin} ifelse
+def
+
+/charTopMargin 
+  isBoxed { 0.0 } {stackMargin} ifelse
+def
+
+/charWidth
+  logoWidth
+  leftMargin sub
+  rightMargin sub
+  charsPerLine div
+  charRightMargin sub
+def
+
+/charWidth4 charWidth 4 div def
+/charWidth2 charWidth 2 div def
+
+/stackWidth 
+  charWidth charRightMargin add
+def
+ 
+/numberFontsize 
+  fontsize charWidth lt {fontsize}{charWidth} ifelse
+def
+
+% movements to place 5'/N and 3'/C symbols
+/leftEndDeltaX  fontsize neg         def
+/leftEndDeltaY  fontsize 1.5 mul neg def
+/rightEndDeltaX fontsize 0.25 mul     def
+/rightEndDeltaY leftEndDeltaY        def
+
+% Outline width is proporional to charWidth, 
+% but no less that 1 point
+/outlinewidth 
+  charWidth 32 div dup 1 gt  {}{pop 1} ifelse
+def
+
+
+% ---- PROCEDURES ----
+
+/StartLogo { 
+  % Save state
+  save 
+  gsave 
+
+  % Print Logo Title, top center 
+  gsave 
+    SetTitleFont
+
+    logoWidth 2 div
+    logoTitle
+    stringwidth pop 2 div sub
+    logoHeight logoLines mul  
+    titleFontsize sub
+    moveto
+
+    logoTitle
+    show
+  grestore
+
+  % Print X-axis label, bottom center
+  gsave
+    SetStringFont
+
+    logoWidth 2 div
+    xaxisLabel stringwidth pop 2 div sub
+    fontsize 3 div
+    moveto
+
+    xaxisLabel
+    show
+  grestore
+
+  % Show Fine Print
+  showFineprint {
+    gsave
+      SetSmallFont
+      logoWidth
+        fineprint stringwidth pop sub
+        smallFontsize sub
+          smallFontsize 3 div
+      moveto
+    
+      fineprint show
+    grestore
+  } if
+
+  % Move to lower left corner of last line, first stack
+  leftMargin bottomMargin translate
+
+  % Move above first line ready for StartLine 
+  0 logoLines logoHeight mul translate
+
+  SetLogoFont
+} bind def
+
+/EndLogo { 
+  grestore 
+  showpage 
+  restore 
+} bind def
+
+
+/StartLine{ 
+  % move down to the bottom of the line:
+  0 logoHeight neg translate
+  
+  gsave 
+    yaxis { MakeYaxis } if
+    xaxis { ShowLeftEnd } if
+} bind def
+
+/EndLine{ 
+    xaxis { ShowRightEnd } if
+  grestore 
+} bind def
+
+
+/MakeYaxis {
+  gsave    
+    stackMargin neg 0 translate
+    ShowYaxisBar
+    ShowYaxisLabel
+  grestore
+} bind def
+
+
+/ShowYaxisBar { 
+  gsave  
+    SetStringFont
+
+    /str 10 string def % string to hold number  
+    /smallgap stackMargin 2 div def
+
+    % Draw first tic and bar
+    gsave    
+      ticWidth neg 0 moveto 
+      ticWidth 0 rlineto 
+      0 yaxisHeight rlineto
+      stroke
+    grestore
+
+   
+    % Draw the tics
+    % initial increment limit proc for
+    0 yaxisTicBits yaxisBits abs %cvi
+    {/loopnumber exch def
+
+      % convert the number coming from the loop to a string
+      % and find its width
+      loopnumber 10 str cvrs
+      /stringnumber exch def % string representing the number
+
+      stringnumber stringwidth pop
+      /numberwidth exch def % width of number to show
+
+      /halfnumberheight
+         stringnumber CharBoxHeight 2 div
+      def
+
+      numberwidth % move back width of number
+      neg loopnumber pointsPerBit mul % shift on y axis
+      halfnumberheight sub % down half the digit
+
+      moveto % move back the width of the string
+
+      ticWidth neg smallgap sub % Move back a bit more  
+      0 rmoveto % move back the width of the tic  
+
+      stringnumber show
+      smallgap 0 rmoveto % Make a small gap  
+
+      % now show the tic mark
+      0 halfnumberheight rmoveto % shift up again
+      ticWidth 0 rlineto
+      stroke
+    } for
+  grestore
+} bind def
+
+/ShowYaxisLabel {
+  gsave
+    SetStringFont
+
+    % How far we move left depends on the size of
+    % the tic labels.
+    /str 10 string def % string to hold number  
+    yaxisBits yaxisTicBits div cvi yaxisTicBits mul 
+    str cvs stringwidth pop
+    ticWidth 1.5 mul  add neg  
+
+
+    yaxisHeight
+    yaxisLabel stringwidth pop
+    sub 2 div
+
+    translate
+    90 rotate
+    0 0 moveto
+    yaxisLabel show
+  grestore
+} bind def
+
+
+/StartStack {  % <stackNumber> startstack
+  xaxis {MakeNumber}{pop} ifelse
+  gsave
+} bind def
+
+/EndStack {
+  grestore
+  stackWidth 0 translate
+} bind def
+
+
+% Draw a character whose height is proportional to symbol bits
+/MakeSymbol{ % charbits character MakeSymbol
+  gsave
+    /char exch def
+    /bits exch def
+
+    /bitsHeight 
+       bits pointsPerBit mul 
+    def
+
+    /charHeight 
+       bitsHeight charTopMargin sub
+       dup 
+       0.0 gt {}{pop 0.0} ifelse % if neg replace with zero 
+    def 
+ 
+    charHeight 0.0 gt {
+      char SetColor
+      charWidth charHeight char ShowChar
+
+      showingBox (s) eq { % Unfilled box
+        0 0 charWidth charHeight false ShowBox
+      } if
+
+      showingBox (f) eq { % Filled box
+        0 0 charWidth charHeight true ShowBox
+      } if
+
+    } if
+
+  grestore
+
+  0 bitsHeight translate 
+} bind def
+
+
+/ShowChar { % <width> <height> <char> ShowChar
+  gsave
+    /tc exch def    % The character
+    /ysize exch def % the y size of the character
+    /xsize exch def % the x size of the character
+
+    /xmulfactor 1 def 
+    /ymulfactor 1 def
+
+  
+    % if ysize is negative, make everything upside down!
+    ysize 0 lt {
+      % put ysize normal in this orientation
+      /ysize ysize abs def
+      xsize ysize translate
+      180 rotate
+    } if
+
+    shrinking {
+      xsize 1 shrink sub 2 div mul
+        ysize 1 shrink sub 2 div mul translate 
+
+      shrink shrink scale
+    } if
+
+    % Calculate the font scaling factors
+    % Loop twice to catch small correction due to first scaling
+    2 {
+      gsave
+        xmulfactor ymulfactor scale
+      
+        ysize % desired size of character in points
+        tc CharBoxHeight 
+        dup 0.0 ne {
+          div % factor by which to scale up the character
+          /ymulfactor exch def
+        } % end if
+        {pop pop}
+        ifelse
+
+        xsize % desired size of character in points
+        tc CharBoxWidth  
+        dup 0.0 ne {
+          div % factor by which to scale up the character
+          /xmulfactor exch def
+        } % end if
+        {pop pop}
+        ifelse
+      grestore
+    } repeat
+
+    % Adjust horizontal position if the symbol is an I
+    tc (I) eq {
+      charWidth 2 div % half of requested character width
+      tc CharBoxWidth 2 div % half of the actual character
+      sub 0 translate
+      % Avoid x scaling for I 
+      /xmulfactor 1 def 
+    } if
+
+
+    % ---- Finally, draw the character
+  
+    newpath
+    xmulfactor ymulfactor scale
+
+    % Move lower left corner of character to start point
+    tc CharBox pop pop % llx lly : Lower left corner
+    exch neg exch neg
+    moveto
+
+    outline {  % outline characters:
+      outlinewidth setlinewidth
+      tc true charpath
+      gsave 1 setgray fill grestore
+      clip stroke
+    } { % regular characters
+      tc show
+    } ifelse
+
+  grestore
+} bind def
+
+
+/ShowBox { % x1 y1 x2 y2 filled ShowBox
+  gsave
+    /filled exch def 
+    /y2 exch def
+    /x2 exch def
+    /y1 exch def
+    /x1 exch def
+    newpath
+    x1 y1 moveto
+    x2 y1 lineto
+    x2 y2 lineto
+    x1 y2 lineto
+    closepath
+
+    clip
+    
+    filled {
+      fill
+    }{ 
+      0 setgray stroke   
+    } ifelse
+
+  grestore
+} bind def
+
+
+/MakeNumber { % number MakeNumber
+  gsave
+    SetNumberFont
+    stackWidth 0 translate
+    90 rotate % rotate so the number fits
+    dup stringwidth pop % find the length of the number
+    neg % prepare for move
+    stackMargin sub % Move back a bit
+    charWidth (0) CharBoxHeight % height of numbers
+    sub 2 div %
+    moveto % move back to provide space
+    show
+  grestore
+} bind def
+
+
+/Ibeam{ % heightInBits Ibeam
+  gsave
+    % Make an Ibeam of twice the given height in bits
+    /height exch  pointsPerBit mul def 
+    /heightDRAW height IbeamFraction mul def
+
+    IbeamLineWidth setlinewidth
+    IbeamGray setgray 
+
+    charWidth2 height neg translate
+    ShowIbar
+    newpath
+      0 0 moveto
+      0 heightDRAW rlineto
+    stroke
+    newpath
+      0 height moveto
+      0 height rmoveto
+      currentpoint translate
+    ShowIbar
+    newpath
+    0 0 moveto
+    0 heightDRAW neg rlineto
+    currentpoint translate
+    stroke
+  grestore
+} bind def
+
+
+/ShowIbar { % make a horizontal bar
+  gsave
+    newpath
+      charWidth4 neg 0 moveto
+      charWidth4 0 lineto
+    stroke
+  grestore
+} bind def
+
+
+/ShowLeftEnd {
+  gsave
+    SetStringFont
+    leftEndDeltaX leftEndDeltaY moveto
+    showEnds (d) eq {(5) show ShowPrime} if
+    showEnds (p) eq {(N) show} if
+  grestore
+} bind def
+
+
+/ShowRightEnd { 
+  gsave
+    SetStringFont
+    rightEndDeltaX rightEndDeltaY moveto
+    showEnds (d) eq {(3) show ShowPrime} if
+    showEnds (p) eq {(C) show} if
+  grestore
+} bind def
+
+
+/ShowPrime {
+  gsave
+    SetPrimeFont
+    (\242) show 
+  grestore
+} bind def
+
+ 
+/SetColor{ % <char> SetColor
+  dup colorDict exch known {
+    colorDict exch get aload pop setrgbcolor
+  } {
+    pop
+    defaultColor aload pop setrgbcolor
+  } ifelse 
+} bind def
+
+% define fonts
+/SetTitleFont {/Times-Bold findfont titleFontsize scalefont setfont} bind def
+/SetLogoFont  {/Helvetica-Narrow-Bold findfont charWidth  scalefont setfont} bind def
+/SetStringFont{/Helvetica-Bold findfont fontsize scalefont setfont} bind def
+/SetPrimeFont {/Symbol findfont fontsize scalefont setfont} bind def
+/SetSmallFont {/Helvetica findfont smallFontsize scalefont setfont} bind def
+
+/SetNumberFont {
+    /Helvetica-Bold findfont 
+    numberFontsize
+    scalefont
+    setfont
+} bind def
+
+%Take a single character and return the bounding box
+/CharBox { % <char> CharBox <lx> <ly> <ux> <uy>
+  gsave
+    newpath
+    0 0 moveto
+    % take the character off the stack and use it here:
+    true charpath 
+    flattenpath 
+    pathbbox % compute bounding box of 1 pt. char => lx ly ux uy
+    % the path is here, but toss it away ...
+  grestore
+} bind def
+
+
+% The height of a characters bounding box
+/CharBoxHeight { % <char> CharBoxHeight <num>
+  CharBox
+  exch pop sub neg exch pop
+} bind def
+
+
+% The width of a characters bounding box
+/CharBoxWidth { % <char> CharBoxHeight <num>
+  CharBox
+  pop exch pop sub neg 
+} bind def
+
+
+% Deprecated names
+/startstack {StartStack} bind  def
+/endstack {EndStack}     bind def
+/makenumber {MakeNumber} bind def
+/numchar { MakeSymbol }  bind def
+
+%%EndProlog
+
+%%Page: 1 1
+StartLogo
+StartLine % line number 1
+
+{$DATA}
+
+EndLine
+EndLogo
+
+%%EOF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/template.pm	Wed Sep 10 10:33:29 2014 -0400
@@ -0,0 +1,672 @@
+=head1 NAME
+
+  template.pm - creates logo output in various formats
+
+=head1 SYNOPSIS
+
+  Perl module 
+
+=head1 DESCRIPTION
+
+  logo.cgi and run.pl collect the logo data. They can then enter
+  template::create_template to create logo output in the following formats:
+
+   * EPS
+   * GIF
+   * PDF
+   * PNG
+
+  
+
+  If the configuration file "logo.conf" exists in the working directory, then
+  it will be parsed for the locations of GhostScript (gs) and convert. The
+  following is an
+  example of the configuration file ("#" at beginning of line indicates
+  comment):
+
+    # Make configuration changes here. Rename this file to logo.conf
+    # gs version 5.5 does not work, 6.5 does
+    # set the PATHS
+    gs=/usr/local/bin/gs
+    convert=/usr/X11R6/bin/convert
+
+=cut
+
+package template;
+use strict;
+
+
+################################################################################
+########                   STATE FOR FILLINGS                         ##########
+################################################################################
+
+#        BARBITS            number of bits in vertical y-axis bar
+#        CHARWIDTH          width of characters in logo
+#        COLORSCHEME        "a" for amino acid;
+#                           "n" for nucleic acid;
+#                           "b" for black (no color scheme)
+#                           color scheme
+#        DATA               string of heights of characters in cm
+#        DESC               description of aligned sequences
+#        FINEPRINT          enable adverts/credits
+#        LOGOLINES          number of lines of logo
+#        LOGOHEIGHT         height of EACH LINE in logo (in cm)
+#        LOGOWIDTH          width of final logo in cm
+#        LOGOHEIGHTPOINTS   height of final logo in points
+#        LOGOWIDTHPOINTS    height of final logo in points
+#        LOGOSTART          logo output will begin at residue LOGOSTART
+#        LOGOEND            logo output will end at residue LOGOEND
+#        ERRBAR             1 to include error bar, 0 to exclude
+#        ERRORBARFRACTION   percent of error bar to show in range [0,1]
+#        KIND               $AA for amino acid, $NA for nucleic acid
+#        NUMBERING          1 to show residue numbers, 0 to exclude
+#        OUTLINE            1 to print characters in outline form;
+#                           0 to print characters in solid form
+#        SHOWENDS           "d" to show 5' and 3' ends;
+#                           "p" to show N and C termini;
+#                           "-" to exclude end markers
+#        SHOWINGBOX         "n" to have No boxes around characters;
+#                           "s" to have boxes around characters, with Shrinking;
+#                           "f" to have Filled boxes around characters
+#        SHRINKBOOLEAN      1 to shrink characters; 0 to exclude shrinking
+#        SHRINKFACTOR       amount to shrink in range from 1(no shrinking) to
+#                           0(full shrinking)
+#        START_NUM          start number for very beginning of sequence
+#        TITLE              title of logo
+#        YAXIS              1 to turn on y axis and its labels
+
+################################################################################
+#####                   VARIABLES AND DEFAULT VALUES                       #####
+################################################################################
+
+my %defaults = (
+		LOGOHEIGHT => 5,
+		LOGOWIDTH => 8,
+
+		YAXIS => "false",
+		TITLE => "",
+		YAXIS_LABEL => "bits",
+		XAXIS_LABEL => "",
+
+		BARENDS => "false",
+		OUTLINE => "false",
+		SHOWINGBOX => "n",
+		NUMBERING => "false",
+        #FINEPRINT => "Created by: G. E. Crooks, G. Hon, J.-M. Chandonia & S. E. Brenner, (2002) <weblogo.berkeley.edu>",
+        FINEPRINT => "weblogo.berkeley.edu",
+
+		ERRORBARFRACTION => "1",
+		ERRBAR => "0",
+		SHRINKFACTOR => "1",
+		START_NUM => "1",
+
+		DEFAULT_COLOR => "black",
+
+		black  => "[0 0 0]",
+		red    =>  "[0.8 0 0]",
+		green  =>  "[0 0.8 0]",
+		blue   =>  "[0 0 0.8]",
+		yellow =>  "[1 0.7 1.0]",
+		purple =>  "[0.8 0 0.8]",
+		orange =>  "[1 0.7 0]"
+		);
+
+my $AA = 0;
+my $NA = 1;
+my $PATH;
+
+################################################################################
+#####                          SOME FUNCTIONS                              #####
+################################################################################
+
+
+sub create_template {
+    my ($input, $kind, $desc_r, $data_r, $outfile, $path) = @_;
+
+    # set path
+    $PATH = $path;
+
+    #Create EPS file
+    my %fillings;
+
+    # put parameters in fillings
+    makeFillings(\%fillings, $input, $kind, $desc_r, $defaults{FINEPRINT});
+
+    # set default data if not filled
+    setDefaults(\%fillings, \%defaults, scalar @$data_r);
+
+    # put color in fillings
+    setColors(\%fillings, \%defaults);
+
+    # put data in fillings
+    setData(\%fillings, $data_r);
+
+    # make eps output
+    my $eps = fillTemplate("$PATH/template.eps", \%fillings);
+    my $format = $input->{FORMAT};
+
+    # convert
+    my ($gsprog, $convertprog) = getProgs();
+
+#    print STDERR "(gsprog, convertprog) = ($gsprog, $convertprog)\n";
+
+    my $width = $fillings{LOGOWIDTHPOINTS};
+    my $height = $fillings{LOGOHEIGHTPOINTS};       # height of entire logo
+    my $res = $input->{RES};
+    my $antialias = (defined $input->{ANTIALIAS} && $input->{ANTIALIAS}) ? "-dTextAlphaBits=4" : "";
+
+    my $r = $width . "x" . $height;
+
+    if( $format eq "EPS" ) {
+	if ($outfile eq "-") { # if standard out
+	    print $eps;
+	} else {
+	    open (WRITEME, ">$outfile");
+	    print WRITEME $eps;
+	    close (WRITEME);
+	}
+
+    } elsif ($format eq "PDF"){
+#	print("outfile = $outfile\n");
+	my $program = "| $gsprog -sOutputFile=$outfile -sDEVICE=pdfwrite -dPDFSETTINGS=/printer -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height -dEmbedAllFonts=true $antialias -dSAFER -dBATCH  -dNOPAUSE -_";
+	open(WRITEME, $program);
+	print WRITEME "$eps";
+	close (WRITEME);
+
+    } elsif ( $format eq "PNG" ) {
+	my $program = "| $gsprog -sOutputFile=$outfile -sDEVICE=png16m -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height $antialias -dSAFER -dBATCH  -dNOPAUSE -_";
+    #print ("$program");
+	open(WRITEME, $program);
+	print WRITEME "$eps";
+        close (WRITEME);
+
+    } elsif ($format eq "GIF") {
+	# convert to EPS first, then GIF
+        die "Please check logo.conf: convert program does not exist" 
+            if (!defined $convertprog || !(-e $convertprog));
+
+	my $program = "| $gsprog -sOutputFile=- -sDEVICE=png16m -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height $antialias -dSAFER -dBATCH  -dNOPAUSE -_";
+
+	if ($outfile eq "-") {
+	    $program .= " | $convertprog png:- gif:-";
+	} else {
+	    $program .= " | $convertprog png:- $outfile";
+	}
+
+	open(WRITEME, $program);
+	print WRITEME "$eps";
+        close (WRITEME);
+    }
+}
+
+#deprecated
+sub c {
+    return create_template( @_);
+}
+
+sub getProgs {
+    my ($gsprog, $convertprog) = ("gs", "convert");     
+
+
+    # No configuration file, then return defaults.
+    return ($gsprog, $convertprog) if (! (-e "$PATH/logo.conf"));
+    open (CONF, "$PATH/logo.conf");
+    
+    while (<CONF>) {
+	next if (/^\#/);  # skip lines beginning with "#"
+	if (m/^gs/i) {    # if looks like gs (case insensitive)
+	    ($gsprog) = ($_ =~ /^\S+\=(.+)$/);
+	}
+	if (m/^convert/i) { # if looks like convert (case insensitive)
+	    ($convertprog) = ($_ =~ /^\S+\=(.+)$/);
+	}
+    }
+
+    # Do these fings exist?
+    my ($gsprogname) = ($gsprog =~ /^(\S+)/);
+
+    die "Please check $PATH/logo.conf: gs program ($gsprogname) does not exist" if (!defined $gsprogname || !(-e $gsprogname));
+    #die "Please check logo.conf: convert program does not exist" if (!defined $convertprog || !(-e $convertprog));
+
+    return ($gsprog, $convertprog);
+}
+
+sub get_eps { 
+    my ($input, $kind, $desc_r, $data_r) = @_;
+    my %fillings;
+
+    # put parameters in fillings
+    makeFillings(\%fillings, $input, $kind, $desc_r);
+
+    # set default data if not filled
+    setDefaults(\%fillings, \%defaults, $#$data_r);
+
+    # put data in fillings
+    setData(\%fillings, $data_r);
+
+    # make output
+    return fillTemplate("$PATH/template.eps", \%fillings);
+}
+
+sub fillTemplate {
+    my ($filename, $fillings) = @_;
+
+    if (not -e $filename) {
+	die "filename ($filename) must exist\n";
+    }
+
+    my $text;
+    local $/; # slurp mode (undef)
+    local *F; # create local filehandle
+    open(F, "< $filename\0") || return;
+    $text = <F>;
+    close(F);
+
+    #replace {$KEYWORDS} with value in %$fillings hash
+    $text =~ s{ \{\$ (.*?) \} }
+    { exists( $fillings->{$1})
+	  ? $fillings->{$1}
+                     : ""
+		     }gsex;
+    return $text;
+}
+
+
+################################################################################
+#####                    FILL THE FILLINGS HERE                            #####
+################################################################################
+
+sub isChecked {
+    return 0 if (!defined $_[0]);
+    return $_[0];
+}
+
+# negative/positive ints
+sub isInt {
+    return ($_[0] =~ /^[-\+]?\d+$/) ? 1 : 0;
+}
+
+sub makeFillings {
+
+#    my ($fillings, $input, $kind, $desc_r, $data_r, $fineprint) = @_;
+    my ($fillings, $input, $kind, $desc_r, $fineprint) = @_;
+    $fillings->{KIND} = $kind;
+    $fillings->{LOGOHEIGHT} = $input->{LOGO_HEIGHT};
+    $fillings->{LOGOWIDTH} = $input->{LOGO_WIDTH};
+    $fillings->{OUTLINE} = (isChecked($input->{OUTLINE})) ? "true" : "false";
+    $fillings->{NUMBERING} = (isChecked($input->{NUMBERING})) ? "true" : "false";
+    $fillings->{FINEPRINT} = (isChecked($input->{FINEPRINT})) ? $fineprint : "";
+
+    $fillings->{LOGOSTART} = $input->{LOGOSTART};
+    $fillings->{LOGOEND} = $input->{LOGOEND};
+    $fillings->{START_NUM} = $input->{START_NUM};
+
+    $fillings->{YAXIS} = (isChecked($input->{YAXIS}) && !isChecked($input->{STRETCH})) ? "true" : "false";
+    
+    
+    
+    $fillings->{TITLE} = $input->{TITLETEXT};
+    $fillings->{YAXIS_LABEL} = $input->{YAXIS_LABEL};
+
+    $fillings->{XAXIS_LABEL} = $input->{XAXIS_LABEL};
+    $fillings->{ERRBAR} = $input->{ERRBAR};
+    $fillings->{SHOWINGBOX} = (isChecked($input->{SHOWINGBOX})) ? "s" : "n";
+    $fillings->{SHRINKBOOLEAN} = ($fillings->{SHOWINGBOX} eq "s") ? "true" : "false";
+    $fillings->{SHRINKFACTOR} = $input->{BOXSHRINK};
+
+    if ((defined $input->{CHARSPERLINE}) && 
+	isInt($input->{CHARSPERLINE}) && 
+	($input->{CHARSPERLINE} > 0)) {
+	$fillings->{CHARSPERLINE} = $input->{CHARSPERLINE};
+    }
+
+    if (defined $input->{BARBITS}) {
+	$fillings->{BARBITS} = $input->{BARBITS};
+    } else {
+	$fillings->{BARBITS} = ($fillings->{KIND} == $AA) ? 4.3 : 2;
+    }
+
+    if (defined $input->{TICBITS}) {
+	$fillings->{TICBITS} = $input->{TICBITS};
+    } else {
+	$fillings->{TICBITS} = 1;
+    }
+
+
+
+
+#    if (isChecked($input->{NOCOLOR})) {
+#        $fillings->{COLORSCHEME} = "b";
+#    } else {
+#        $fillings->{COLORSCHEME} = ($kind == $AA) ? "a" : "n";
+#    }
+
+    #color
+    if (defined $input->{DEFAULT_COLOR}) {
+	$fillings->{DEFAULT_COLOR} = (isHexColor( $input->{DEFAULT_COLOR})) ? "c" . $input->{DEFAULT_COLOR} :
+	                              $input->{DEFAULT_COLOR};
+    }
+
+    if (isChecked($input->{SHOWENDS})) {
+	$fillings->{SHOWENDS} = ($fillings->{KIND} == $AA) ? "p" : "d";
+    } else {
+        $fillings->{SHOWENDS} = "-";
+    }
+
+    $fillings->{DESC} = getDescription($desc_r, $fillings->{KIND});
+
+    $fillings->{ERRORBARFRACTION} = $input->{ERRORBARFRACTION};
+    $fillings->{COLORSCHEME} = $input->{COLORSCHEME};
+    $fillings->{COLORS} = $input->{COLORS};
+}
+
+sub getDescription {
+    my $returnVal = "";
+
+    foreach (@{$_[0]}) {
+        if(defined($_)) {
+            $returnVal .= "% * $_\n";
+        } else {
+            $returnVal .= "% * \n";
+        }
+    }
+
+    if ($_[1] == $AA) {
+        $returnVal .= "% * PROTEIN ALIGNMENT";
+    } else {
+        $returnVal .= "% * NUCLEOTIDE ALIGNMENT";
+    }
+
+    return $returnVal;
+}
+
+
+################################################################################
+#####                        SETTING DEFAULTS                              #####
+################################################################################
+
+sub setDefaults {
+    my ($fillings, $defaults, $numchars) = @_;
+
+    $fillings->{LOGOHEIGHT} = $defaults->{LOGOHEIGHT} if !defined $fillings->{LOGOHEIGHT};
+    $fillings->{LOGOWIDTH} = $defaults->{LOGOWIDTH} if !defined $fillings->{LOGOWIDTH};
+
+    $fillings->{START_NUM} = $defaults->{START_NUM} if !defined $fillings->{START_NUM};    
+    $fillings->{LOGOSTART} = $fillings->{START_NUM} if !defined $fillings->{LOGOSTART};
+    $fillings->{LOGOEND} = $numchars + $fillings->{LOGOSTART} - 1 if !defined $fillings->{LOGOEND};
+
+    $fillings->{YAXIS} = $defaults->{YAXIS} if !defined $fillings->{YAXIS};
+    $fillings->{TITLE} = $defaults->{TITLE} if !defined $fillings->{TITLE} || $fillings->{TITLE} eq "";
+    #$fillings->{YAXIS_LABEL} = $defaults->{YAXIS_LABEL} if !defined $fillings->{YAXIS_LABEL} || $fillings->{YAXIS_LABEL} eq "";
+    $fillings->{YAXIS_LABEL} = $defaults->{YAXIS_LABEL} if !defined $fillings->{YAXIS_LABEL} ;
+    $fillings->{XAXIS_LABEL} = $defaults->{XAXIS_LABEL} if !defined $fillings->{XAXIS_LABEL} || $fillings->{XAXIS_LABEL} eq "";
+
+    $fillings->{BARENDS} = $defaults->{BARENDS} if !defined $fillings->{BARENDS};
+    $fillings->{OUTLINE} = $defaults->{OUTLINE} if !defined $fillings->{OUTLINE};
+    $fillings->{SHOWINGBOX} = $defaults->{SHOWINGBOX} if !defined $fillings->{SHOWINGBOX};
+    $fillings->{NUMBERING} = $defaults->{NUMBERING} if !defined $fillings->{NUMBERING};
+
+    $fillings->{ERRORBARFRACTION} = $defaults->{ERRORBARFRACTION} if !defined $fillings->{ERRORBARFRACTION};
+    $fillings->{SHRINKFACTOR} = $defaults->{SHRINKFACTOR} if !defined $fillings->{SHRINKFACTOR};
+    $fillings->{ERRBAR} = $defaults->{ERRBAR} if !defined $fillings->{ERRBAR};
+
+#    printf("logostart = %d, start num = %d, numchars = $numchars, logoend = %d\n", $fillings->{LOGOSTART}, $fillings->{START_NUM},
+#	   $fillings->{LOGOEND});
+
+    my $givenrange = $fillings->{LOGOEND} - $fillings->{LOGOSTART} + 1;
+    my $possiblerange = $numchars - ($fillings->{LOGOSTART} - $fillings->{START_NUM});
+
+    if (!defined $fillings->{CHARSPERLINE} && ($givenrange > $possiblerange)) {
+	$fillings->{CHARSPERLINE} = $numchars - ($fillings->{LOGOSTART} - $fillings->{START_NUM});
+    } elsif (!defined $fillings->{CHARSPERLINE}) {
+	$fillings->{CHARSPERLINE} = $fillings->{LOGOEND} - $fillings->{LOGOSTART} + 1;
+    }
+
+    $fillings->{DEFAULT_COLOR} = $defaults->{DEFAULT_COLOR} if !defined $fillings->{DEFAULT_COLOR} ||
+	                                                       $fillings->{DEFAULT_COLOR} eq "";
+
+#    printf("chars per line = %s\n",$fillings->{CHARSPERLINE});    
+#    print("givenrange = $givenrange, possiblerange = $possiblerange\n");
+
+    if ($givenrange > $possiblerange) {
+	$fillings->{LOGOLINES} = roundup($possiblerange / $fillings->{CHARSPERLINE});
+    } else {
+	$fillings->{LOGOLINES} = roundup($givenrange / $fillings->{CHARSPERLINE});
+    }
+
+    $fillings->{CHARWIDTH} = ($fillings->{LOGOWIDTH} - 1.5) / $fillings->{CHARSPERLINE};
+
+#    # change height if more than 1 line
+#    $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54)) * $fillings->{LOGOLINES};
+
+    # LOGOHEIGHTPOITNS is the height input by the user
+#    $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54));                          # user specifies height of entire logo
+    $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54)) * $fillings->{LOGOLINES};  # user specifies height of logo line
+    $fillings->{LOGOWIDTHPOINTS} =  int($fillings->{LOGOWIDTH}  * (72 / 2.54));
+
+    # LOGOLINEHEIGHT is the height of each logo line, in cm
+#    $fillings->{LOGOLINEHEIGHT} = $fillings->{LOGOHEIGHT} / $fillings->{LOGOLINES};    # user specifies height of entire logo
+    $fillings->{LOGOLINEHEIGHT} = $fillings->{LOGOHEIGHT};                              # user specifies height of logo line
+}
+
+sub roundup {
+    return ($_[0] - int($_[0]) > 0) ? int($_[0] + 1) : $_[0];
+}
+
+
+################################################################################
+#####                         COLORS                                       #####
+################################################################################
+
+sub getDefaultColors {
+    my ($defaults) = @_;
+    my $returnVal = "";
+    $returnVal .= "/black " . $defaults->{black} . " def\n";
+    $returnVal .= "/red " . $defaults->{red} . " def\n";
+    $returnVal .= "/green " . $defaults->{green} . " def\n";
+    $returnVal .= "/blue " . $defaults->{blue} . " def\n";
+    $returnVal .= "/yellow " . $defaults->{yellow} . " def\n";
+    $returnVal .= "/purple " . $defaults->{purple} . " def\n";
+    $returnVal .= "/orange " . $defaults->{orange} . " def\n";
+
+    return $returnVal;
+}
+
+sub getNAColors {
+    my $returnVal = <<END
+% Standard DNA/RNA color scheme
+/colorDict << 
+   (G)  orange
+   (T)  red   
+   (C)  blue  
+   (A)  green 
+   (U)  red   
+END
+    ;
+
+    return $returnVal;
+}
+    
+sub getAAColors {
+    my $returnVal = <<END
+% Standard Amino Acid colors
+/colorDict << 
+  (G)  green  
+  (S)  green  
+  (T)  green  
+  (Y)  green  
+  (C)  green  
+  (N)  purple 
+  (Q)  purple 
+  (K)  blue   
+  (R)  blue   
+  (H)  blue   
+  (D)  red    
+  (E)  red    
+  (P)  black  
+  (A)  black  
+  (W)  black  
+  (F)  black  
+  (L)  black  
+  (I)  black  
+  (M)  black  
+  (V)  black
+END
+    ;
+
+    return $returnVal;
+}
+
+sub setColors {
+    my ($fillings, $defaults, $input) = @_;
+    my $colordef = getDefaultColors($defaults);
+    my $colordict = "/colorDict <<\n";
+
+    if ($fillings->{COLORSCHEME} eq "DEFAULT") {
+	$colordef = getDefaultColors($defaults);
+
+	if ($fillings->{KIND} eq $AA) {
+	    $colordict = getAAColors();
+	} else {
+	    $colordict = getNAColors();
+	}
+    } elsif ($fillings->{COLORSCHEME} eq "BW") {
+	# do nothing for dict
+    } else {
+
+	my %colorhash = %{ $fillings->{COLORS} };
+	my $colorName = "";
+
+	foreach (keys %colorhash) {  # keys are strings of residues, value = color name or color code (FF0000)
+	    # add color to definitions
+	    $colorName = $colorhash{$_};
+
+#	    print("color = $_\n");
+
+	    addColorDef(\$colordef, $colorName ) if (isHexColor($colorName));
+	    
+	    # add have each residue use the color
+	    foreach (split(//, $_)) {
+		# add color to dictionary
+		if (isHexColor($colorName)) {
+		    $colordict .= "  ($_)  c$colorName\n" if !($_ =~ /^\s*$/);
+		} else {
+		    $colordict .= "  ($_)  $colorName\n" if !($_ =~ /^\s*$/);
+		}
+	    }
+	}
+    }
+
+    $colordict .= "\n>> def";
+
+    # add to fillings
+    $fillings->{COLORDEF} = $colordef;
+    $fillings->{COLORDICT} = $colordict;
+}
+
+sub addColorDef {
+#    print("adding to color def\n");
+    my ($colordef_r, $color) = @_;
+    my $PSColor = getPSColor($color);
+    $$colordef_r .= "/c$color $PSColor def\n";
+}
+
+sub isHexColor {
+    return ($_[0] =~ /^[0-9a-fA-F]+$/) && (length $_[0] == 6);
+}
+
+# know that it is hex color
+sub getPSColor {
+    return "[" . hex(substr($_[0],0,2)) / 255 . "  " .
+          	 hex(substr($_[0],2,2)) / 255 . "  " .
+		 hex(substr($_[0],4,2)) / 255 . "]";
+}
+
+
+################################################################################
+#####                         SETTING DATA FIELD                           #####
+################################################################################
+
+sub setData {
+    my ($fillings, $data_r) = @_;
+
+    my @data = @$data_r;
+    my ($height, $letter);
+    my @slice;
+    my $data;
+    my $start_num = $fillings->{START_NUM};
+
+    my $start = $fillings->{LOGOSTART} - $start_num;    # where in @data to start
+    my $end = $fillings->{LOGOEND} - $start_num;        # where in @data to end
+    my $charsperline = $fillings->{CHARSPERLINE};
+
+    my $numlabel = $fillings->{LOGOSTART};
+
+    $end = ($end >= scalar @data) ? (scalar @data - 1) : $end;
+
+    for (my $i=$start ; $i<=$end ; $i++) {
+
+	# if add new lines
+#	if ((($i - $start) % $charsperline == 0) &&
+#	    ($i != $start) &&    # not first one
+#	    ($i != $end)) {      # not last one
+	if ((($i - $start) % $charsperline == 0) &&
+	    ($i != $start)) {    # not first one
+	    $data .= <<END
+EndLine
+StartLine
+
+END
+    ;
+	}
+
+        @slice = @{$data[$i]};
+        $data .= <<END
+($numlabel) startstack
+END
+    ;
+
+        $numlabel++;
+
+        foreach (@slice) {
+            ($letter,$height) = ($_ =~ /^(.{1})(\S+)/);
+
+	    # is space, so leave
+	    if ($letter eq " ") {
+		last;
+	    }
+	    
+	    # look for ">", which is symbol for error bar, then quit
+	    if ($letter eq ">") {
+		last;
+	    }
+
+#	    # look for negative heights
+#	    if ($height < 0) {
+#		next;
+#	    }
+
+            $letter =  (uc $letter);  # always uppercase
+	    $height = ($height < 0) ? 0 : $height;
+            $data .= " $height ($letter) numchar\n";
+        }
+
+        # put in error bars -- size is in $height as read in before
+        if ($fillings->{ERRBAR} && $letter ne " " && $height != 0) {
+	    $data .= " $height Ibeam\n";
+	}
+
+        $data .= <<END
+endstack
+
+END
+    ;
+
+    }
+
+    $fillings->{DATA} = $data;
+}
+
+################################################################################
+
+1;
--- a/wrapper.sh	Mon Aug 18 04:42:59 2014 -0400
+++ b/wrapper.sh	Wed Sep 10 10:33:29 2014 -0400
@@ -12,6 +12,8 @@
 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
 cat $PWD/files/*/10_* > $PWD/hotspots.txt
+cp $dir/HS12RSS.txt $outdir/
+cp $dir/HS23RSS.txt $outdir/
 
 mkdir $outdir/identification/
 python $dir/gene_identification.py --input $PWD/summary.txt --outdir $outdir/identification/
@@ -25,6 +27,8 @@
 do
 	echo "Running $gene <br />" >> $output
 	mkdir $outdir/$gene
+	cp $dir/HS12RSS.txt $outdir/$gene/
+	cp $dir/HS23RSS.txt $outdir/$gene/
 	echo "Filtering input..." >> $output
 	Rscript $dir/filter.r $PWD/summary.txt $outdir/identification/${gene}.txt $outdir/$gene/summary.txt
 	Rscript $dir/filter.r $PWD/mutationanalysis.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationanalysis.txt
@@ -45,6 +49,8 @@
 	cat $mutationtmp $hotspottmp > $tmp
 	paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2
 	cat $tmp2 > $outdir/mutationandhotspot.txt
+	rm $outdir/$gene/HS12RSS.txt
+	rm $outdir/$gene/HS23RSS.txt
 done
 
 Rscript --verbose $dir/mutation_analysis.r $PWD/mutationstats.txt $PWD/summary.txt $outdir/ 2>&1
@@ -98,10 +104,17 @@
 Rscript $dir/piechart.r "${ca1_n},${ca2_n}" "IgA1 - ${ca1_n},IgA2 - ${ca2_n}" "IgA (N = $ca_n)" $outdir/ca.png 2>&1
 Rscript $dir/piechart.r "${cg1_n},${cg2_n},${cg3_n},${cg4_n}" "IgG1 - ${cg1_n},IgG2 - ${cg2_n},IgG3 - ${cg3_n},IgG4 - ${cg4_n}" "IgG (N = $cg_n)" $outdir/cg.png 2>&1
 
+$dir/seqlogo -t "HS12RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs12.txt > $outdir/HS12.png 2>&1
+$dir/seqlogo -t "HS23RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs23.txt > $outdir/HS23.png 2>&1
+
+
 echo "<img src='all.png'/>" >> $output
 echo "<img src='ca.png'/>" >> $output
 echo "<img src='cg.png'/>" >> $output
 
+echo "<img src='HS12.png'/>" >> $output
+echo "<img src='HS23.png'/>" >> $output
+
 for gene in ${genes[@]}
 do
 	echo "<table border='1'><caption>$gene transition table</caption>" >> $output
@@ -120,3 +133,6 @@
 echo "</table>" >> $output
 
 echo "</html>" >> $output
+
+#rm $outdir/HS12RSS.txt
+#rm $outdir/HS23RSS.txt