Mercurial > repos > davidvanzessen > mutation_analysis
changeset 2:2f4298673519 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 10 Sep 2014 10:33:29 -0400 |
parents | 856b5b718d21 |
children | a0b27058dcac |
files | HS12RSS.txt HS23RSS.txt gene_identification.py logo.pm mutation_analysis.r seqlogo template.eps template.pm wrapper.sh |
diffstat | 9 files changed, 2842 insertions(+), 30 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/HS12RSS.txt Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,113 @@ +Gene Sequence +IGKV2D-18 CACAGTGGTACAACCCTGAACAGAAACC +IGKV1D-39 CACAGTGTTACAAGTCATAACATAAACC +TRBJ2-5 CACGGCCCCCGAGCCCCGCACAAAAACC +TRBJ2-6 CACAGCCCGGGGACTCCCCGCAAAAACC +IGKV1-37 CACAGTGTTACAAGGCATAACATAAACC +TRBJ2-1 CACAGTGGGAAGGGGCTGCCCAGAATTC +TRBJ2-2 CACAGCCCTGGGGACCCTGGCGCAAACC +IGKV1-33 CACAGTGTAACAAGTCATAACATAAATC +IGKV3-11 CACAGTGATTCCACATGAAACAAAAACC +IGKV1D-33 CACAGTGTAACAAGTCATAACATAAATC +IGKV1-39 CACAGTGTTACAAGTCGTAACATAAACC +IGHD3-22 CACAGTGTCACAGAGTCCATCAAAAACT +TRAJ31 CACAGCACTTCCTGCCTTTACTGAAACC +TRAJ32 CACAGTCCTTCAGAGCCTTACACTAACC +TRAJ33 CACAGACACAAAAACCTTAACAAAAACC +TRAJ34 CACAGTGATACTGAGATCTACAAAAACC +TRAJ36 CACAGTGTTTCTGCCCAGTACAAAAACA +TRAJ37 CACTCTAATGCTGTACTTTACAAAAACT +TRAJ38 CACAGTCATAGAAAGCTTTACCAAAACC +TRAJ39 CACAGTGATCTTCGGCTGAGCAAAAACC +TRBJ1-2 CACATAAGAATATAGCCACTCTAAAAGG +IGHD2-2 CACAGTGACACAGCCCCATTCCCAAAGC +IGHD1-1 CACCGTGACTCGGGGCTGTTCAGAATCT +IGKV1-27 CACTGTGATACAAGCCCGAACATAAACC +TRBJ1-6 CACAGCTGCAGAGGCTTAGATAAAACCC +TRBJ1-5 CACAGTGCATCATGAGTGTGGCAAACCC +TRBJ1-4 CACAACATTAAAGACTGGAAGGAAAACC +TRBJ1-3 CACAGCCTCCCAGGGCCACTTCAAAACC +IGKV1-6 CACAGTGTTACACACCCGAACAGAAACC +TRBJ2-4 CACAGCCCCGAGACGCGGCACAGAAACT +IGKV1-5 CACAGTGTTACACACCCGAACATAAACC +TRAJ29 CACAGTGATTTCCTCCATAACAAAAACC +TRAJ28 CACAGAGTTTCCTTTCTTTGCAAAAACC +TRAJ27 CACAGTCTTTAGTGCTATTGCAATAACC +TRAJ26 CACAGTGTTTCTGGGCTCAGCAAAAACC +TRAJ24 CACTGTGACAAACACCTCTACAAAATGG +TRAJ23 CACTGTGTTACATATCCTGTCAAAAACA +TRAJ22 CACTATGATTTGCTCAACAACAAAACCA +TRAJ21 CACCATGTTTATTGGCATTACAAAAAAT +IGKV1-8 CACAGTGTTACACACCCAAACAAAAACC +TRBJ2-7 CACGGAGGTGCACCCCCGCATGCAAACC +IGKV1-9 CACAGTGTTACAAACCTGAACATAAACC +IGKV3D-11 CACAGTGATTCCACATGAAACAAAAACC +TRBJ2-3 CACAGCCTGGAGGCCCAGGACAAAAACC +IGKV1-17 CACAGTGTTACACACCCGAACATAAAAC +IGKV1D-17 CACAGTGTTACACACCCGAACATAAACC +IGKV1D-16 CACAGTGTTACACACCCAAACATAAACC +TRAJ58 CACTGTGCTGAGGGGCTTTGCAAAAACC +TRAJ59 CACAGGAGCTGCTGCCTTTACATAAACT +TRAJ52 CACTGCACTGGAGGCCTTTACAAGAACC +TRAJ53 CACAGCCACAGAAGGCTTTACAGAAACA +TRAJ50 CACAGCCATCCAAACCTTTACAATAACT +TRAJ56 CACAATGACACGAGGATCTACAAAAACT +TRAJ57 CACCCCCACAGACTGCTTTACAAATACT +TRAJ54 CACCACAGCACGAGGCTTTACAGAAACT +IGKV3-15 CACAGTGATTCAACATGAAACAAAAACC +TRDD2 CACAATGAAACACATCAGTATAAAAACC +IGKV2-40 CACAGTGGTACAGCCCTGAACAGAAACC +IGKV6D-21 CACTGTGTTACAACCCAGAACAAAAACT +IGKV3-20 CACAGTGATTCCACAGGAAACCAAACCT +TRDJ3 CACATTATGACAGTGCCTCACAGGTAAC +IGHD3-3 CACAGTGACACAGACCTCACCCCAAACC +TRAJ49 CACTGTGATAGGAAGCTCAACAAAAACC +TRAJ48 CACAGTGTTCTAAGTCATTGCAAAAACC +TRAJ41 CACAGTGCGTTCTCCCTAAACAAAAAAC +TRAJ40 CACAGTGTTATGTGTCTCTACATAAACC +TRAJ43 CACAGTAATACATGCTCTAACAAAAACC +TRAJ42 CACAGTCCTATGGGGCTTTACAATAATC +TRAJ45 CACTCTGGGCCAACCCTTTACATAAACT +TRAJ44 CACTGTGAGATGCTTCATAACAGAAACC +TRAJ47 CACAGCGTCAAACTCCTCTACAAAAACA +IGLJ2 CACTGTGACACAGGCTCATACAAAAACC +IGLJ1 CACAGTGACTGAGGCTCAGACCAAAACC +IGLJ7 CACAGTGACACAGCCCCACACACAAACC +IGLJ6 CACTGTGATATAACCCTGCACACAAACC +LCK CACACACACACACACACAAGCCAAAACC +IGKV4-1 CACAGTGCTTCAGCCTCGAACACAAACC +TRAJ4 CACAATCAGATGGTGCTTTACAAGAACT +HPRT_12 CACACACACACACACACACACAAATACA +IGKV3D-20 CACAGTGATTCAGCTTGAAACAAAAACC +TRAJ5 CACCCTGGTACATCACAGTACAAAATCC +TRAJ6 CACAGTGAGGACAGCCTTTGATAAAACC +TRAJ3 CACTGTGGGTAAGGTCTTTGAGATAACC +IGKV2D-29 CACAGAGGTACAGACCAATACAGAAACC +IGKV2D-28 CACAGTGGTACAACCCCTAACAGAAACC +TAL2 CACTGTGACTTACTGGAGATATAAAAAT +IGKV2D-26 CACAGTGCTACAGCCCTGAGCACAAACC +TRAJ61 CACAGGAGTGGGCACCTTTACAAAAACC +TRBD1 CACAATGTTACAGCTTTGTACAAAAACA +IGKV1D-8 CACAGTGTTACACACCCGAACAAAAACC +LMO2 CACAGTATTGTCTTACCCAGCAATAATT +IGKV2D-30 CACAGTGGTACAGCCCTGAACAAAAACC +IGKV1/OR2-108 CACAGTGTTACAAGCCATAACAAAAACC +TRBD2 CACAATGTTACACCATGATACAAAAATG +IGKV2-24 CACAGTGGTACAGCTCTGAACAAAAACC +IGKV2-28 CACAGTGGTACAACCCCTAACAGAAACC +IGKV2-29 CACAGTGGTACAGACCAATACAGAAACC +TRAJ16 CACAGTGATCTATTGTACCACAAAAACC +TRAJ17 CACAATGATTTAAGGCCCAGCAAAAACC +TRAJ15 CACAGTGAAATGAGGCCCTGCAAATACC +TRAJ12 CACAGTGTTTCTTAGTCAGTCAAAAACA +TRAJ13 CACTGTAATGCCTGCCTTTACAAAATGA +TRAJ10 CACAGTGTTTGATGCCTCACAATAAACT +TRAJ11 CACTATAGCAAATCCCCATACAAAAATG +TRAJ18 CACAATGCTGGTCCCCTTTACATGAACC +IGKV1D-43 CACAGTGTTACACACCCAAACAAAAACC +IGKV2-30 CACAGTGGTACAGCCCTGAACAAAAACC +TRAJ30 CACTGTGATTGGGACCATAACAAAAACT +IGHD3-16 CACAGTGACACAGACCTCACTTCAAACC +IGHD4-4 CACAGTGTCACAGAGTCCATCAAAAACC +IGHD3-10 CACAGTGACACAGACCTCATTCTAAACC +IGHD7-27 CACAGTGGTTCTCAGCTCAGCCAAAACC
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/HS23RSS.txt Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,174 @@ +Gene Sequence +IGLV2-8 CACAGTGTTTTAAGTCAATGAGGAAGTAAGACCAAAACC +IGLV4-3 CACAGTGACACAGATGAAGGGGAAGTGAGGCCAAAACTC +TRBV21-1 CACAGTGCCGAATGTTAGCCCTTCTTAGAACACAAACTC +IGHV1-17 CACAGTGCGAAAACCCACATCCTGAGAGTGTCAGAAACC +IGHV1-14 CACAGTGTGAAAACCCACATCCTGAGAGAGTCAGAAATC +IGHV3-15 CACAGTGAGGGGAGGTCAGTGTGAGCCCGGACACAAACC +IGHV1-18 CACAGTGTGAAAACCCACATCCTGAGGGTTTCAGAAACC +IGHV3-11 CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC +IGLV2-5 CACAGTGGTCCAAGTTCTTGGGGAACTGAGACGAAAACC +IGHV3-13 CACAGTGAGGGGAAGTCAGTATGAGCCCAGACACAAACC +TRAV2 CACAGAGGCAGGGAACCCATGAAGAGCTGAACAGAAACA +TRAV3 CACACTGATAGGGGCTGCAGGGGGAGCAGAACACAAACT +TRAV4 CACAGTGAGACAGATGGGCCTGCACCTGTGCCGTTTTCC +TRAV5 CACATTGCTTCTCAGGCACCTGTATCCTGTACCCAAACC +TRBV6-4 CACAGTGCTGCACAGCCATCTCCTCTCTGTACATAAATG +IGHV3-19 CACTGTGAGAGGACGGAAGTGTGAGCCCAGACACAAACC +TRBV6-6 CACAGCGCTACAAGACCATCTCCTCTCTGCACATAAAGG +TRBV18 CACATTGATGCAGAGCCACATCCTCTCAGTCCACAAACA +TRBV6-1 CACAGCGCTGCATGGCCGTCTCCTCTCTGCACATAAAGG +TRBV6-2 CACAGTGCTGCACGGCTGTCTCCTCTCTGCACAGAAAGG +TRBV6-3 CACAGTGCTGCACGGCTGTCTCCTCTCTGCACAGAAAGG +TRBV12-4 CACAGCGCTGCAGAATCACCCCTTTCCTGTGCAGAAACC +TRAV27 CACAGTGCTCTTGAGGCACCTGCTGCCTGCACCCAAACC +TRBV5-4 CACAGCCCGGCAGAGTCTCTGACATTCTATACATAAACT +TRBV5-3 CACAGCCCTGCAGAGTCACTGGAACTCTGTGCACTAATC +TRGV3 CACAGTGATTCAGACCTGTCCTACACCACACTGAAAATC +TRBV5-1 CACAGCCCTACAAAGCCAACCACATTCTGTGCACAAACC +IGHV3-9 CACAGTGAGGGGAAGTCAGCGAGAGCCCAGACAAAAACC +TRGV8 CACAGTGATTCAGACCTGTGCTACACCACACTGAAAATC +TRBV5-8 CACAGCCCAGCAGAATCACTGACATTCTGTATATAAACT +TRDV2 CACCCTGCTGCAGCTCTACTTCTGAGCAGCTCAAAAACC +IGKJ1 CACAGTGAGAGCTCTCCATTGTCTTGCTGAACAGAAACC +IGKJ2 CACAATGGTTCCTCTTAACTTCCCTCCTATACAAAAACT +IGKJ4 CACAGTGAGGGATCTCACCCTTTCCCCTCAACAAAAACC +TRAV26 CACACTGGGACAGATGGGGCTGCACCTGTGCAATATCTC +TRAV25 CACAGTGCTCCCCAAGCACCTGCTGCCTGTCTCCAAATC +TRAV24 CACAGTGCTGTTCAGGCACCTGCAGCCCATACGCAAACC +IGHV1-24 CACAGTGTGAAAACCCACATCCTGAGAGCGTCAGAAACC +TRAV41 CACAGTGCTCCCCAGGCACCTGGAGCCCGTACCTAAACT +IGLV5-52 CACAGTGCTCCAGACCCATGAGGAAGTAAGACAAAACCC +IGLV10-54 CACAGTGCCTCAGGCCAGTGGGGAAGTGAGATAAAAACT +IGHV3-64 CACAGTGAGGAGAAGTTAATGTGGGACCATGCAGAAACC +IGHV3-66 CACAGTGAGGGGAGGCCATTGTGCGCCCAGACACAAACC +TRBV7-9 CACAGCATGGCACAGTCGCCTCCTTCCTGCTCACAAACC +TRBV7-8 CACAGCATGGCACAGTTGCCTCCTTCCTGTTCACAAACC +IGLV1-40 CACAGTGCTCAGGCGTGTGTGTGAACTGAGACAAGAACC +IGLV3-21 CACGGTGACACAGGCAGATGAGGAAGTGAGACAAAAACA +IGHV7-4-1 CACAGTGTGGAAACCCACATCCTGAGAGTGTCAAAAACC +IGLV3-27 CACAGTGACACAGGCAGATGGAGAAATGAGACACAAACC +TRBV4-1 CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC +IGLV1-41 CACAGTGCTCCAGGCCAATGGGGAACTGAGACAAGAACC +TRAV13-1 CACATTGCTTCCCAGGCACCTGCTACCCGTACACAAACC +TRAV13-2 CACATTGCTTTCCAGGCATCTGTAACCATCACCCAAACC +TRBV5-6 CACAGCCCATCAGAGTCACTGACGTTCTGTATATAAACT +IGLV3-25 CACAGTGACACAAGCAGACAGGGAAGTGAGACATAAACC +TRBV10-2 CACAGTGCTGCACAGCTGCCTCCTCTCTGCACGGAAATG +TRBV10-3 CACAGTGCTGCATGGCTGCCTCCTCTCTGCACGTAAACA +TRBV10-1 CACAGTGCTGCACAGCTGCCTCCTCTCTGCACATAAAGG +IGLV6-57 CACAGTGCTCCAGACCCATGGGGAAGTGAGACAGAAACT +TRBV6-5 CACAGCGCTACAAGGCCGTCTCCTCTCTGCACATAAAGG +IGHV3-73 CACAGTGAGGGGAGGTCAGTGTGAGCCCGGACACAAACT +IGHV3-72 CACAGCGAGGGGAGGTCAGTGTGAGCCCGGACACAAACC +IGLV1-47 CACAGTGCTCCAGGCCCAGGGGGAACTGAGACAAGAACC +TRAV9-2 CACAGTGACAGGGACTGCAGGGGGAGCTGAGCACAAACT +IGLV7-46 CACAGTGACAGACCCATGAGAGGAACCAAGACATAAACC +TRBV6-7 CACAGCGCTGCAAGGCTGTCTCCTCTCTGCACATAAAGG +TRBV3-1 CACAGCCCTGCAAAGTCACTGCATCCCTGTGCACAAACC +IGLV7-43 CACAGTGACAGACTCATAAGAGGAACCAAGACATAAACC +IGHV1-8 CACAGTGTGAAAAACCACATCCTCAGAGAGTCAGAAACC +IGLV1-51 CACAGTGCTCCAGCCCAATGGGGAACTGAGACAAGAACC +IGLV1-50 CACAGTGCTCCAGGCCCAGGGGGAGGTGAGAGAAGAACC +IGHV1-3 CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACC +IGHV1-2 CACAGTGTGAAAACCCACATCCTGAGGGTGTCAGAAACC +IGHV4-39 CACAGTGTGGAAACCCACATCCCGAGGGAGTCAGAAACC +TRBV2 CACAGCCTTGCAAAGACAACTCCAGCCTGTGCAAAATCC +TRDD3 CACAGTGCTACAAAACCTACAGAGACCTGTACAAAAACT +TRDD2 CACACAGGTTGGAGTGCATTAAGCCTTTGTCCAAAAACA +TRAV29/DV5 CACAGTGCTCTCCAGGCACCTGCAGCCCGTACTCAAACC +IGLV3-10 CACAGTGACACTGGCAGATGGGGAAGTGAGACACAAACC +TRBV27 CACAGTGTTGCACAGCCAGCTGCTCTCTGCACAAAAACA +TRBV5-5 CACAGCCCGGCAGAATCACTGACATTCTGTATATAAACT +TRGV10 CACATACTAGAACTGTTGAAACAACATGCACAAAATCCC +TRGV11 CACAGTGTTCGAGTTGTCAAGATAAGGTACACAGAAACT +TRGV5 CACAGTGATTCAGACCTGTCCTACACCACACTGAAAATC +TRBV30 CACACTGAGCTGGGTGGGGCAGACATCTGTGCAAAAACC +IGHV3-48 CACAGTGAGGGGAGGTCAGTGTGACACCAGACACAAACC +IGHV3-49 CACAGTGAGGGGAGGTCAATGTGAGCCCAGACACAAACC +TRGV2 CACAGTGATTCAGATCCGCCCTACACCACACTGAAAATC +IGLV CACAGTGCTGCAGGCACATGGGGAACCGAGACAAAAACC +IGHV3-43 CACAGTGAGGGGAAGTCAGCGAGAGCCCAGACAAAAACC +IGHV3-41 CACAGTGAGAGGAAGTCCGTGTGAGCCCAGACACAAACC +IGHV3-7 CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC +HPRT_23 CACTGTAGGTTCATCTTTATGGAGATGCTCATGGCCTCA +TRGV9 CACAGCAGCAGACAGTTTGAGCCATCCCATTCAATAAAT +IGLV4-60 CACAGTGATACAGGCAGATGAGGAAGTGGGACAAAATCC +TRBV28 CACAGCGCAGCACAGCCGCATCCTCTCTGCACAAAAAGA +TRBV25-1 CACAGTGCTACATGGATACCGACACTCCGCACAGAAAGG +IGLV4-69 CACAGTGACACAGGCAGATGAGGAAGTGGGACAGAAACC +IGHV4-28 CACAGTGAGGGGAGGTGAGTGTGAGCCCAGACACAAACC +TRBV3-2 CACAGCCTTACAGAGCCACTGCATCCCTGTGCACAAACC +IGHV6-1 CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC +TRAV19 CACAGTGAGATGGGTGCCTGTGGGAGCCCTACAAAAACC +TRAV10 CACTGTGCTCCACAGGCACTTGAAGCCAGTATGCAAACC +TRAV17 CACAGTGTTCCCCAGGAACCTGCAGCCTCTACGCAAACC +TRBV4-2 CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC +TRBV4-3 CACAGCCTTGCAGAGTCACCGCTTTCCTGTGCAGAAACC +IGHV3-53 CACAGTGAGGGGAGGTCAGTGTGAGCCCAGACACAAACC +IGHV3-52 CACAGTGAGAGGCACAGTGAGGGGAGGTCAGTGTGAGCC +TRBV7-3 CACAGCATGACACAATCGCCTCCTTCCTGCTCATAAACC +TRBV7-2 CACAGCATGGCACAGTCGCCTCCTTCCTGCTCATAAACC +TRBV7-7 CACAGCATGGCACAGTCGCCTCCTTCCTGTTCACAAACC +TRBV7-6 CACAGTGTGGCATAGTCGCCTCCTTCCTGTTCACAAACC +IGHV2-5 CACAAAGACACAGCCCAGGGCACCTCCTGTACAAAAACC +IGLV3-13 CACAGTGACACAGGCAGATGGGGAAGTGAGACACAAATC +IGLV3-16 CACAGTGACACAAGGAGACAGGGAAGTGAAACATAAACC +IGLV3-19 CACAGTGACACAGACAGATGGGGAAGTGAGACAGAAACC +IGHJ3 CACAGGGACACAGTCCGTTCCTAGACCCAGACACAAACC +IGHV1-45 CACAGTGTGAAAACCCACATCCTGAGACCGTCAGAAACC +IGHV1-46 CACAGTGTGAGAAACCACATCCTCAGAGTGTCAGAAACC +IGLV8-61 CACAGTGATTTAAACCTATGAGGAAGTGCAACTAAAACC +IGHV4-59 CACAGTGAGGGGAGGTGAGTGTGAGCCCAGACAAAAACC +TRAV1-1 CACAGTGACTATGAGGCCTCCTTAACTGTGCCAAAATTC +TRAV1-2 CACGGTGACTATGAGGCCTCTTTAGCTGCACCAAAATTC +TRGV7 CACAGTGATTCACACCTGCCCTACACCACACTGAAAATC +IGHV3-20 CACAGTGAGGGGAAGCCAGTGAGAGCCCAGACACAAACG +IGHV3-21 CACAGTGAGGGGAAGTCAGTGTGAGCCCAGACACAAACC +IGHV3-23 CACAGTGAGGGAAGTCATTGTGAGCCCAGACACAAACCT +IGLV2-23 CACAGTGGTCCAAGTTCATGGGGAACTGAGACCAAAACC +TRBD1 CACAATGATTCAACTCTACGGGAAACCTTTACAAAAACC +TRGV4 CACAGTGATTCAGATCCGCCCTACACCACACTGAAAACC +TRAV12-1 CACAGTGCTCCCCAGACACCTGCAGTCTGTACCCAAACC +TRAV12-3 CACAGTGCTCCCCAGACACTTGCAGTCTGTACCCAAACC +TRAV12-2 CACAGTGCTCCCCAGACACCTGCAGTCTGTACCCAAACC +IGLV3-6 CACAGTGACACAGGCAGATTAGGAAGTGAGACACAAACC +IGLV3-1 CACAGTGACACAGGCAGATGCGGAAGTGAGACAGAAACC +IGHV1-58 CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACG +TRBD2 CACGATGATTCAGGTAGAGGAGGTGCTTTTACAAAAAAC +IGLV3-9 CACAGTGACACAGGCAGATGAAGAAGTGAGACACAAACC +TRBV6-8 CACAGCGCTACAAGGTTGTCTCCTCTCTGCACATAAAGG +TRBV9 CACAGCCCTGCATGAGCATCAGCCTTCTGTGCAATAACA +IGHV3-35 CACTGTGAGAGGTCGGAAGTGTGAGCCCAGACACAAACC +IGHV3-33 CACAGTGAGGGGAGGTCATTGTGCGCCCAGACACAAACC +IGLV2-14 CACAGTGGTCCAAGTTCCTGGGGAACTGAGACCAAAACC +IGHV3-30 CACAGTGAGGGGAAGTCATTGTGCGCCCAGACACAAACC +TRAV8-1 CACAGTGTCTGGGACTGCAAAGGGAGCTGAACACAAACT +IGLV2-18 CACAGAGGTCCAAGTTCCTGGGGAACTGAGACCAAAACC +TRAV14/DV4 CACAGTGACAGAACTGTCGGAGGGAGGTGTACAAAAGCC +IGHV2-26 CACAGAGACACAGCCCAGGATGCCTCCTGTACAAGAACC +TRBV16 CACAGTGTTAAATATTAGCTAATCTTAGGACACAGACTC +TRBV15 CACAGAGCTGCAGTGCTTCCTGCTCTCTGTTCATAAACC +TRBV14 CACAGTGCTTCACAGTCGTGCCCTTGCTGTGCAAAACCA +TRBV13 CACAGCATGGCACAGTCGCCTCCTTCCTGCTCACAAACC +TRBV11-3 CACAGTGTAGCAGAGACACTTCCCTCCTGTGCAGAAAAC +TRBV19 CACAGTGAAGCACGGATGTCGCCTCTCTGTGCATAAATG +TRBV11-2 CACAGTGTAGCAGAGACACTTCCCTCCTGTGCAGAAAAC +TRAV38-1 CACAATGAGATGAGCAGCAGGTAGAGGCTTACAGAAATC +TRBV12-5 CACAGCGCTGCAGAATCACCTGCTCCCTGTGCAGAAACC +TRBV12-2 CACAGCGCTGCAGAATCTCCCCCTCCCTGTGCAGAAACT +TRBV12-3 CACAGCGCTGCAGAATCACCCCTTTCCTGTGCAGAAAAC +IGHV1-69 CACAGTGTGAAAACCCACATCCTGAGAGTGTCAGAAACC +LMO1_23 CACAGTGGCTCACCACCCCACACAGCCCTCACTCTGGCA +IGLV5-45 CACAGTGACACACACAGATGGGGAAGTGGGACAAAAACC +IGLV5-37 CACAGTGACACACACAGATGGGAAAGTGGGACAAAAACC +TRAV23/DV6 CACAGTGCTCCCCAGGCACCTGAAGCCTGTACCCAAACC +TRDV1 CACAGTGTTTGAAGTGATAGTAAAAGCAAAACAAAAACC +TRBV20-1 CACAGCGCCAGGAGGGGATCAGACACCGCGGCAAGAACC +IGHV2-10 CACAGAGACAGAGCCCAGGGTGCCTCTTGTACAAGACCC +TRBV29-1 CACAGTGCGGGGCACAGATCAAAGATCTGAGCAAGAACC +TRBV23-1 CACAGCACTGAAATGTCAGTTCCTCTTAGCACACAAACT +TRAV34 CACAGCGATCTTCAGGCCTCTATCAGCTGTCTCCAAACC +TRAV30 CACAGTGATACCCAGGCCTCCAAGACCTGTACTCAAACC +TAL1_23 CACAGCCTCGCGCATTTCTGTATATTGCGTAAGGAAAAG +TRAV39 CACAGTGCTCCCCTGACGCCACCAGTCTGTACCCAAACC
--- a/gene_identification.py Mon Aug 18 04:42:59 2014 -0400 +++ b/gene_identification.py Wed Sep 10 10:33:29 2014 -0400 @@ -1,6 +1,7 @@ import re import argparse - +import time +starttime= int(time.time() * 1000) parser = argparse.ArgumentParser() parser.add_argument("--input", help="The 1_Summary file from an IMGT zip file") @@ -255,6 +256,7 @@ #print ca,cg,cm,(ca+cg+cm) +print "Time: %i" % (int(time.time() * 1000) - starttime) @@ -262,4 +264,3 @@ -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/logo.pm Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,826 @@ +#!/usr/local/bin/perl -w + +=head1 NAME + + logo.pm - organizes data in FASTA and CLUSTAL formats into height data. + +=head1 SYNOPSIS + + Perl module + +=head1 DESCRIPTION + + logo.pm: Takes in strings of aligned sequences and sorts them vertically + based on height as assigned by the following equations found in + Schneider and Stephens paper "Sequence Logos: A New Way to Display + Consensus Sequences": + + height = f(b,l) * R(l) (1) + + where f(b,l) is the frequency of base or amino acid "b" at position + "l". R(l) is amount of information present at position "l" and can + be quantified as follows: + + R(l) for amino acids = log(20) - (H(l) + e(n)) (2a) + R(l) for nucleic acids = 2 - (H(l) + e(n)) (2b) + + where log is taken base 2, H(l) is the uncertainty at position "l", + and e(n) is the error correction factor for small "n". H(l) is + computed as follows: + + H(l) = - (Sum f(b,l) * log[ f(b,l) ]) (3) + + where again, log is taken base 2. f(b,l) is the frequency of base + "b" at position "l". The sum is taken over all amino acids or + bases, depending on which the data is. + + Currently, logo.pm uses an approximation for e(n), given by: + + e(n) = (s-1) / (2 * ln2 * n) (4) + + Where s is 4 for nucleotides, 20 for amino acids ; n is the number + of sequences in the alignment. e(n) also gives the height of error + bars. + +=cut + +package logo; + +use strict; +use Carp; + +################################################################################ +###### SOME VARIABLES ###### +################################################################################ + +my $DEBUG = 0; + +my $AA = 0; +my $NA = 1; + +my %BASES = ("a" => "adenine", + "t" => "thymine", + "g" => "guanine", + "c" => "cytosine", + "u" => "uracil"); + +# does not include B or Z +my %AMINOACIDS = ("a" => "", "c" => "", "d" => "", "e" => "", "f" => "", + "g" => "", "h" => "", "i" => "", "k" => "", "l" => "", + "m" => "", "n" => "", "p" => "", "q" => "", "r" => "", + "s" => "", "t" => "", "v" => "", "w" => "", "y" => ""); + +my @data; +my $kind; +my ($seqs_r, $desc_r); + +my $CONFIDENCE_LIMIT = 0.90; + +################################################################################ +###### SOME FUNCTIONS ###### +################################################################################ + +=head1 APPENDIX + +=cut + +=head2 getHeightData() + + Usage : my ($height_data_r, $description_data_r, $kind) = + logo::getHeightData($input_data_r, $params); + Returns : * REFERENCE TO array of height data + * REFERENCE TO array of input description strings + * $AA if the data is amino acid, $NA otherise + Args : $input_data_r : input data in CLUSTAL or FASTA formats + : $params : hash of parameters + + getHeightData is the entry point into the logo.pm module. $input_data_r is a + reference to an array of strings containing FASTA or CLUSTAL data, where all + lines whose first character is "#" is considered a comment line. + + $params is a hash of parameters with the following keys: + * smallsampletoggle : 0 to turn off small sample correction, otherwise + small sample correction is on + * input_kind : 0 for amino acids, 1 for nucleic acids; if undefined, + logo.pm will attempt to automatically detect whether the + input consists of amino or nucleic acid data. If + $input_kind is defined, only those residues defined by + $input_kind will be in the output -- all other residues are + considered as spaces. For example, if $input_kind is $NA, + the residue "I" or "i" are considered spaces, since "I" and + "i" are not nucleic acid residues. + * stretch : stretch all characters so they are flush at the maximum number + of bits allowed + + Sample use: + + # get FASTA data + open (FASTA, "$fastafile"); + my @inputdata = <FASTA>; + close (FASTA); + + my %heightparams = ( + smallsamplecorrection => 0, + input_kind => 0, + stretch => 0 + ); + + # get height data + my ($heightdata_r, $desc_r, $kind) = logo::getHeightData(\@inputdata, \%heightparams); + +=cut + +# entry point into module +sub getHeightData { + + # $smallsampletoggle is toggle to turn off small sample correction (1 to turn off) + # $input_kind can be $AA or $NA or undef + my ($input_data_r, $params) = @_; + + # gary 040119: adjust for formats (Unix is \n, Mac is \r, Windows is \r\n) + $input_data_r = normalizeData($input_data_r); + + # gets sequences, sets $kind temporarily + my ($goodlength, $maxreslength, $badline, $validformat); + ($seqs_r, $desc_r, $maxreslength, $goodlength, $badline, $validformat) = + getSeqs($input_data_r, $params->{input_kind}); + +# for(my $i = 0; $i < scalar @$seqs_r ; $i++) { +# print STDERR ($desc_r->[$i] . "\n" . $seqs_r->[$i] . "\n"); +# } +# print STDERR "maxreslength = $maxreslength\n"; +# +# exit(0); + + if ($DEBUG) { print STDERR ("point 1\n");} + + # check for valid format + if ((defined $validformat) && ($validformat == 1)) { +# print("returning\n"); + return (undef, undef, undef, undef, undef, 1); + } + + if ($DEBUG) { print STDERR ("point 2\n");} + + # check for bad length + if (!$goodlength) { + return (undef, undef, undef, $goodlength, $badline); + } + + # reset $kind if in $input_kind + if (defined $params->{input_kind} && isLegalKind($params->{input_kind}) ) { + $kind = $params->{input_kind}; + } + + # build data + buildData(@$seqs_r, $params->{smallsampletoggle}, $params->{stretch}, $maxreslength); + + if ($DEBUG) { print STDERR ("point 3\n");} + +# print STDERR ("data size = ", scalar @data, "\n"); +# foreach (@data) { +# print STDERR ("$_\n"); +# } +# +# exit(0); +# +# print STDERR ("return at 2\n"); + return (\@data, $desc_r, $kind, $goodlength, $badline); +} + +sub isLegalKind { + return ($_[0] =~ /^[01]$/); +} + +################################################################################ +# +# sub normalizeData($data_r) returns $data_r, with Mac/Unix/Windows newline +# style normalized to standard Unix-style newline style +# +################################################################################ +sub normalizeData { + my ($data_r) = @_; + + # check args + if (not defined $data_r) { + die "data_r must be defined\n"; + } + + my @normalized = (); + foreach my $pseudo_line (@$data_r) { + my @split_line = split(/[\r\n]+/, $pseudo_line); + push(@normalized, @split_line); + } + + return \@normalized; +} + + +################################################################################ +# +# sub getSeqs($data_r, $kind) returns 5 values: +# +# * array reference to sequence strings +# * array reference to sequence names +# * length of sequence +# * 1 if all sequences have the same length, 0 else +# * line number L where sequenceLength(L) != sequenceLength(other lines), else +# undef +# +################################################################################ +sub getSeqs { + my ($input_data_r, $kind) = @_; + + unless( $input_data_r->[0] ){ + return (undef, undef, undef, undef, undef, 1); + } + + # skip all comment chars and lines of all spaces + while ( ($input_data_r->[0] =~ /^\s*\#/) || ($input_data_r->[0] =~ /^\s*$/) ) { + shift @$input_data_r; + if( !defined $input_data_r->[0]) {return (undef, undef, undef, undef, undef, 1);} + } + + if (isFormat_FASTA($input_data_r)) { + return getSeqs_FASTA($input_data_r, $kind); + + } elsif (isFormat_CLUSTAL($input_data_r)) { + return getSeqs_CLUSTAL($input_data_r, $kind); + + } elsif (isFormat_FLAT($input_data_r)) { + return getSeqs_FLAT($input_data_r, $kind); + + } else { + if ($DEBUG) {print STDERR ("format nothing\n");} + return (undef, undef, undef, undef, undef, 1); + } + +# if ($_[0] =~ />/) { +# return getSeqs_FASTA(@_); +# } else { +# return getSeqs_CLUSTAL(@_); +# } +} + +################################################################################ +# +# sub isFormat_FASTA($data_r) returns 1 if $data_r is in FASTA format +# +################################################################################s +sub isFormat_FASTA { + my ($input_data_r) = @_; + + # check args + if (not defined $input_data_r) { + Carp::confess("logo::isFormat_FASTA : input_data_r must be defined\n"); + } + + if ($input_data_r->[0] =~ />/) { + return 1; + } else { + return 0; + } +} + +################################################################################ +# +# sub isFormat_CLUSTAL($data_r) returns 1 if $data_r is in CLUSTAL format +# +################################################################################ +sub isFormat_CLUSTAL { + my ($input_data_r) = @_; + + # check args + if (not defined $input_data_r) { + Carp::confess("logo::isFormat_CLUSTAL : input_data_r must be " . + "defined\n"); + } + + my $i=0; + +# # skip spaces or just "*" and "." and ":" +# while ($input_data_r->[$i] =~ /^[\*\:\s]*$/) { +# $i++; +# } + + # if it looks like CLUSTAL W (version) ... , then it must be clustal + if ($input_data_r->[$i] =~ /^\s*CLUSTAL/) { + return 1; + } + + # CLUSTAL looks like: "name seq" + if ($input_data_r->[$i] =~ /^\s*(\S+)\s+(\S+)\s*$/) { + return 1; + } else { + return 0; + } +} + +################################################################################ +# +# sub isFormat_FLAT($data_r) returns 1 if $data_r is in FLAT format +# +################################################################################ +sub isFormat_FLAT { + my ($input_data_r) = @_; + + # check args + if (not defined $input_data_r) { + Carp::confess("logo::isFormat_FLAT : input_data_r must be defined\n"); + } + +# print("checking flat\n"); +# print("first element = -->", $input_data_r->[0], "<--\n"); + + if ($input_data_r->[0] =~ /^[a-zA-Z\-]+\s*$/) { + return 1; + } else { + return 0; + } +} + +################################################################################ +###### FORMATTING FUNCTIONS ###### +################################################################################ + +# the flat sequence format is as follows: +# sequence1 +# sequence2 +# sequence3 +# ... +# sequenceN +sub getSeqs_FLAT { + + if ($DEBUG) {print STDERR "DOING FLAT\n";} + + my ($input_data_r, $input_kind) = @_; + + my $linelength = 0; + my $seqCount = 0; + my $total_residues = 0; + my (@returnVal, @desc) = (); + my $prevlinelength = undef; + my $NA_count = 0; + + foreach my $seq (@$input_data_r) { +# chomp $seq; + $seq =~ s/\s+$//; + + my @chars = split(//,$seq); + + my $char; + foreach (@chars) { + $total_residues++; + $linelength++; + + # set $char + if (defined $input_kind) { + if ($input_kind == $AA) { + $char = (isAA($_)) ? $_ : "-"; + } else { # == $NA + $char = (isNA($_)) ? $_ : "-"; + } + } else { + $char = $_; + if (isNA($char)) { + $NA_count++; + } + } + + $returnVal[$seqCount] .= $char; + } + $desc[$seqCount] = "no name"; + + if ($seqCount == 0) { + $prevlinelength = $linelength; + } elsif ($prevlinelength != $linelength) { # different number of residues, so complain + return (undef, undef, undef, 0, $seq); # 0 for not same length, $seq is name + } + $linelength=0; + + $seqCount++; + } + + # determine whether to use $NA or $AA + if (!defined $input_kind) { + if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) { + $kind = $NA; + } else { + $kind = $AA; + } + } + + return (\@returnVal, \@desc, $prevlinelength, 1, undef); +} + +sub getSeqs_CLUSTAL { + + if ($DEBUG) {print STDERR "DOING CLUSTAL\n";} + + my ($input_data_r, $input_kind) = @_; + + my @returnVal; + my @desc; + my $seqCount=0; + my $reslength = 0; + my ($name, $seq); + +# my $input_kind = pop @_; +# my $CONFIDENCE_LIMIT = 0.90; + my $NA_count = 0; + my $total_residues = 0; + my ($prevlinelength, $linelength) = (0,0); + +# foreach (@_) { + foreach (@$input_data_r) { +# chomp; + + if ($DEBUG) {print STDERR ("line = $_\n")}; + + $_ =~ s/\s+$//; + + # skip if it is a comment character -- first character is "#" + next if (/^\s*\#/); + + # skil if it is a CLUSTAL W header line + next if (/^\s*CLUSTAL/); + + # if spaces or just "*" and "." and ":" + if (/^[\*\.\:\s]*$/) { + $seqCount=0; + $prevlinelength=0; + next; + } + + ($name,$seq) = (/^\s*(\S+)\s+(\S+)\s*$/); + + if ($DEBUG) { print STDERR ("name, seq = $name, $seq\n"); } + + # add new entry + if (!defined $desc[$seqCount]) { + $desc[$seqCount] = $name; + $returnVal[$seqCount] = ""; + } + + if(!defined $seq) {return (undef, undef, undef, undef, undef, 1);} # Something has gone terrible wrong + + my @chars = split(//,$seq); + my $char; + foreach (@chars) { + if ($seqCount == 0) { + $reslength++; # all sequences have same residue length, so only count first one + } + + $total_residues++; + $linelength++; + + # set $char + if (defined $input_kind) { + if ($input_kind == $AA) { + $char = (isAA($_)) ? $_ : "-"; + } else { # == $NA + $char = (isNA($_)) ? $_ : "-"; + } + } else { + $char = $_; + if (isNA($char)) { + $NA_count++; + } + } + + $returnVal[$seqCount] .= $char; + } + + if ($seqCount == 0) { + $prevlinelength = $linelength; + } elsif ($prevlinelength != $linelength) { # different number of residues, so complain + return (undef, undef, undef, 0, $name); + } + $linelength=0; + + $seqCount++; + } + + # determine whether to use $NA or $AA + if (!defined $input_kind ) { + if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) { + $kind = $NA; + } else { + $kind = $AA; + } + } + + return (\@returnVal, \@desc, $reslength, 1, undef); +} + +# if $input_kind is defined, residues that are not defined are set to space +sub getSeqs_FASTA { + + if ($DEBUG) {print STDERR "DOING FASTA\n";} + + my ($input_data_r, $input_kind) = @_; + + my @returnVal; + my @desc; + my $count=-1; + my $newElem=0; + +# my $input_kind = pop @_; + +# my $CONFIDENCE_LIMIT = 0.90; + my $NA_count = 0; + my $total_residues = 0; + my $reslength = 0; + my $maxreslength = 0; + + my ($goodlength, $currline, $prevline); + + +# # skip all lines that are all spaces +# while ($_[0] =~ /^\s*$/) { +# shift @_; +# } + +# foreach (@_) { + foreach (@$input_data_r) { + + $_ =~ s/\s+$//; + + # skip if it is a comment character -- first character is "#" + next if (/^\s*\#/); + + # skip all lines that are all spaces + next if (/^\s*$/); + + $_ =~ s/\s+$//; # cut trailing white space + $_ =~ s/^\s+//; # cut leading white space + if (/>/) { + $currline = $_; + ($desc[scalar @desc]) = ($_ =~ />\s*(.+)$/); + + if (not $newElem) { + $count++; + $newElem = 1; + } + } else { + if ($newElem) { + $maxreslength = $reslength if $maxreslength == 0; + if (($maxreslength != 0) && ($maxreslength != $reslength)) { + return (undef, undef, undef, 0, $prevline); + } + + $maxreslength = $reslength; + $reslength = 0; + } + + my @chars = split(//,$_); + my $char; + foreach (@chars) { + $reslength++; + $total_residues++; + + # set $char + if (defined $input_kind) { + if ($input_kind == $AA) { + $char = (isAA($_)) ? $_ : "-"; + } else { # == $NA + $char = (isNA($_)) ? $_ : "-"; + } + } else { + $char = ($_ =~ /[a-zA-Z]/) ? $_ : "-"; # if not alpha char, use space + if (isNA($char) && !isSpace($char)) { + $NA_count++; + } + } + + if ($newElem) { + $returnVal[$count] = $char; + } else { + $returnVal[$count] .= $char; + } + $newElem = 0; + } + $prevline = $currline if $currline =~ />/; + } + } + + # check if last is biggest + if (($maxreslength != 0) && ($maxreslength != $reslength)) { + return (undef, undef, undef, 0, $prevline); + } +# $maxreslength = ($reslength > $maxreslength) ? $reslength : $maxreslength; + + # determine whether to use $NA or $AA + if (!defined $input_kind) { + if ($NA_count / ($total_residues+1) >= $CONFIDENCE_LIMIT) { + $kind = $NA; + } else { + $kind = $AA; + } + } + + return (\@returnVal, \@desc, $maxreslength || $reslength, 1, undef); # 1 for good lengths +} + +sub isSpace { + return $_[0] =~ /[ \-]/; +} + +sub isAA { + return (defined $AMINOACIDS{lc $_[0]}); +} + +sub isNA { + return (defined $BASES{lc $_[0]}); +} + +################################################################################ +###### DATA BUILDING FUNCTIONS ###### +################################################################################ + + +# arguments: takes reference to array and lines aligned sequences of bases or +# amino acids +# returns: updated reference array to reflect contents of sequences sorted +# vertically by height as described by (1) +sub buildData { + + my $currentx = 0; + my $h; + my $count=0; + my $maxreslength = pop (@_); + my $stretch = pop(@_); + my $smallsampletoggle = pop (@_); + my $totalsize = $#_+1; + + while ($currentx < $maxreslength) { #(length $_[0])) { + my $allspaces = 1; + my $spaceCount=0; + + # get vertical sequence + my @vert=(); + foreach (@_) { # foreach sequence + my $currentchar; + + # set currentchar, set to " " if $_ is not long enough + if ($currentx >= (length $_)) { + $currentchar = " "; + } else { + $currentchar = substr($_,$currentx,1); + } + + # in all sequences, "-" is considered as a space + # don't count " " and "-" + if (($currentchar ne "-") && ($currentchar ne " ")) { + $vert[scalar @vert] = uc substr($_,$currentx,1); + $allspaces = 0; + } else { + $spaceCount++; + } + } + + if ($allspaces) { + # build @vert + @vert = (" 0", ">0"); + + # place in @data + $data[scalar @data] = \@vert; + + $currentx++; + next; + } + + my $error; + if ($smallsampletoggle) { + $error=getError($kind,$totalsize - $spaceCount); + } else { + $error = 0; + } + + # sort vertical sequence by amino acid or base + @vert = sort(@vert); + my $total = $#vert + 1; + + # find H(l) -- must be done before collapsing + $h = getH(@vert); + + # collect like terms + @vert = collapse(@vert); + + # get R(l) + my $r; + if (!defined $stretch || !$stretch) { + $r= getR($kind, $h, $error); + } else { + $r = ($kind == $NA) ? 2 : (log(20) / log(2)); + } + + # place heights + my $count=0; + my $height; + my $elem; + foreach $elem (@vert) { + $height = getHeight(substr($elem, 2) / $total,$r); + $vert[$count] = substr($elem,0,1) . $height; + $count++; + } + + # sort by height + @vert = sort height_sort @vert; + + # put in error bar size + $vert[$count] = ">$error"; + + # place in @data + $data[scalar @data] = \@vert; + + $currentx++; + } +} + +# uses error approximation given by: +# e := (s-1) / (2 * ln2 * ntrue); +sub getError { + return ((($_[0] == $NA) ? 4 : 20) - 1) / (2 * log(2) * $_[1]); +} + +sub height_sort { + my ($lettera, $heighta) = ($a =~ /^(.{1})(\S+)$/); #substr($a,1); + my ($letterb, $heightb) = ($b =~ /^(.{1})(\S+)$/); #substr($b,1); + + # compare by height first, then letter + if ($heighta <=> $heightb) { + return $heighta <=> $heightb; + } else { + return $letterb cmp $lettera; #reversed for some reason... + } +} + +sub collapse { + my @returnVal; + my $current = $_[0]; + my $count=0; + my $freq; + + foreach (@_) { + if ($current eq $_) { + $count++; + } else { + $returnVal[scalar @returnVal] = "$current $count"; + + $current = $_; + $count=1; + } + } + + # add last element + $returnVal[scalar @returnVal] = "$current $count"; + + return @returnVal; +} + +# arguments : $_[0] : list of bases or amino acids +sub getH { + my $h = 0; + my (@vert) = @_; # vertical sequence (comparing multiple sequences) + + my $current = $vert[0]; + my $count=0; + my $freq; + + foreach (@vert) { + if ($current eq $_) { + $count++; + } else { + $freq = $count / ($#vert + 1); + $h += $freq * (log ($freq) / log(2)); + + $current = $_; + $count=1; + } + } + + # add last element + $freq = $count / ($#vert + 1); + $h += $freq * (log ($freq) / log(2)); + + return -$h; +} + +# arguments : $_[0] : $AA or $NA +# $_[1] : uncertainty = H(l) +# $_[2] : error correction factor for small number of sequences +# = e(n) ; assummed to be 0 if not given +sub getR { + my $max = ($_[0] == $NA) ? 2 : (log(20) / log(2)); + my $e = (defined $_[2]) ? $_[2] : 0; + return ($max - ($_[1] + $e)); +} + +# arguments: $_[0] : frequency = f(b,l) +# $_[1] : R(l) +sub getHeight { + return $_[0] * $_[1] +} + +1;
--- a/mutation_analysis.r Mon Aug 18 04:42:59 2014 -0400 +++ b/mutation_analysis.r Wed Sep 10 10:33:29 2014 -0400 @@ -294,35 +294,15 @@ cat(length(dat$Sequence.ID), file="n.txt") - - - - - - - +weblogo = dat[,c("Sequence.ID", "VGene")] +weblogo$VGene = gsub("\\*.*", "", weblogo$VGene) - - - - - - - +rs12 = read.table("HS12RSS.txt", sep="\t", header=TRUE) +rs23 = read.table("HS23RSS.txt", sep="\t", header=TRUE) - - - - - - - +result12 = merge(weblogo, rs12, by.x="VGene", by.y="Gene") +result23 = merge(weblogo, rs23, by.x="VGene", by.y="Gene") - - - +write.table(x=result12$Sequence, file="weblogo_in_rs12.txt", sep=",",quote=F,row.names=F,col.names=F) +write.table(x=result23$Sequence, file="weblogo_in_rs23.txt", sep=",",quote=F,row.names=F,col.names=F) - - - -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seqlogo Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,356 @@ +#!/usr/bin/perl -w + +=head1 NAME + + seqlogo - runs the logo creation script + +=head1 SYNOPSIS + + seqlogo [OPTION]...-f [FILENAME] + +=head1 DESCRIPTION + + Creates a logo for the given input filename. + + Available options: + -B <bar bits> Number of bits in bar (real between 0, 1) + -T <tic bits> Number of bits between tic marks + -C <chars per line> Number of characters per line of logo + -d <box shrink factor> Shrink factor of characters if option c is toggled + -E <error bar fraction> Fraction of error bar to show (real between 0, 1 ) + -f <input file> Input filename + -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT + -h <logo height> Height of output logo (real > 0) + -k <kind of data> 0 for amino acid, 1 for nucleic acid ; if not + defined, a 90% certainty method is used to + determine whether the input data is amino acid or + nucleic acid + -l <sequence lower bound> Lower bound of sequence (integer) + -m <sequence upper bound> Upper bound of sequence (integer) + -o <output file> Name of output file + -s <sequence start> Sequence start number, defaults to 1 (int) + -t <titletext> Text of title, enclosed in "" if more than one word + -w <logo width> Width of output logo + -x <x-axis label> Label for x-axis + -y <y-axis label> Label for y-axis + + Available toggles (no values associated) + -a Toggle antialiasing + -b Toggle bar ends + -c Toggle color + -e Toggle error bar + -M Toggle small sample correction + -O Toggle outlining of characters + -n Toggle numbering of x-axis + -S Toggle stretching of logos to entire length + -X Toggle boxing of characters + -Y Toggle y-axis + +=head1 EXAMPLE + + The following command takes as input "input.fasta" and returns the logo in the + form "logo.eps". Antialiasing, bar ends, color, small sample correction, + numbering of x-axis, and y-axis labelling are turned on: + + seqlogo -f input.fasta -F EPS -o logo.eps -abcMnY + +=cut + +use vars qw($PATH); + +BEGIN { + + use FindBin qw($Bin); + use lib "$Bin"; + + $PATH = $Bin; + +## $PATH = "/h/gary/Seqlogo/Code/"; +# $PATH = "/n/weblogo/home/httpd/weblogo/pub/beta/Seqlogo/Code"; +# unshift(@INC, $PATH); +} + +use logo; +use template; +use Getopt::Std; +use FileHandle; + + +my $opts; +$opts = + $opt_a || # antialiasing + $opt_b || # bar ends (0 or 1) + $opt_c || # color + $opt_e || # show error bar (0 or 1) + $opt_n || # numbering (0 or 1) + $opt_M || # small sample correction (0 or 1) + $opt_O || # outline (0 or 1) + $opt_S || # stretch + $opt_X || # box (0 for no box, 1 for box) + $opt_Y || # y axis + + $opt_B || # bar bits (real) + $opt_T || # tics bits (real) + $opt_C || # chars per line + $opt_d || # box shrinking factor (<1) + $opt_E || # error bar fraction (real) + $opt_f || # input filename + $opt_F || # format (PNG, EPS, PDF, GIF) + $opt_h || # logo height (cm) + $opt_k || # 0 = amino acid, 1 = nucleic acid + $opt_l || # lower bound of sequence to put in logo + $opt_m || # max bound of sequence to put in logo + $opt_o || # output file + $opt_s || # start number for very beginning of sequence + $opt_t || # title text (string) + $opt_w || # logo width (cm) + $opt_x || # x axis label + $opt_y || # y axis label +$opts; + +################################################################################ +##### USAGE ##### +################################################################################ + +sub usage { + my $usage = <<END + +usage: seqlogo -f <input filename> [OPTIONs with values] +Creates a logo for the given input filename. + +Available options: + -B <bar bits> Number of bits in bar (real # > 0) + -T <tic bits> Number of bits between tic marks + -C <chars per line> Number of characters per line of logo + -d <box shrink factor> Shrink factor of characters if option c is toggled + -E <error bar fraction> Fraction of error bar to show (real # > 0) + -f <input filename> Input filename + -F <format> Format of output (EPS, GIF, PDF, PNG), - for STDOUT + -h <logo height> Height of output logo (real # > 0) + -k <kind of data> 0 for amino acid, 1 for nucleic acid + -l <sequence lower bound> Lower bound of sequence (integer) + -m <sequence upper bound> Upper bound of sequence (integer) + -o <output file> Name of output file + -s <sequence start> Sequence start number, defaults to 1 (int) + -t <titletext> Text of title, enclosed in "" if more than one word + -w <logo width> Width of output logo + -x <x-axis label> Label for x-axis + -y <y-axis label> Label for y-axis + +Available toggles (no values associated) bOenc + -a Toggle antialiasing + -b Toggle bar ends + -c Toggle color + -e Toggle error bar + -M Toggle small sample correction + -O Toggle outlining of characters + -p Toggle fineprint + -n Toggle numbering of x-axis + -S Toggle stretching of logos to entire length + -X Toggle boxing of characters + -Y Toggle y-axis + +END + ; + + return $usage; +} + +################################################################################ +##### MAIN FUNCTION ##### +################################################################################ + +# arguments : $_[0] : file name +MAIN: { + init(); + + # feed data from file to make height data array reference + my @input = <INPUTFILE>; + close (INPUTFILE); + my %heightparams = ( + smallsampletoggle => $opt_M, + input_kind => $opt_k, + stretch => $opt_S + ); + + my ($heightdata_r, $desc_r, $kind, $goodlength, $badline, $validformat) = + logo::getHeightData(\@input, \%heightparams); + + # check for errors + if ((defined $validformat) && ($validformat == 1)) { + die("Error: Invalid input format does not conform to FASTA, " . + "CLUSTAL, or Flat.\n"); + } + if (!$goodlength) { + die("Error: Number of characters in each logo line is not " . + "consistent, starting at: ", $badline, "\n"); + } + + my %input = ( + LOGO_HEIGHT => $opt_h, + LOGO_WIDTH => $opt_w, + COLORSCHEME => ($opt_c) ? "DEFAULT" : "BW", + + LOGOSTART => $opt_l, + LOGOEND => $opt_m, + START_NUM => $opt_s, + + TITLETEXT => $opt_t, + YAXIS_LABEL => $opt_y, + XAXIS_LABEL => $opt_x, + + BOXSHRINK => $opt_d, + CHARSPERLINE => $opt_C, + BARBITS => $opt_B, + TICBITS => $opt_T, + RES => "96", + "FORMAT" => (uc $opt_F), + + # toggles + ANTIALIAS => $opt_a, + ERRBAR => $opt_e, + FINEPRINT => $opt_p, + NUMBERING => $opt_n, + OUTLINE => $opt_O, + SHOWENDS => $opt_b, + SHOWINGBOX => $opt_X, + YAXIS => $opt_Y + ); + + template::create_template(\%input, $kind, $desc_r, $heightdata_r, $opt_o, $PATH); +} + + +################################################################################ +##### FUNCTINOS FOR INIT ##### +################################################################################ + +# all ints +sub isInt { + return ($_[0] =~ /^[\+\-]?\d+$/) ? 1 : 0; +} + +# all reals +sub isReal { + return ($_[0] =~ /^[\+\-]?\d*.\d*?$/) ? 1 : 0; +} + +sub isZeroOrOne { + return ($_[0] == 0 || $_[0] == 1) ? 1 : 0; +} + +sub init { + +# if (not defined $PATH) { +# die ("PATH must be defined\n"); +# } elsif (not -e $PATH) { +# die ("PATH ($PATH) must exist\n"); +# } elsif (not -d $PATH) { +# die ("PATH ($PATH) must be a directory\n"); +# } + + &getopts('T:B:C:d:E:f:F:h:k:l:m:o:s:t:w:x:y:abcenMOpSXY'); + + if (defined $opt_B && + (!isReal($opt_B) || $opt_B < 0) ) { + printf("\noption B must be a positive real, but is $opt_B, $!\n"); + die &usage(); + } + if (defined $opt_d && + ( !isReal($opt_d) || $opt_d < 0 || $opt_d > 1) ) { + print("\noption d must be a real between 0 and 1, but is $opt_d, $!\n"); + die &usage(); + } + if (defined $opt_E && + (!isReal($opt_E) || $opt_E < 0 || $opt_E > 1) ) { + print("\noption E must be a real between 0 and 1, but is $opt_E, $!\n"); + die &usage(); + } + if (defined $opt_f) { + open (INPUTFILE, "$opt_f") or die "Couldn't open input filename $opt_f: $!\n"; + } else { + print("\ninput file not specified, terminating...\n"); + die &usage(); + } + if (defined $opt_h && + (!isReal($opt_h) || $opt_h < 0) ) { + print("\noption h must be a positive real, but is $opt_h, $!\n"); + die &usage(); + } + if (defined $opt_w && + (!isReal($opt_w) || $opt_w < 0) ) { + print("\noption w must be a positive real, but is $opt_w, $!\n"); + die &usage(); + } + if (defined $opt_k && + (!isZeroOrOne($opt_k)) ) { + print("\noption k must be 0 or 1, but is $opt_k, $!\n"); + die &usage(); + } + + #toggles + if (!defined $opt_a) { + $opt_a = 0; + } + if (!defined $opt_b) { + $opt_b = 0; + } + if (!defined $opt_c) { + $opt_c = 0; + } + if (!defined $opt_e) { + $opt_e = 0; + } + if (!defined $opt_n) { + $opt_n = 0; + } + if (!defined $opt_M) { + $opt_M = 0; + } + if (!defined $opt_O) { + $opt_O = 0; + } + if (!defined $opt_p) { + $opt_p = 0; + } + if (!defined $opt_S) { + $opt_S = 0; + } + if (!defined $opt_X) { + $opt_X = 0; + }; + if (!defined $opt_Y) { + $opt_Y = 0; + }; + + if (!defined $opt_F) { + $opt_F = "EPS"; # default to EPS + } + if (!defined $opt_o) { + $opt_o = "-"; # for standard out + } else { +# $opt_o =~ s/\.\S*$//; # remove extension if there is one + $opt_o .= "." . (lc $opt_F); # make file name + } + + if (defined $opt_C && + (!isInt($opt_C) || $opt_C < 0) ) { + printf("\noption C must be a postive integer, but is $opt_C, $!\n"); + die &usage(); + } + + if (defined $opt_l && !isInt($opt_l)) { + printf("\noption l must be an integer, but is $opt_l, $!\n"); + die &usage(); + } + + if (defined $opt_m && !isInt($opt_m)) { + printf("\noption m must be an integer, but is $opt_m, $!\n"); + die &usage(); + } + + if (defined $opt_s && !isInt($opt_s)) { + printf("\noption s must be an integer, but is $opt_s, $!\n"); + die &usage(); + } +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/template.eps Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,674 @@ +%!PS-Adobe-3.0 EPSF-3.0 +%%Title: Sequence Logo : {$TITLE} +%%Creator: {$CREATOR} +%%CreationDate: {$CREATIONDATE} +%%BoundingBox: 0 0 {$LOGOWIDTHPOINTS} {$LOGOHEIGHTPOINTS} +%%Pages: 0 +%%DocumentFonts: +%%EndComments + +{$DESC} + + +% ---- CONSTANTS ---- +/cmfactor 72 2.54 div def % defines points -> cm conversion +/cm {cmfactor mul} bind def % defines centimeters + + +% ---- VARIABLES ---- + +{$COLORDEF} + +/logoWidth {$LOGOWIDTH} cm def +/logoHeight {$LOGOLINEHEIGHT} cm def +/logoTitle ({$TITLE}) def + +/yaxis {$YAXIS} def +/yaxisLabel ({$YAXIS_LABEL}) def +/yaxisBits {$BARBITS} def % bits +/yaxisTicBits {$TICBITS} def + + +/xaxis {$NUMBERING} def +/xaxisLabel ({$XAXIS_LABEL}) def +/showEnds ({$SHOWENDS}) def % d: DNA, p: PROTEIN, -: none + +/showFineprint true def +/fineprint ({$FINEPRINT}) def + +/charsPerLine {$CHARSPERLINE} def +/logoLines {$LOGOLINES} def + +/showingBox ({$SHOWINGBOX}) def %n s f +/shrinking {$SHRINKBOOLEAN} def +/shrink {$SHRINKFACTOR} def +/outline {$OUTLINE} def + +/IbeamFraction {$ERRORBARFRACTION} def +/IbeamGray 0.50 def +/IbeamLineWidth 0.5 def + +/fontsize 12 def +/titleFontsize 14 def +/smallFontsize 6 def + +/defaultColor {$DEFAULT_COLOR} def + +{$COLORDICT} + +% Standard DNA/RNA color scheme +% /colorDict << +% (G) orange +% (T) red +% (C) blue +% (A) green +% (U) red +% >> def + +% Standard Amino Acid colors +%/colorDict << +% (G) green +% (S) green +% (T) green +% (Y) green +% (C) green +% (N) purple +% (Q) purple +% (K) blue +% (R) blue +% (H) blue +% (D) red +% (E) red +% (P) black +% (A) black +% (W) black +% (F) black +% (L) black +% (I) black +% (M) black +% (V) black +%>> def + + + +% ---- DERIVED PARAMETERS ---- + +/leftMargin + fontsize 3.5 mul + +def + +/bottomMargin + fontsize 0.75 mul + + % Add extra room for axis + xaxis {fontsize 1.75 mul add } if + xaxisLabel () eq {} {fontsize 0.75 mul add} ifelse +def + + +/topMargin + logoTitle () eq { 10 }{titleFontsize 4 add} ifelse +def + +/rightMargin + %Add extra room if showing ends + showEnds (-) eq { fontsize}{fontsize 1.5 mul} ifelse +def + +/yaxisHeight + logoHeight + bottomMargin sub + topMargin sub +def + +/ticWidth fontsize 2 div def + +/pointsPerBit yaxisHeight yaxisBits div def + +/isBoxed + showingBox (s) eq + showingBox (f) eq or { + true + } { + false + } ifelse +def + +/stackMargin 1 def + +% Do not add space aroung characters if characters are boxed +/charRightMargin + isBoxed { 0.0 } {stackMargin} ifelse +def + +/charTopMargin + isBoxed { 0.0 } {stackMargin} ifelse +def + +/charWidth + logoWidth + leftMargin sub + rightMargin sub + charsPerLine div + charRightMargin sub +def + +/charWidth4 charWidth 4 div def +/charWidth2 charWidth 2 div def + +/stackWidth + charWidth charRightMargin add +def + +/numberFontsize + fontsize charWidth lt {fontsize}{charWidth} ifelse +def + +% movements to place 5'/N and 3'/C symbols +/leftEndDeltaX fontsize neg def +/leftEndDeltaY fontsize 1.5 mul neg def +/rightEndDeltaX fontsize 0.25 mul def +/rightEndDeltaY leftEndDeltaY def + +% Outline width is proporional to charWidth, +% but no less that 1 point +/outlinewidth + charWidth 32 div dup 1 gt {}{pop 1} ifelse +def + + +% ---- PROCEDURES ---- + +/StartLogo { + % Save state + save + gsave + + % Print Logo Title, top center + gsave + SetTitleFont + + logoWidth 2 div + logoTitle + stringwidth pop 2 div sub + logoHeight logoLines mul + titleFontsize sub + moveto + + logoTitle + show + grestore + + % Print X-axis label, bottom center + gsave + SetStringFont + + logoWidth 2 div + xaxisLabel stringwidth pop 2 div sub + fontsize 3 div + moveto + + xaxisLabel + show + grestore + + % Show Fine Print + showFineprint { + gsave + SetSmallFont + logoWidth + fineprint stringwidth pop sub + smallFontsize sub + smallFontsize 3 div + moveto + + fineprint show + grestore + } if + + % Move to lower left corner of last line, first stack + leftMargin bottomMargin translate + + % Move above first line ready for StartLine + 0 logoLines logoHeight mul translate + + SetLogoFont +} bind def + +/EndLogo { + grestore + showpage + restore +} bind def + + +/StartLine{ + % move down to the bottom of the line: + 0 logoHeight neg translate + + gsave + yaxis { MakeYaxis } if + xaxis { ShowLeftEnd } if +} bind def + +/EndLine{ + xaxis { ShowRightEnd } if + grestore +} bind def + + +/MakeYaxis { + gsave + stackMargin neg 0 translate + ShowYaxisBar + ShowYaxisLabel + grestore +} bind def + + +/ShowYaxisBar { + gsave + SetStringFont + + /str 10 string def % string to hold number + /smallgap stackMargin 2 div def + + % Draw first tic and bar + gsave + ticWidth neg 0 moveto + ticWidth 0 rlineto + 0 yaxisHeight rlineto + stroke + grestore + + + % Draw the tics + % initial increment limit proc for + 0 yaxisTicBits yaxisBits abs %cvi + {/loopnumber exch def + + % convert the number coming from the loop to a string + % and find its width + loopnumber 10 str cvrs + /stringnumber exch def % string representing the number + + stringnumber stringwidth pop + /numberwidth exch def % width of number to show + + /halfnumberheight + stringnumber CharBoxHeight 2 div + def + + numberwidth % move back width of number + neg loopnumber pointsPerBit mul % shift on y axis + halfnumberheight sub % down half the digit + + moveto % move back the width of the string + + ticWidth neg smallgap sub % Move back a bit more + 0 rmoveto % move back the width of the tic + + stringnumber show + smallgap 0 rmoveto % Make a small gap + + % now show the tic mark + 0 halfnumberheight rmoveto % shift up again + ticWidth 0 rlineto + stroke + } for + grestore +} bind def + +/ShowYaxisLabel { + gsave + SetStringFont + + % How far we move left depends on the size of + % the tic labels. + /str 10 string def % string to hold number + yaxisBits yaxisTicBits div cvi yaxisTicBits mul + str cvs stringwidth pop + ticWidth 1.5 mul add neg + + + yaxisHeight + yaxisLabel stringwidth pop + sub 2 div + + translate + 90 rotate + 0 0 moveto + yaxisLabel show + grestore +} bind def + + +/StartStack { % <stackNumber> startstack + xaxis {MakeNumber}{pop} ifelse + gsave +} bind def + +/EndStack { + grestore + stackWidth 0 translate +} bind def + + +% Draw a character whose height is proportional to symbol bits +/MakeSymbol{ % charbits character MakeSymbol + gsave + /char exch def + /bits exch def + + /bitsHeight + bits pointsPerBit mul + def + + /charHeight + bitsHeight charTopMargin sub + dup + 0.0 gt {}{pop 0.0} ifelse % if neg replace with zero + def + + charHeight 0.0 gt { + char SetColor + charWidth charHeight char ShowChar + + showingBox (s) eq { % Unfilled box + 0 0 charWidth charHeight false ShowBox + } if + + showingBox (f) eq { % Filled box + 0 0 charWidth charHeight true ShowBox + } if + + } if + + grestore + + 0 bitsHeight translate +} bind def + + +/ShowChar { % <width> <height> <char> ShowChar + gsave + /tc exch def % The character + /ysize exch def % the y size of the character + /xsize exch def % the x size of the character + + /xmulfactor 1 def + /ymulfactor 1 def + + + % if ysize is negative, make everything upside down! + ysize 0 lt { + % put ysize normal in this orientation + /ysize ysize abs def + xsize ysize translate + 180 rotate + } if + + shrinking { + xsize 1 shrink sub 2 div mul + ysize 1 shrink sub 2 div mul translate + + shrink shrink scale + } if + + % Calculate the font scaling factors + % Loop twice to catch small correction due to first scaling + 2 { + gsave + xmulfactor ymulfactor scale + + ysize % desired size of character in points + tc CharBoxHeight + dup 0.0 ne { + div % factor by which to scale up the character + /ymulfactor exch def + } % end if + {pop pop} + ifelse + + xsize % desired size of character in points + tc CharBoxWidth + dup 0.0 ne { + div % factor by which to scale up the character + /xmulfactor exch def + } % end if + {pop pop} + ifelse + grestore + } repeat + + % Adjust horizontal position if the symbol is an I + tc (I) eq { + charWidth 2 div % half of requested character width + tc CharBoxWidth 2 div % half of the actual character + sub 0 translate + % Avoid x scaling for I + /xmulfactor 1 def + } if + + + % ---- Finally, draw the character + + newpath + xmulfactor ymulfactor scale + + % Move lower left corner of character to start point + tc CharBox pop pop % llx lly : Lower left corner + exch neg exch neg + moveto + + outline { % outline characters: + outlinewidth setlinewidth + tc true charpath + gsave 1 setgray fill grestore + clip stroke + } { % regular characters + tc show + } ifelse + + grestore +} bind def + + +/ShowBox { % x1 y1 x2 y2 filled ShowBox + gsave + /filled exch def + /y2 exch def + /x2 exch def + /y1 exch def + /x1 exch def + newpath + x1 y1 moveto + x2 y1 lineto + x2 y2 lineto + x1 y2 lineto + closepath + + clip + + filled { + fill + }{ + 0 setgray stroke + } ifelse + + grestore +} bind def + + +/MakeNumber { % number MakeNumber + gsave + SetNumberFont + stackWidth 0 translate + 90 rotate % rotate so the number fits + dup stringwidth pop % find the length of the number + neg % prepare for move + stackMargin sub % Move back a bit + charWidth (0) CharBoxHeight % height of numbers + sub 2 div % + moveto % move back to provide space + show + grestore +} bind def + + +/Ibeam{ % heightInBits Ibeam + gsave + % Make an Ibeam of twice the given height in bits + /height exch pointsPerBit mul def + /heightDRAW height IbeamFraction mul def + + IbeamLineWidth setlinewidth + IbeamGray setgray + + charWidth2 height neg translate + ShowIbar + newpath + 0 0 moveto + 0 heightDRAW rlineto + stroke + newpath + 0 height moveto + 0 height rmoveto + currentpoint translate + ShowIbar + newpath + 0 0 moveto + 0 heightDRAW neg rlineto + currentpoint translate + stroke + grestore +} bind def + + +/ShowIbar { % make a horizontal bar + gsave + newpath + charWidth4 neg 0 moveto + charWidth4 0 lineto + stroke + grestore +} bind def + + +/ShowLeftEnd { + gsave + SetStringFont + leftEndDeltaX leftEndDeltaY moveto + showEnds (d) eq {(5) show ShowPrime} if + showEnds (p) eq {(N) show} if + grestore +} bind def + + +/ShowRightEnd { + gsave + SetStringFont + rightEndDeltaX rightEndDeltaY moveto + showEnds (d) eq {(3) show ShowPrime} if + showEnds (p) eq {(C) show} if + grestore +} bind def + + +/ShowPrime { + gsave + SetPrimeFont + (\242) show + grestore +} bind def + + +/SetColor{ % <char> SetColor + dup colorDict exch known { + colorDict exch get aload pop setrgbcolor + } { + pop + defaultColor aload pop setrgbcolor + } ifelse +} bind def + +% define fonts +/SetTitleFont {/Times-Bold findfont titleFontsize scalefont setfont} bind def +/SetLogoFont {/Helvetica-Narrow-Bold findfont charWidth scalefont setfont} bind def +/SetStringFont{/Helvetica-Bold findfont fontsize scalefont setfont} bind def +/SetPrimeFont {/Symbol findfont fontsize scalefont setfont} bind def +/SetSmallFont {/Helvetica findfont smallFontsize scalefont setfont} bind def + +/SetNumberFont { + /Helvetica-Bold findfont + numberFontsize + scalefont + setfont +} bind def + +%Take a single character and return the bounding box +/CharBox { % <char> CharBox <lx> <ly> <ux> <uy> + gsave + newpath + 0 0 moveto + % take the character off the stack and use it here: + true charpath + flattenpath + pathbbox % compute bounding box of 1 pt. char => lx ly ux uy + % the path is here, but toss it away ... + grestore +} bind def + + +% The height of a characters bounding box +/CharBoxHeight { % <char> CharBoxHeight <num> + CharBox + exch pop sub neg exch pop +} bind def + + +% The width of a characters bounding box +/CharBoxWidth { % <char> CharBoxHeight <num> + CharBox + pop exch pop sub neg +} bind def + + +% Deprecated names +/startstack {StartStack} bind def +/endstack {EndStack} bind def +/makenumber {MakeNumber} bind def +/numchar { MakeSymbol } bind def + +%%EndProlog + +%%Page: 1 1 +StartLogo +StartLine % line number 1 + +{$DATA} + +EndLine +EndLogo + +%%EOF + + + + + + + + + + + + + + + + + + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/template.pm Wed Sep 10 10:33:29 2014 -0400 @@ -0,0 +1,672 @@ +=head1 NAME + + template.pm - creates logo output in various formats + +=head1 SYNOPSIS + + Perl module + +=head1 DESCRIPTION + + logo.cgi and run.pl collect the logo data. They can then enter + template::create_template to create logo output in the following formats: + + * EPS + * GIF + * PDF + * PNG + + + + If the configuration file "logo.conf" exists in the working directory, then + it will be parsed for the locations of GhostScript (gs) and convert. The + following is an + example of the configuration file ("#" at beginning of line indicates + comment): + + # Make configuration changes here. Rename this file to logo.conf + # gs version 5.5 does not work, 6.5 does + # set the PATHS + gs=/usr/local/bin/gs + convert=/usr/X11R6/bin/convert + +=cut + +package template; +use strict; + + +################################################################################ +######## STATE FOR FILLINGS ########## +################################################################################ + +# BARBITS number of bits in vertical y-axis bar +# CHARWIDTH width of characters in logo +# COLORSCHEME "a" for amino acid; +# "n" for nucleic acid; +# "b" for black (no color scheme) +# color scheme +# DATA string of heights of characters in cm +# DESC description of aligned sequences +# FINEPRINT enable adverts/credits +# LOGOLINES number of lines of logo +# LOGOHEIGHT height of EACH LINE in logo (in cm) +# LOGOWIDTH width of final logo in cm +# LOGOHEIGHTPOINTS height of final logo in points +# LOGOWIDTHPOINTS height of final logo in points +# LOGOSTART logo output will begin at residue LOGOSTART +# LOGOEND logo output will end at residue LOGOEND +# ERRBAR 1 to include error bar, 0 to exclude +# ERRORBARFRACTION percent of error bar to show in range [0,1] +# KIND $AA for amino acid, $NA for nucleic acid +# NUMBERING 1 to show residue numbers, 0 to exclude +# OUTLINE 1 to print characters in outline form; +# 0 to print characters in solid form +# SHOWENDS "d" to show 5' and 3' ends; +# "p" to show N and C termini; +# "-" to exclude end markers +# SHOWINGBOX "n" to have No boxes around characters; +# "s" to have boxes around characters, with Shrinking; +# "f" to have Filled boxes around characters +# SHRINKBOOLEAN 1 to shrink characters; 0 to exclude shrinking +# SHRINKFACTOR amount to shrink in range from 1(no shrinking) to +# 0(full shrinking) +# START_NUM start number for very beginning of sequence +# TITLE title of logo +# YAXIS 1 to turn on y axis and its labels + +################################################################################ +##### VARIABLES AND DEFAULT VALUES ##### +################################################################################ + +my %defaults = ( + LOGOHEIGHT => 5, + LOGOWIDTH => 8, + + YAXIS => "false", + TITLE => "", + YAXIS_LABEL => "bits", + XAXIS_LABEL => "", + + BARENDS => "false", + OUTLINE => "false", + SHOWINGBOX => "n", + NUMBERING => "false", + #FINEPRINT => "Created by: G. E. Crooks, G. Hon, J.-M. Chandonia & S. E. Brenner, (2002) <weblogo.berkeley.edu>", + FINEPRINT => "weblogo.berkeley.edu", + + ERRORBARFRACTION => "1", + ERRBAR => "0", + SHRINKFACTOR => "1", + START_NUM => "1", + + DEFAULT_COLOR => "black", + + black => "[0 0 0]", + red => "[0.8 0 0]", + green => "[0 0.8 0]", + blue => "[0 0 0.8]", + yellow => "[1 0.7 1.0]", + purple => "[0.8 0 0.8]", + orange => "[1 0.7 0]" + ); + +my $AA = 0; +my $NA = 1; +my $PATH; + +################################################################################ +##### SOME FUNCTIONS ##### +################################################################################ + + +sub create_template { + my ($input, $kind, $desc_r, $data_r, $outfile, $path) = @_; + + # set path + $PATH = $path; + + #Create EPS file + my %fillings; + + # put parameters in fillings + makeFillings(\%fillings, $input, $kind, $desc_r, $defaults{FINEPRINT}); + + # set default data if not filled + setDefaults(\%fillings, \%defaults, scalar @$data_r); + + # put color in fillings + setColors(\%fillings, \%defaults); + + # put data in fillings + setData(\%fillings, $data_r); + + # make eps output + my $eps = fillTemplate("$PATH/template.eps", \%fillings); + my $format = $input->{FORMAT}; + + # convert + my ($gsprog, $convertprog) = getProgs(); + +# print STDERR "(gsprog, convertprog) = ($gsprog, $convertprog)\n"; + + my $width = $fillings{LOGOWIDTHPOINTS}; + my $height = $fillings{LOGOHEIGHTPOINTS}; # height of entire logo + my $res = $input->{RES}; + my $antialias = (defined $input->{ANTIALIAS} && $input->{ANTIALIAS}) ? "-dTextAlphaBits=4" : ""; + + my $r = $width . "x" . $height; + + if( $format eq "EPS" ) { + if ($outfile eq "-") { # if standard out + print $eps; + } else { + open (WRITEME, ">$outfile"); + print WRITEME $eps; + close (WRITEME); + } + + } elsif ($format eq "PDF"){ +# print("outfile = $outfile\n"); + my $program = "| $gsprog -sOutputFile=$outfile -sDEVICE=pdfwrite -dPDFSETTINGS=/printer -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height -dEmbedAllFonts=true $antialias -dSAFER -dBATCH -dNOPAUSE -_"; + open(WRITEME, $program); + print WRITEME "$eps"; + close (WRITEME); + + } elsif ( $format eq "PNG" ) { + my $program = "| $gsprog -sOutputFile=$outfile -sDEVICE=png16m -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height $antialias -dSAFER -dBATCH -dNOPAUSE -_"; + #print ("$program"); + open(WRITEME, $program); + print WRITEME "$eps"; + close (WRITEME); + + } elsif ($format eq "GIF") { + # convert to EPS first, then GIF + die "Please check logo.conf: convert program does not exist" + if (!defined $convertprog || !(-e $convertprog)); + + my $program = "| $gsprog -sOutputFile=- -sDEVICE=png16m -q -r$res -dDEVICEWIDTHPOINTS=$width -dDEVICEHEIGHTPOINTS=$height $antialias -dSAFER -dBATCH -dNOPAUSE -_"; + + if ($outfile eq "-") { + $program .= " | $convertprog png:- gif:-"; + } else { + $program .= " | $convertprog png:- $outfile"; + } + + open(WRITEME, $program); + print WRITEME "$eps"; + close (WRITEME); + } +} + +#deprecated +sub c { + return create_template( @_); +} + +sub getProgs { + my ($gsprog, $convertprog) = ("gs", "convert"); + + + # No configuration file, then return defaults. + return ($gsprog, $convertprog) if (! (-e "$PATH/logo.conf")); + open (CONF, "$PATH/logo.conf"); + + while (<CONF>) { + next if (/^\#/); # skip lines beginning with "#" + if (m/^gs/i) { # if looks like gs (case insensitive) + ($gsprog) = ($_ =~ /^\S+\=(.+)$/); + } + if (m/^convert/i) { # if looks like convert (case insensitive) + ($convertprog) = ($_ =~ /^\S+\=(.+)$/); + } + } + + # Do these fings exist? + my ($gsprogname) = ($gsprog =~ /^(\S+)/); + + die "Please check $PATH/logo.conf: gs program ($gsprogname) does not exist" if (!defined $gsprogname || !(-e $gsprogname)); + #die "Please check logo.conf: convert program does not exist" if (!defined $convertprog || !(-e $convertprog)); + + return ($gsprog, $convertprog); +} + +sub get_eps { + my ($input, $kind, $desc_r, $data_r) = @_; + my %fillings; + + # put parameters in fillings + makeFillings(\%fillings, $input, $kind, $desc_r); + + # set default data if not filled + setDefaults(\%fillings, \%defaults, $#$data_r); + + # put data in fillings + setData(\%fillings, $data_r); + + # make output + return fillTemplate("$PATH/template.eps", \%fillings); +} + +sub fillTemplate { + my ($filename, $fillings) = @_; + + if (not -e $filename) { + die "filename ($filename) must exist\n"; + } + + my $text; + local $/; # slurp mode (undef) + local *F; # create local filehandle + open(F, "< $filename\0") || return; + $text = <F>; + close(F); + + #replace {$KEYWORDS} with value in %$fillings hash + $text =~ s{ \{\$ (.*?) \} } + { exists( $fillings->{$1}) + ? $fillings->{$1} + : "" + }gsex; + return $text; +} + + +################################################################################ +##### FILL THE FILLINGS HERE ##### +################################################################################ + +sub isChecked { + return 0 if (!defined $_[0]); + return $_[0]; +} + +# negative/positive ints +sub isInt { + return ($_[0] =~ /^[-\+]?\d+$/) ? 1 : 0; +} + +sub makeFillings { + +# my ($fillings, $input, $kind, $desc_r, $data_r, $fineprint) = @_; + my ($fillings, $input, $kind, $desc_r, $fineprint) = @_; + $fillings->{KIND} = $kind; + $fillings->{LOGOHEIGHT} = $input->{LOGO_HEIGHT}; + $fillings->{LOGOWIDTH} = $input->{LOGO_WIDTH}; + $fillings->{OUTLINE} = (isChecked($input->{OUTLINE})) ? "true" : "false"; + $fillings->{NUMBERING} = (isChecked($input->{NUMBERING})) ? "true" : "false"; + $fillings->{FINEPRINT} = (isChecked($input->{FINEPRINT})) ? $fineprint : ""; + + $fillings->{LOGOSTART} = $input->{LOGOSTART}; + $fillings->{LOGOEND} = $input->{LOGOEND}; + $fillings->{START_NUM} = $input->{START_NUM}; + + $fillings->{YAXIS} = (isChecked($input->{YAXIS}) && !isChecked($input->{STRETCH})) ? "true" : "false"; + + + + $fillings->{TITLE} = $input->{TITLETEXT}; + $fillings->{YAXIS_LABEL} = $input->{YAXIS_LABEL}; + + $fillings->{XAXIS_LABEL} = $input->{XAXIS_LABEL}; + $fillings->{ERRBAR} = $input->{ERRBAR}; + $fillings->{SHOWINGBOX} = (isChecked($input->{SHOWINGBOX})) ? "s" : "n"; + $fillings->{SHRINKBOOLEAN} = ($fillings->{SHOWINGBOX} eq "s") ? "true" : "false"; + $fillings->{SHRINKFACTOR} = $input->{BOXSHRINK}; + + if ((defined $input->{CHARSPERLINE}) && + isInt($input->{CHARSPERLINE}) && + ($input->{CHARSPERLINE} > 0)) { + $fillings->{CHARSPERLINE} = $input->{CHARSPERLINE}; + } + + if (defined $input->{BARBITS}) { + $fillings->{BARBITS} = $input->{BARBITS}; + } else { + $fillings->{BARBITS} = ($fillings->{KIND} == $AA) ? 4.3 : 2; + } + + if (defined $input->{TICBITS}) { + $fillings->{TICBITS} = $input->{TICBITS}; + } else { + $fillings->{TICBITS} = 1; + } + + + + +# if (isChecked($input->{NOCOLOR})) { +# $fillings->{COLORSCHEME} = "b"; +# } else { +# $fillings->{COLORSCHEME} = ($kind == $AA) ? "a" : "n"; +# } + + #color + if (defined $input->{DEFAULT_COLOR}) { + $fillings->{DEFAULT_COLOR} = (isHexColor( $input->{DEFAULT_COLOR})) ? "c" . $input->{DEFAULT_COLOR} : + $input->{DEFAULT_COLOR}; + } + + if (isChecked($input->{SHOWENDS})) { + $fillings->{SHOWENDS} = ($fillings->{KIND} == $AA) ? "p" : "d"; + } else { + $fillings->{SHOWENDS} = "-"; + } + + $fillings->{DESC} = getDescription($desc_r, $fillings->{KIND}); + + $fillings->{ERRORBARFRACTION} = $input->{ERRORBARFRACTION}; + $fillings->{COLORSCHEME} = $input->{COLORSCHEME}; + $fillings->{COLORS} = $input->{COLORS}; +} + +sub getDescription { + my $returnVal = ""; + + foreach (@{$_[0]}) { + if(defined($_)) { + $returnVal .= "% * $_\n"; + } else { + $returnVal .= "% * \n"; + } + } + + if ($_[1] == $AA) { + $returnVal .= "% * PROTEIN ALIGNMENT"; + } else { + $returnVal .= "% * NUCLEOTIDE ALIGNMENT"; + } + + return $returnVal; +} + + +################################################################################ +##### SETTING DEFAULTS ##### +################################################################################ + +sub setDefaults { + my ($fillings, $defaults, $numchars) = @_; + + $fillings->{LOGOHEIGHT} = $defaults->{LOGOHEIGHT} if !defined $fillings->{LOGOHEIGHT}; + $fillings->{LOGOWIDTH} = $defaults->{LOGOWIDTH} if !defined $fillings->{LOGOWIDTH}; + + $fillings->{START_NUM} = $defaults->{START_NUM} if !defined $fillings->{START_NUM}; + $fillings->{LOGOSTART} = $fillings->{START_NUM} if !defined $fillings->{LOGOSTART}; + $fillings->{LOGOEND} = $numchars + $fillings->{LOGOSTART} - 1 if !defined $fillings->{LOGOEND}; + + $fillings->{YAXIS} = $defaults->{YAXIS} if !defined $fillings->{YAXIS}; + $fillings->{TITLE} = $defaults->{TITLE} if !defined $fillings->{TITLE} || $fillings->{TITLE} eq ""; + #$fillings->{YAXIS_LABEL} = $defaults->{YAXIS_LABEL} if !defined $fillings->{YAXIS_LABEL} || $fillings->{YAXIS_LABEL} eq ""; + $fillings->{YAXIS_LABEL} = $defaults->{YAXIS_LABEL} if !defined $fillings->{YAXIS_LABEL} ; + $fillings->{XAXIS_LABEL} = $defaults->{XAXIS_LABEL} if !defined $fillings->{XAXIS_LABEL} || $fillings->{XAXIS_LABEL} eq ""; + + $fillings->{BARENDS} = $defaults->{BARENDS} if !defined $fillings->{BARENDS}; + $fillings->{OUTLINE} = $defaults->{OUTLINE} if !defined $fillings->{OUTLINE}; + $fillings->{SHOWINGBOX} = $defaults->{SHOWINGBOX} if !defined $fillings->{SHOWINGBOX}; + $fillings->{NUMBERING} = $defaults->{NUMBERING} if !defined $fillings->{NUMBERING}; + + $fillings->{ERRORBARFRACTION} = $defaults->{ERRORBARFRACTION} if !defined $fillings->{ERRORBARFRACTION}; + $fillings->{SHRINKFACTOR} = $defaults->{SHRINKFACTOR} if !defined $fillings->{SHRINKFACTOR}; + $fillings->{ERRBAR} = $defaults->{ERRBAR} if !defined $fillings->{ERRBAR}; + +# printf("logostart = %d, start num = %d, numchars = $numchars, logoend = %d\n", $fillings->{LOGOSTART}, $fillings->{START_NUM}, +# $fillings->{LOGOEND}); + + my $givenrange = $fillings->{LOGOEND} - $fillings->{LOGOSTART} + 1; + my $possiblerange = $numchars - ($fillings->{LOGOSTART} - $fillings->{START_NUM}); + + if (!defined $fillings->{CHARSPERLINE} && ($givenrange > $possiblerange)) { + $fillings->{CHARSPERLINE} = $numchars - ($fillings->{LOGOSTART} - $fillings->{START_NUM}); + } elsif (!defined $fillings->{CHARSPERLINE}) { + $fillings->{CHARSPERLINE} = $fillings->{LOGOEND} - $fillings->{LOGOSTART} + 1; + } + + $fillings->{DEFAULT_COLOR} = $defaults->{DEFAULT_COLOR} if !defined $fillings->{DEFAULT_COLOR} || + $fillings->{DEFAULT_COLOR} eq ""; + +# printf("chars per line = %s\n",$fillings->{CHARSPERLINE}); +# print("givenrange = $givenrange, possiblerange = $possiblerange\n"); + + if ($givenrange > $possiblerange) { + $fillings->{LOGOLINES} = roundup($possiblerange / $fillings->{CHARSPERLINE}); + } else { + $fillings->{LOGOLINES} = roundup($givenrange / $fillings->{CHARSPERLINE}); + } + + $fillings->{CHARWIDTH} = ($fillings->{LOGOWIDTH} - 1.5) / $fillings->{CHARSPERLINE}; + +# # change height if more than 1 line +# $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54)) * $fillings->{LOGOLINES}; + + # LOGOHEIGHTPOITNS is the height input by the user +# $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54)); # user specifies height of entire logo + $fillings->{LOGOHEIGHTPOINTS} = int($fillings->{LOGOHEIGHT} * (72 / 2.54)) * $fillings->{LOGOLINES}; # user specifies height of logo line + $fillings->{LOGOWIDTHPOINTS} = int($fillings->{LOGOWIDTH} * (72 / 2.54)); + + # LOGOLINEHEIGHT is the height of each logo line, in cm +# $fillings->{LOGOLINEHEIGHT} = $fillings->{LOGOHEIGHT} / $fillings->{LOGOLINES}; # user specifies height of entire logo + $fillings->{LOGOLINEHEIGHT} = $fillings->{LOGOHEIGHT}; # user specifies height of logo line +} + +sub roundup { + return ($_[0] - int($_[0]) > 0) ? int($_[0] + 1) : $_[0]; +} + + +################################################################################ +##### COLORS ##### +################################################################################ + +sub getDefaultColors { + my ($defaults) = @_; + my $returnVal = ""; + $returnVal .= "/black " . $defaults->{black} . " def\n"; + $returnVal .= "/red " . $defaults->{red} . " def\n"; + $returnVal .= "/green " . $defaults->{green} . " def\n"; + $returnVal .= "/blue " . $defaults->{blue} . " def\n"; + $returnVal .= "/yellow " . $defaults->{yellow} . " def\n"; + $returnVal .= "/purple " . $defaults->{purple} . " def\n"; + $returnVal .= "/orange " . $defaults->{orange} . " def\n"; + + return $returnVal; +} + +sub getNAColors { + my $returnVal = <<END +% Standard DNA/RNA color scheme +/colorDict << + (G) orange + (T) red + (C) blue + (A) green + (U) red +END + ; + + return $returnVal; +} + +sub getAAColors { + my $returnVal = <<END +% Standard Amino Acid colors +/colorDict << + (G) green + (S) green + (T) green + (Y) green + (C) green + (N) purple + (Q) purple + (K) blue + (R) blue + (H) blue + (D) red + (E) red + (P) black + (A) black + (W) black + (F) black + (L) black + (I) black + (M) black + (V) black +END + ; + + return $returnVal; +} + +sub setColors { + my ($fillings, $defaults, $input) = @_; + my $colordef = getDefaultColors($defaults); + my $colordict = "/colorDict <<\n"; + + if ($fillings->{COLORSCHEME} eq "DEFAULT") { + $colordef = getDefaultColors($defaults); + + if ($fillings->{KIND} eq $AA) { + $colordict = getAAColors(); + } else { + $colordict = getNAColors(); + } + } elsif ($fillings->{COLORSCHEME} eq "BW") { + # do nothing for dict + } else { + + my %colorhash = %{ $fillings->{COLORS} }; + my $colorName = ""; + + foreach (keys %colorhash) { # keys are strings of residues, value = color name or color code (FF0000) + # add color to definitions + $colorName = $colorhash{$_}; + +# print("color = $_\n"); + + addColorDef(\$colordef, $colorName ) if (isHexColor($colorName)); + + # add have each residue use the color + foreach (split(//, $_)) { + # add color to dictionary + if (isHexColor($colorName)) { + $colordict .= " ($_) c$colorName\n" if !($_ =~ /^\s*$/); + } else { + $colordict .= " ($_) $colorName\n" if !($_ =~ /^\s*$/); + } + } + } + } + + $colordict .= "\n>> def"; + + # add to fillings + $fillings->{COLORDEF} = $colordef; + $fillings->{COLORDICT} = $colordict; +} + +sub addColorDef { +# print("adding to color def\n"); + my ($colordef_r, $color) = @_; + my $PSColor = getPSColor($color); + $$colordef_r .= "/c$color $PSColor def\n"; +} + +sub isHexColor { + return ($_[0] =~ /^[0-9a-fA-F]+$/) && (length $_[0] == 6); +} + +# know that it is hex color +sub getPSColor { + return "[" . hex(substr($_[0],0,2)) / 255 . " " . + hex(substr($_[0],2,2)) / 255 . " " . + hex(substr($_[0],4,2)) / 255 . "]"; +} + + +################################################################################ +##### SETTING DATA FIELD ##### +################################################################################ + +sub setData { + my ($fillings, $data_r) = @_; + + my @data = @$data_r; + my ($height, $letter); + my @slice; + my $data; + my $start_num = $fillings->{START_NUM}; + + my $start = $fillings->{LOGOSTART} - $start_num; # where in @data to start + my $end = $fillings->{LOGOEND} - $start_num; # where in @data to end + my $charsperline = $fillings->{CHARSPERLINE}; + + my $numlabel = $fillings->{LOGOSTART}; + + $end = ($end >= scalar @data) ? (scalar @data - 1) : $end; + + for (my $i=$start ; $i<=$end ; $i++) { + + # if add new lines +# if ((($i - $start) % $charsperline == 0) && +# ($i != $start) && # not first one +# ($i != $end)) { # not last one + if ((($i - $start) % $charsperline == 0) && + ($i != $start)) { # not first one + $data .= <<END +EndLine +StartLine + +END + ; + } + + @slice = @{$data[$i]}; + $data .= <<END +($numlabel) startstack +END + ; + + $numlabel++; + + foreach (@slice) { + ($letter,$height) = ($_ =~ /^(.{1})(\S+)/); + + # is space, so leave + if ($letter eq " ") { + last; + } + + # look for ">", which is symbol for error bar, then quit + if ($letter eq ">") { + last; + } + +# # look for negative heights +# if ($height < 0) { +# next; +# } + + $letter = (uc $letter); # always uppercase + $height = ($height < 0) ? 0 : $height; + $data .= " $height ($letter) numchar\n"; + } + + # put in error bars -- size is in $height as read in before + if ($fillings->{ERRBAR} && $letter ne " " && $height != 0) { + $data .= " $height Ibeam\n"; + } + + $data .= <<END +endstack + +END + ; + + } + + $fillings->{DATA} = $data; +} + +################################################################################ + +1;
--- a/wrapper.sh Mon Aug 18 04:42:59 2014 -0400 +++ b/wrapper.sh Wed Sep 10 10:33:29 2014 -0400 @@ -12,6 +12,8 @@ cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt cat $PWD/files/*/8_* > $PWD/mutationstats.txt cat $PWD/files/*/10_* > $PWD/hotspots.txt +cp $dir/HS12RSS.txt $outdir/ +cp $dir/HS23RSS.txt $outdir/ mkdir $outdir/identification/ python $dir/gene_identification.py --input $PWD/summary.txt --outdir $outdir/identification/ @@ -25,6 +27,8 @@ do echo "Running $gene <br />" >> $output mkdir $outdir/$gene + cp $dir/HS12RSS.txt $outdir/$gene/ + cp $dir/HS23RSS.txt $outdir/$gene/ echo "Filtering input..." >> $output Rscript $dir/filter.r $PWD/summary.txt $outdir/identification/${gene}.txt $outdir/$gene/summary.txt Rscript $dir/filter.r $PWD/mutationanalysis.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationanalysis.txt @@ -45,6 +49,8 @@ cat $mutationtmp $hotspottmp > $tmp paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2 cat $tmp2 > $outdir/mutationandhotspot.txt + rm $outdir/$gene/HS12RSS.txt + rm $outdir/$gene/HS23RSS.txt done Rscript --verbose $dir/mutation_analysis.r $PWD/mutationstats.txt $PWD/summary.txt $outdir/ 2>&1 @@ -98,10 +104,17 @@ Rscript $dir/piechart.r "${ca1_n},${ca2_n}" "IgA1 - ${ca1_n},IgA2 - ${ca2_n}" "IgA (N = $ca_n)" $outdir/ca.png 2>&1 Rscript $dir/piechart.r "${cg1_n},${cg2_n},${cg3_n},${cg4_n}" "IgG1 - ${cg1_n},IgG2 - ${cg2_n},IgG3 - ${cg3_n},IgG4 - ${cg4_n}" "IgG (N = $cg_n)" $outdir/cg.png 2>&1 +$dir/seqlogo -t "HS12RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs12.txt > $outdir/HS12.png 2>&1 +$dir/seqlogo -t "HS23RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs23.txt > $outdir/HS23.png 2>&1 + + echo "<img src='all.png'/>" >> $output echo "<img src='ca.png'/>" >> $output echo "<img src='cg.png'/>" >> $output +echo "<img src='HS12.png'/>" >> $output +echo "<img src='HS23.png'/>" >> $output + for gene in ${genes[@]} do echo "<table border='1'><caption>$gene transition table</caption>" >> $output @@ -120,3 +133,6 @@ echo "</table>" >> $output echo "</html>" >> $output + +#rm $outdir/HS12RSS.txt +#rm $outdir/HS23RSS.txt