| 0 | 1 #!/usr/bin/env perl | 
|  | 2 | 
|  | 3 # Parses align file, calculates read depth (RD) and genome representation (GR) | 
|  | 4 # of individual contigs; outputs contig sequences in fasta format with this | 
|  | 5 # information in the fasta header: | 
|  | 6 # >CLXConitgY (length-average_RD-GR) | 
|  | 7 # | 
|  | 8 # RD profiles along the contig sequences can also be calculated and saved | 
|  | 9 # in a separate file. | 
|  | 10 # | 
|  | 11 # (renamed from align_parsing_2.pl, 2010-05-10) | 
|  | 12 | 
|  | 13 | 
|  | 14 use Getopt::Std; | 
|  | 15 use warnings; | 
|  | 16 | 
|  | 17 getopt('iop'); | 
|  | 18 if ($opt_i) { | 
|  | 19   $align_file = $opt_i;    # | 
|  | 20 } else { | 
|  | 21   die "Missing parameter: -i align_file\n"; | 
|  | 22 } | 
|  | 23 if ($opt_o) { | 
|  | 24   $out_file = $opt_o;    # | 
|  | 25 } else { | 
|  | 26   $out_file = $align_file.".info"; | 
|  | 27   print "Output file not set, using \"$out_file\"\n"; | 
|  | 28 } | 
|  | 29 if ($opt_p) { | 
|  | 30   $profile_file = $opt_p;    # RD profiles will be recorded | 
|  | 31 } else { | 
|  | 32   $profile_file = 0;     # will be used below to switch profile output off | 
|  | 33   print "Parameter -p not set, RD profiles will not be saved\n"; | 
|  | 34 } | 
|  | 35 | 
|  | 36 open (ALIGN,$align_file) or die "Could not read from $align_file\n"; | 
|  | 37 open (OUT,">$out_file") or die "Could not write to $out_file\n"; | 
|  | 38 if ($profile_file) { | 
|  | 39   open (PROF,">$profile_file") or die "Could not write to $profile_file\n"; | 
|  | 40 } | 
|  | 41 | 
|  | 42 while ($radek = <ALIGN>) { | 
|  | 43   if ($radek =~/^DETAILED DISPLAY OF CONTIGS/) {    # start of alignment section | 
|  | 44       # print $radek; | 
|  | 45     $radek = <ALIGN>; | 
|  | 46     while ($radek =~/\*\*\* (CL\d+) {0,1}Contig (\d+) \*\*\*/) {   # parsing individual contigs | 
|  | 47       #  print $radek; | 
|  | 48       $contig_id = $1."Contig".$2; | 
|  | 49       @pokryti = ();       # array ve ktere bude pro kazdou pozici v sekvenci pokryti | 
|  | 50       $seq = "";           # consensus sekvence contigu | 
|  | 51 #      print "$contig_id\n"; | 
|  | 52 #      print $radek; | 
|  | 53       $radek = <ALIGN>; | 
|  | 54       while ($radek =~/\:    \.    \:/) {  # -> nasleduje dalsi blok | 
|  | 55         @blok = ();                        # radky z aktualniho bloku alignmentu | 
|  | 56         do { | 
|  | 57           $radek = <ALIGN>; | 
|  | 58           push(@blok,$radek); | 
|  | 59         } while (not $radek =~/^consensus/); | 
|  | 60         $cons_line = $radek; | 
|  | 61         $radek = <ALIGN>; | 
|  | 62         $radek = <ALIGN>;     # toto posledni nacteni radku slouzi v podmince tohoto i nadrazeneho cyklu while | 
|  | 63         if (not $radek) { $radek = " "; };      # aby to nehlasilo chyby na konci souboru kde ty radky muzou chybet | 
|  | 64 | 
|  | 65         pop(@blok); pop(@blok);            # odstrani posledni dva radky (mezera a consensus) | 
|  | 66 | 
|  | 67         for ($f=10;$f<=length($cons_line);$f++) { | 
|  | 68           $suma_pozice = 0; | 
|  | 69           if (substr($cons_line,$f,1) =~/([A,T,G,C,N])/) { | 
|  | 70             $seq .= $1; | 
|  | 71             foreach $cteni (@blok) { | 
|  | 72               if (substr($cteni,$f,1) =~/[A,T,G,C,N]/) { | 
|  | 73                 $suma_pozice++; | 
|  | 74               } | 
|  | 75             } | 
|  | 76             push(@pokryti,$suma_pozice); | 
|  | 77           } | 
|  | 78         } | 
|  | 79 | 
|  | 80       } | 
|  | 81 | 
|  | 82       $delka_cons = @pokryti; | 
|  | 83       $soucet = 0; | 
|  | 84       $prumer = 0; | 
|  | 85       foreach $suma_pozice (@pokryti) { | 
|  | 86         $soucet += $suma_pozice; | 
|  | 87       } | 
|  | 88       $prumer = sprintf("%0.1f",$soucet/$delka_cons); | 
|  | 89 | 
|  | 90       print OUT ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | 
|  | 91       while ($seq_line = substr($seq,0,60,"")) { | 
|  | 92         print OUT "$seq_line\n"; | 
|  | 93       } | 
|  | 94       if ($profile_file) { | 
|  | 95         print PROF ">$contig_id ($delka_cons-$prumer-$soucet)\n"; | 
|  | 96         foreach $suma_pozice (@pokryti) { | 
|  | 97           print PROF "$suma_pozice "; | 
|  | 98         } | 
|  | 99         print PROF "\n"; | 
|  | 100       } | 
|  | 101 | 
|  | 102     } | 
|  | 103   } | 
|  | 104 } | 
|  | 105 | 
|  | 106 close ALIGN; | 
|  | 107 close OUT; | 
|  | 108 if ($profile_file) { | 
|  | 109   close PROF; | 
|  | 110 } | 
|  | 111 |