Mercurial > repos > rnateam > antarna
comparison antaRNA.py @ 7:2f68c8d1c080 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/antarna/ commit 92a4a51ff5405ac0ba2a35bddb33608b99b61593-dirty
| author | rnateam |
|---|---|
| date | Tue, 12 May 2015 11:05:45 -0400 |
| parents | 3d00f7b51b9c |
| children |
comparison
equal
deleted
inserted
replaced
| 6:3d00f7b51b9c | 7:2f68c8d1c080 |
|---|---|
| 680 p.stdin.write(sequence+'\n') | 680 p.stdin.write(sequence+'\n') |
| 681 pks = p.communicate() | 681 pks = p.communicate() |
| 682 structure = "".join(pks[0].split("\n")[2].split(" ")[-1:]) | 682 structure = "".join(pks[0].split("\n")[2].split(" ")[-1:]) |
| 683 return structure | 683 return structure |
| 684 | 684 |
| 685 def init_RNAfold(temperature, paramFile = ""): | 685 def init_RNAfold(version, temperature, paramFile = ""): |
| 686 """ | 686 """ |
| 687 Initialization RNAfold listener | 687 Initialization RNAfold listener |
| 688 """ | 688 """ |
| 689 #p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAfold" | 689 p2p = "" |
| 690 p2p = "RNAfold" | |
| 691 | |
| 692 t = "-T " + str(temperature) | 690 t = "-T " + str(temperature) |
| 693 P = "" | 691 P = "" |
| 694 if paramFile != "": | 692 if paramFile != "": |
| 695 P = "-P " + paramFile | 693 P = "-P " + paramFile |
| 696 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), | 694 if version == 185: |
| 697 #shell = True, | 695 p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAfold" |
| 698 stdin = subprocess.PIPE, | 696 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), |
| 699 stdout = subprocess.PIPE, | 697 shell = True, |
| 700 stderr = subprocess.PIPE, | 698 stdin = subprocess.PIPE, |
| 701 close_fds = True) | 699 stdout = subprocess.PIPE, |
| 702 return p | 700 stderr = subprocess.PIPE, |
| 703 | 701 close_fds = True) |
| 704 | 702 return p |
| 703 elif version == 213: | |
| 704 p2p = "RNAfold" | |
| 705 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), | |
| 706 #shell = True, | |
| 707 stdin = subprocess.PIPE, | |
| 708 stdout = subprocess.PIPE, | |
| 709 stderr = subprocess.PIPE, | |
| 710 close_fds = True) | |
| 711 return p | |
| 712 else: | |
| 713 exit(0) | |
| 714 | |
| 705 def consult_RNAfold(seq, p): | 715 def consult_RNAfold(seq, p): |
| 706 """ | 716 """ |
| 707 Consults RNAfold listener | 717 Consults RNAfold listener |
| 708 """ | 718 """ |
| 709 p.stdin.write(seq+'\n') | 719 p.stdin.write(seq+'\n') |
| 1130 def runColony(s, SC, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy): | 1140 def runColony(s, SC, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy): |
| 1131 """ | 1141 """ |
| 1132 Execution function of a single ant colony finding one solution sequence | 1142 Execution function of a single ant colony finding one solution sequence |
| 1133 """ | 1143 """ |
| 1134 retString = "" | 1144 retString = "" |
| 1135 retString2 = "" | 1145 retString2 = [] |
| 1136 BPstack, LP = getBPStack(s, SC) | 1146 BPstack, LP = getBPStack(s, SC) |
| 1137 | 1147 |
| 1138 rGC = reachableGC(SC) | 1148 rGC = reachableGC(SC) |
| 1139 GC_message = "" | 1149 GC_message = "" |
| 1140 if GC > rGC: | 1150 if GC > rGC: |
| 1156 | 1166 |
| 1157 | 1167 |
| 1158 #### | 1168 #### |
| 1159 # INITIALIZATION OF THE RNA TOOLs | 1169 # INITIALIZATION OF THE RNA TOOLs |
| 1160 # | 1170 # |
| 1161 RNAfold = init_RNAfold(temperature, paramFile) | 1171 RNAfold = init_RNAfold(213, temperature, paramFile) |
| 1162 #RNAdistance = init_RNAdistance() | 1172 #RNAdistance = init_RNAdistance() |
| 1163 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+') | 1173 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+') |
| 1164 #RNAdist_pattern = re.compile('.*\s([\d]+)') | 1174 #RNAdist_pattern = re.compile('.*\s([\d]+)') |
| 1165 # | 1175 # |
| 1166 #### | 1176 #### |
| 1348 retString += "|dGC:" + str(best_solution[4]) | 1358 retString += "|dGC:" + str(best_solution[4]) |
| 1349 retString += "|GC:" + str(getGC(sequence)*100) | 1359 retString += "|GC:" + str(getGC(sequence)*100) |
| 1350 retString += "|dseq:" + str(getSequenceEditDistance(SC, sequence)) | 1360 retString += "|dseq:" + str(getSequenceEditDistance(SC, sequence)) |
| 1351 retString += "|L:" + str(len(sequence)) | 1361 retString += "|L:" + str(len(sequence)) |
| 1352 retString += "|Time:" + str(duration) | 1362 retString += "|Time:" + str(duration) |
| 1353 retString2 += "\n" + struct + "\n" | 1363 |
| 1354 retString2 += sequence | 1364 retString2.append(struct) |
| 1365 retString2.append(sequence) | |
| 1355 | 1366 |
| 1356 # CLOSING THE PIPES TO THE PROGRAMS | 1367 # CLOSING THE PIPES TO THE PROGRAMS |
| 1357 RNAfold.communicate() | 1368 RNAfold.communicate() |
| 1358 #RNAdistance.communicate() | 1369 #RNAdistance.communicate() |
| 1359 | 1370 |
| 1383 struct_correction_term = float(struct_correction_term) | 1394 struct_correction_term = float(struct_correction_term) |
| 1384 GC_correction_term = float(GC_correction_term) | 1395 GC_correction_term = float(GC_correction_term) |
| 1385 seq_correction_term = float(seq_correction_term) | 1396 seq_correction_term = float(seq_correction_term) |
| 1386 colonies = int(colonies) | 1397 colonies = int(colonies) |
| 1387 file_id = str(file_id) | 1398 file_id = str(file_id) |
| 1388 verbose = verbose | 1399 tmp_verbose = verbose |
| 1389 output_verbose = output_verbose | 1400 tmp_output_verbose = output_verbose |
| 1401 verbose = tmp_output_verbose # Due to later change, this is a twistaround and a switching of purpose | |
| 1402 output_verbose = tmp_verbose # Due to later change, this is a twistaround and a switching of purpose | |
| 1390 correction_terms = struct_correction_term, GC_correction_term, seq_correction_term | 1403 correction_terms = struct_correction_term, GC_correction_term, seq_correction_term |
| 1391 temperature = float(temperature) | 1404 temperature = float(temperature) |
| 1392 print_to_STDOUT = (file_id == "STDOUT") | 1405 print_to_STDOUT = (file_id == "STDOUT") |
| 1393 | 1406 |
| 1394 useGU = useGU | 1407 useGU = useGU |
| 1427 output_v, output_w = runColony(structure, sequenceconstraint, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy) | 1440 output_v, output_w = runColony(structure, sequenceconstraint, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy) |
| 1428 | 1441 |
| 1429 # Post-Processing the output of a ant colony procedure | 1442 # Post-Processing the output of a ant colony procedure |
| 1430 line = ">" + name + str(col) | 1443 line = ">" + name + str(col) |
| 1431 if output_verbose: | 1444 if output_verbose: |
| 1432 line += "|Cstr:" + structure + "|Cseq:" + sequenceconstraint + "|Alpha:" + str(alpha) + "|Beta:" + str(beta) + "|tGC:" + str(GC) + "|ER:" + str(evaporation_rate) + "|Struct_CT:" + str(struct_correction_term) + "|GC_CT:" + str(GC_correction_term) + "|Seq_CT:" + str(seq_correction_term) + output_v + output_w | 1445 line += "|Cstr:" + structure + "|Cseq:" + sequenceconstraint + "|Alpha:" + str(alpha) + "|Beta:" + str(beta) + "|tGC:" + str(GC) + "|ER:" + str(evaporation_rate) + "|Struct_CT:" + str(struct_correction_term) + "|GC_CT:" + str(GC_correction_term) + "|Seq_CT:" + str(seq_correction_term) + output_v + "\n" + "\n".join(output_w) |
| 1433 else: | 1446 else: |
| 1434 line += output_w | 1447 line += "\n" + output_w[1] |
| 1435 if return_mod == False: | 1448 if return_mod == False: |
| 1436 if print_to_STDOUT: | 1449 if print_to_STDOUT: |
| 1437 print line | 1450 print line |
| 1438 else: | 1451 else: |
| 1439 if col == 0: | 1452 if col == 0: |
| 1467 | 1480 |
| 1468 | 1481 |
| 1469 alpha = args.alpha | 1482 alpha = args.alpha |
| 1470 beta = args.beta | 1483 beta = args.beta |
| 1471 tGC = args.tGC | 1484 tGC = args.tGC |
| 1485 if tGC < 0 or tGC > 1: | |
| 1486 print "Error: Chosen tGC not in range [0,1]" | |
| 1487 exit(1) | |
| 1472 evaporation_rate = args.ER | 1488 evaporation_rate = args.ER |
| 1473 struct_correction_term = args.Cstrweight | 1489 struct_correction_term = args.Cstrweight |
| 1474 GC_correction_term = args.Cgcweight | 1490 GC_correction_term = args.Cgcweight |
| 1475 seq_correction_term = args.Cseqweight | 1491 seq_correction_term = args.Cseqweight |
| 1476 colonies = args.noOfColonies | 1492 colonies = args.noOfColonies |
| 1521 MAIN EXECUTABLE WHICH PARSES THE INPUT LINE | 1537 MAIN EXECUTABLE WHICH PARSES THE INPUT LINE |
| 1522 """ | 1538 """ |
| 1523 | 1539 |
| 1524 argument_parser = argparse.ArgumentParser( | 1540 argument_parser = argparse.ArgumentParser( |
| 1525 description = """ | 1541 description = """ |
| 1526 Ant Colony Optimized RNA Sequence Design | |
| 1527 """, | |
| 1528 | |
| 1529 epilog = """ | |
| 1530 | 1542 |
| 1531 ######################################################################### | 1543 ######################################################################### |
| 1532 # antaRNA - ant assembled RNA # | 1544 # antaRNA - ant assembled RNA # |
| 1533 # -> Ant Colony Optimized RNA Sequence Design # | 1545 # -> Ant Colony Optimized RNA Sequence Design # |
| 1534 # ------------------------------------------------------------ # | 1546 # ------------------------------------------------------------ # |
| 1543 | 1555 |
| 1544 - antaRNA was only tested under Linux. | 1556 - antaRNA was only tested under Linux. |
| 1545 | 1557 |
| 1546 - For questions and remarks please feel free to contact us at http://www.bioinf.uni-freiburg.de/ | 1558 - For questions and remarks please feel free to contact us at http://www.bioinf.uni-freiburg.de/ |
| 1547 | 1559 |
| 1560 """, | |
| 1561 | |
| 1562 epilog = """ | |
| 1548 Example calls: | 1563 Example calls: |
| 1549 python antaRNA.py --Cstr "...(((...)))..." --tGC 0.5 -n 2 | 1564 python antaRNA.py --Cstr "...(((...)))..." --tGC 0.5 -n 2 |
| 1550 python antaRNA.py --Cstr ".........AAA(((...)))AAA........." --tGC 0.5 -n 10 --output_file /path/to/antaRNA_TESTRUN -ov | 1565 python antaRNA.py --Cstr ".........AAA(((...)))AAA........." --tGC 0.5 -n 10 --output_file /path/to/antaRNA_TESTRUN -ov |
| 1551 python antaRNA.py --Cstr "BBBBB....AAA(((...)))AAA....BBBBB" --Cseq "NNNNANNNNNCNNNNNNNNNNNGNNNNNNUNNN" --tGC 0.5 -n 10 | 1566 python antaRNA.py --Cstr "BBBBB....AAA(((...)))AAA....BBBBB" --Cseq "NNNNANNNNNCNNNNNNNNNNNGNNNNNNUNNN" --tGC 0.5 -n 10 |
| 1552 | 1567 |
| 1553 ######################################################################### | 1568 ######################################################################### |
| 1554 # --- Hail to the King!!! All power to the swarm!!! --- # | 1569 # --- Hail to the Queen!!! All power to the swarm!!! --- # |
| 1555 ######################################################################### | 1570 ######################################################################### |
| 1556 """, | 1571 """, |
| 1557 #formatter_class=RawTextHelpFormatter | 1572 #formatter_class=RawTextHelpFormatter |
| 1558 ) | 1573 ) |
| 1559 | 1574 |
| 1560 # mandatorys | 1575 # mandatorys |
| 1561 argument_parser.add_argument("-Cstr", "--Cstr", help="Structure constraint using RNA dotbracket notation with fuzzy block constraint. \n(TYPE: %(type)s)\n\n", type=str, required=True) | 1576 argument_parser.add_argument("-Cstr", "--Cstr", help="Structure constraint using RNA dotbracket notation with fuzzy block constraint. \n(TYPE: %(type)s)\n\n", type=str, required=True) |
| 1562 argument_parser.add_argument("-tGC", "--tGC", help="Objective target GC content in [0,1].\n(TYPE: %(type)s)\n\n", type=float, required=True) | 1577 argument_parser.add_argument("-tGC", "--tGC", help="Objective target GC content in [0,1].\n(TYPE: %(type)s)\n\n", type=float, required=True) |
| 1563 argument_parser.add_argument("-n", "--noOfColonies", help="Number of sequences which shall be produced. \n(TYPE: %(type)s)\n\n\n\n", type=int, required=True) | 1578 argument_parser.add_argument("-n", "--noOfColonies", help="Number of sequences which shall be produced. \n(TYPE: %(type)s)\n\n\n\n", type=int, default=1) |
| 1564 argument_parser.add_argument("-GU", "--useGUBasePair", help="Allowing GU base pairs. \n\n", action="store_true") | 1579 argument_parser.add_argument("-GU", "--useGUBasePair", help="Allowing GU base pairs. \n\n", action="store_true") |
| 1565 | 1580 |
| 1566 argument_parser.add_argument("-s", "--seed", help = "Provides a seed value for the used pseudo random number generator.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="none") | 1581 argument_parser.add_argument("-s", "--seed", help = "Provides a seed value for the used pseudo random number generator.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="none") |
| 1567 argument_parser.add_argument("-ip", "--improve_procedure", help = "Select the improving method. h=hierarchical, s=score_based.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="s") | 1582 argument_parser.add_argument("-ip", "--improve_procedure", help = "Select the improving method. h=hierarchical, s=score_based.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="s") |
| 1568 argument_parser.add_argument("-r", "--Resets", help = "Amount of maximal terrain resets, until the best solution is retuned as solution.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=5) | 1583 argument_parser.add_argument("-r", "--Resets", help = "Amount of maximal terrain resets, until the best solution is retuned as solution.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=5) |
| 1582 argument_parser.add_argument("-P", "--paramFile", help = "Changes the energy parameterfile of RNAfold. If using this explicitly, please provide a suitable energy file delivered by RNAfold. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="") | 1597 argument_parser.add_argument("-P", "--paramFile", help = "Changes the energy parameterfile of RNAfold. If using this explicitly, please provide a suitable energy file delivered by RNAfold. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="") |
| 1583 argument_parser.add_argument("-of","--output_file", help="Provide a path and an output file, e.g. \"/path/to/the/target_file\". \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="STDOUT") | 1598 argument_parser.add_argument("-of","--output_file", help="Provide a path and an output file, e.g. \"/path/to/the/target_file\". \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="STDOUT") |
| 1584 argument_parser.add_argument("-Cseq", "--Cseq", help="Sequence constraint using RNA nucleotide alphabet {A,C,G,U} and wild-card \"N\". \n(TYPE: %(type)s)\n\n", type=str, default = "") | 1599 argument_parser.add_argument("-Cseq", "--Cseq", help="Sequence constraint using RNA nucleotide alphabet {A,C,G,U} and wild-card \"N\". \n(TYPE: %(type)s)\n\n", type=str, default = "") |
| 1585 argument_parser.add_argument("-l", "--level", help="Sets the level of allowed influence of sequence constraint on the structure constraint [0:no influence; 3:extensive influence].\n(TYPE: %(type)s)\n\n", type=int, default = 1) | 1600 argument_parser.add_argument("-l", "--level", help="Sets the level of allowed influence of sequence constraint on the structure constraint [0:no influence; 3:extensive influence].\n(TYPE: %(type)s)\n\n", type=int, default = 1) |
| 1586 argument_parser.add_argument("--name", help="Defines a name which is used in the sequence output. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="antaRNA_") | 1601 argument_parser.add_argument("--name", help="Defines a name which is used in the sequence output. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="antaRNA_") |
| 1587 argument_parser.add_argument("-a", "--alpha", help="Sets alpha, probability weight for terrain path influence. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) | 1602 argument_parser.add_argument("-a", "--alpha", help="Sets alpha, probability weight for terrain pheromone influence. [0,1] \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) |
| 1588 argument_parser.add_argument("-b", "--beta", help="Sets beta, probability weight for terrain pheromone influence. [0,1] \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) | 1603 argument_parser.add_argument("-b", "--beta", help="Sets beta, probability weight for terrain path influence. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) |
| 1589 argument_parser.add_argument("-er", "--ER", help="Pheromone evaporation rate. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.2) | 1604 argument_parser.add_argument("-er", "--ER", help="Pheromone evaporation rate. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.2) |
| 1590 argument_parser.add_argument("-Cstrw", "--Cstrweight", help="Structure constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.5) | 1605 argument_parser.add_argument("-Cstrw", "--Cstrweight", help="Structure constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.5) |
| 1591 argument_parser.add_argument("-Cgcw", "--Cgcweight", help="GC content constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=5.0) | 1606 argument_parser.add_argument("-Cgcw", "--Cgcweight", help="GC content constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=5.0) |
| 1592 argument_parser.add_argument("-Cseqw", "--Cseqweight", help="Sequence constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n\n", type=float, default=1.0) | 1607 argument_parser.add_argument("-Cseqw", "--Cseqweight", help="Sequence constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n\n", type=float, default=1.0) |
| 1593 argument_parser.add_argument("-v", "--verbose", help="Displayes intermediate output.\n\n", action="store_true") | 1608 argument_parser.add_argument("-ov", "--output_verbose", help="Displayes intermediate output.\n\n", action="store_true") |
| 1594 argument_parser.add_argument("-ov", "--output_verbose", help="Prints additional output to the headers of the produced sequences.\n\n", action="store_false") | 1609 argument_parser.add_argument("-v", "--verbose", help="Prints additional features and stats to the headers of the produced sequences. Also adds the structure of the sequence.\n\n", action="store_true") |
| 1595 | 1610 |
| 1596 args = argument_parser.parse_args() | 1611 args = argument_parser.parse_args() |
| 1597 | 1612 |
| 1598 execute(args) | 1613 execute(args) |
| 1599 | 1614 |
