comparison antaRNA.py @ 7:2f68c8d1c080 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/antarna/ commit 92a4a51ff5405ac0ba2a35bddb33608b99b61593-dirty
author rnateam
date Tue, 12 May 2015 11:05:45 -0400
parents 3d00f7b51b9c
children
comparison
equal deleted inserted replaced
6:3d00f7b51b9c 7:2f68c8d1c080
680 p.stdin.write(sequence+'\n') 680 p.stdin.write(sequence+'\n')
681 pks = p.communicate() 681 pks = p.communicate()
682 structure = "".join(pks[0].split("\n")[2].split(" ")[-1:]) 682 structure = "".join(pks[0].split("\n")[2].split(" ")[-1:])
683 return structure 683 return structure
684 684
685 def init_RNAfold(temperature, paramFile = ""): 685 def init_RNAfold(version, temperature, paramFile = ""):
686 """ 686 """
687 Initialization RNAfold listener 687 Initialization RNAfold listener
688 """ 688 """
689 #p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAfold" 689 p2p = ""
690 p2p = "RNAfold"
691
692 t = "-T " + str(temperature) 690 t = "-T " + str(temperature)
693 P = "" 691 P = ""
694 if paramFile != "": 692 if paramFile != "":
695 P = "-P " + paramFile 693 P = "-P " + paramFile
696 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]), 694 if version == 185:
697 #shell = True, 695 p2p = "/home/rk/Software/ViennaRNA/ViennaRNA-1.8.5/Progs/RNAfold"
698 stdin = subprocess.PIPE, 696 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]),
699 stdout = subprocess.PIPE, 697 shell = True,
700 stderr = subprocess.PIPE, 698 stdin = subprocess.PIPE,
701 close_fds = True) 699 stdout = subprocess.PIPE,
702 return p 700 stderr = subprocess.PIPE,
703 701 close_fds = True)
704 702 return p
703 elif version == 213:
704 p2p = "RNAfold"
705 p = subprocess.Popen( ([p2p, '--noPS', '-d 2', t, P]),
706 #shell = True,
707 stdin = subprocess.PIPE,
708 stdout = subprocess.PIPE,
709 stderr = subprocess.PIPE,
710 close_fds = True)
711 return p
712 else:
713 exit(0)
714
705 def consult_RNAfold(seq, p): 715 def consult_RNAfold(seq, p):
706 """ 716 """
707 Consults RNAfold listener 717 Consults RNAfold listener
708 """ 718 """
709 p.stdin.write(seq+'\n') 719 p.stdin.write(seq+'\n')
1130 def runColony(s, SC, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy): 1140 def runColony(s, SC, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy):
1131 """ 1141 """
1132 Execution function of a single ant colony finding one solution sequence 1142 Execution function of a single ant colony finding one solution sequence
1133 """ 1143 """
1134 retString = "" 1144 retString = ""
1135 retString2 = "" 1145 retString2 = []
1136 BPstack, LP = getBPStack(s, SC) 1146 BPstack, LP = getBPStack(s, SC)
1137 1147
1138 rGC = reachableGC(SC) 1148 rGC = reachableGC(SC)
1139 GC_message = "" 1149 GC_message = ""
1140 if GC > rGC: 1150 if GC > rGC:
1156 1166
1157 1167
1158 #### 1168 ####
1159 # INITIALIZATION OF THE RNA TOOLs 1169 # INITIALIZATION OF THE RNA TOOLs
1160 # 1170 #
1161 RNAfold = init_RNAfold(temperature, paramFile) 1171 RNAfold = init_RNAfold(213, temperature, paramFile)
1162 #RNAdistance = init_RNAdistance() 1172 #RNAdistance = init_RNAdistance()
1163 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+') 1173 RNAfold_pattern = re.compile('.+\n([.()]+)\s.+')
1164 #RNAdist_pattern = re.compile('.*\s([\d]+)') 1174 #RNAdist_pattern = re.compile('.*\s([\d]+)')
1165 # 1175 #
1166 #### 1176 ####
1348 retString += "|dGC:" + str(best_solution[4]) 1358 retString += "|dGC:" + str(best_solution[4])
1349 retString += "|GC:" + str(getGC(sequence)*100) 1359 retString += "|GC:" + str(getGC(sequence)*100)
1350 retString += "|dseq:" + str(getSequenceEditDistance(SC, sequence)) 1360 retString += "|dseq:" + str(getSequenceEditDistance(SC, sequence))
1351 retString += "|L:" + str(len(sequence)) 1361 retString += "|L:" + str(len(sequence))
1352 retString += "|Time:" + str(duration) 1362 retString += "|Time:" + str(duration)
1353 retString2 += "\n" + struct + "\n" 1363
1354 retString2 += sequence 1364 retString2.append(struct)
1365 retString2.append(sequence)
1355 1366
1356 # CLOSING THE PIPES TO THE PROGRAMS 1367 # CLOSING THE PIPES TO THE PROGRAMS
1357 RNAfold.communicate() 1368 RNAfold.communicate()
1358 #RNAdistance.communicate() 1369 #RNAdistance.communicate()
1359 1370
1383 struct_correction_term = float(struct_correction_term) 1394 struct_correction_term = float(struct_correction_term)
1384 GC_correction_term = float(GC_correction_term) 1395 GC_correction_term = float(GC_correction_term)
1385 seq_correction_term = float(seq_correction_term) 1396 seq_correction_term = float(seq_correction_term)
1386 colonies = int(colonies) 1397 colonies = int(colonies)
1387 file_id = str(file_id) 1398 file_id = str(file_id)
1388 verbose = verbose 1399 tmp_verbose = verbose
1389 output_verbose = output_verbose 1400 tmp_output_verbose = output_verbose
1401 verbose = tmp_output_verbose # Due to later change, this is a twistaround and a switching of purpose
1402 output_verbose = tmp_verbose # Due to later change, this is a twistaround and a switching of purpose
1390 correction_terms = struct_correction_term, GC_correction_term, seq_correction_term 1403 correction_terms = struct_correction_term, GC_correction_term, seq_correction_term
1391 temperature = float(temperature) 1404 temperature = float(temperature)
1392 print_to_STDOUT = (file_id == "STDOUT") 1405 print_to_STDOUT = (file_id == "STDOUT")
1393 1406
1394 useGU = useGU 1407 useGU = useGU
1427 output_v, output_w = runColony(structure, sequenceconstraint, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy) 1440 output_v, output_w = runColony(structure, sequenceconstraint, objective_to_target_distance, GC, alpha, beta, evaporation_rate, correction_terms, verbose, IUPAC, IUPAC_compatibles, degreeOfSequenceInducement, IUPAC_reverseComplements, termination_convergence, convergence_count, reset_limit, improve, temperature, paramFile, pseudoknots, strategy)
1428 1441
1429 # Post-Processing the output of a ant colony procedure 1442 # Post-Processing the output of a ant colony procedure
1430 line = ">" + name + str(col) 1443 line = ">" + name + str(col)
1431 if output_verbose: 1444 if output_verbose:
1432 line += "|Cstr:" + structure + "|Cseq:" + sequenceconstraint + "|Alpha:" + str(alpha) + "|Beta:" + str(beta) + "|tGC:" + str(GC) + "|ER:" + str(evaporation_rate) + "|Struct_CT:" + str(struct_correction_term) + "|GC_CT:" + str(GC_correction_term) + "|Seq_CT:" + str(seq_correction_term) + output_v + output_w 1445 line += "|Cstr:" + structure + "|Cseq:" + sequenceconstraint + "|Alpha:" + str(alpha) + "|Beta:" + str(beta) + "|tGC:" + str(GC) + "|ER:" + str(evaporation_rate) + "|Struct_CT:" + str(struct_correction_term) + "|GC_CT:" + str(GC_correction_term) + "|Seq_CT:" + str(seq_correction_term) + output_v + "\n" + "\n".join(output_w)
1433 else: 1446 else:
1434 line += output_w 1447 line += "\n" + output_w[1]
1435 if return_mod == False: 1448 if return_mod == False:
1436 if print_to_STDOUT: 1449 if print_to_STDOUT:
1437 print line 1450 print line
1438 else: 1451 else:
1439 if col == 0: 1452 if col == 0:
1467 1480
1468 1481
1469 alpha = args.alpha 1482 alpha = args.alpha
1470 beta = args.beta 1483 beta = args.beta
1471 tGC = args.tGC 1484 tGC = args.tGC
1485 if tGC < 0 or tGC > 1:
1486 print "Error: Chosen tGC not in range [0,1]"
1487 exit(1)
1472 evaporation_rate = args.ER 1488 evaporation_rate = args.ER
1473 struct_correction_term = args.Cstrweight 1489 struct_correction_term = args.Cstrweight
1474 GC_correction_term = args.Cgcweight 1490 GC_correction_term = args.Cgcweight
1475 seq_correction_term = args.Cseqweight 1491 seq_correction_term = args.Cseqweight
1476 colonies = args.noOfColonies 1492 colonies = args.noOfColonies
1521 MAIN EXECUTABLE WHICH PARSES THE INPUT LINE 1537 MAIN EXECUTABLE WHICH PARSES THE INPUT LINE
1522 """ 1538 """
1523 1539
1524 argument_parser = argparse.ArgumentParser( 1540 argument_parser = argparse.ArgumentParser(
1525 description = """ 1541 description = """
1526 Ant Colony Optimized RNA Sequence Design
1527 """,
1528
1529 epilog = """
1530 1542
1531 ######################################################################### 1543 #########################################################################
1532 # antaRNA - ant assembled RNA # 1544 # antaRNA - ant assembled RNA #
1533 # -> Ant Colony Optimized RNA Sequence Design # 1545 # -> Ant Colony Optimized RNA Sequence Design #
1534 # ------------------------------------------------------------ # 1546 # ------------------------------------------------------------ #
1543 1555
1544 - antaRNA was only tested under Linux. 1556 - antaRNA was only tested under Linux.
1545 1557
1546 - For questions and remarks please feel free to contact us at http://www.bioinf.uni-freiburg.de/ 1558 - For questions and remarks please feel free to contact us at http://www.bioinf.uni-freiburg.de/
1547 1559
1560 """,
1561
1562 epilog = """
1548 Example calls: 1563 Example calls:
1549 python antaRNA.py --Cstr "...(((...)))..." --tGC 0.5 -n 2 1564 python antaRNA.py --Cstr "...(((...)))..." --tGC 0.5 -n 2
1550 python antaRNA.py --Cstr ".........AAA(((...)))AAA........." --tGC 0.5 -n 10 --output_file /path/to/antaRNA_TESTRUN -ov 1565 python antaRNA.py --Cstr ".........AAA(((...)))AAA........." --tGC 0.5 -n 10 --output_file /path/to/antaRNA_TESTRUN -ov
1551 python antaRNA.py --Cstr "BBBBB....AAA(((...)))AAA....BBBBB" --Cseq "NNNNANNNNNCNNNNNNNNNNNGNNNNNNUNNN" --tGC 0.5 -n 10 1566 python antaRNA.py --Cstr "BBBBB....AAA(((...)))AAA....BBBBB" --Cseq "NNNNANNNNNCNNNNNNNNNNNGNNNNNNUNNN" --tGC 0.5 -n 10
1552 1567
1553 ######################################################################### 1568 #########################################################################
1554 # --- Hail to the King!!! All power to the swarm!!! --- # 1569 # --- Hail to the Queen!!! All power to the swarm!!! --- #
1555 ######################################################################### 1570 #########################################################################
1556 """, 1571 """,
1557 #formatter_class=RawTextHelpFormatter 1572 #formatter_class=RawTextHelpFormatter
1558 ) 1573 )
1559 1574
1560 # mandatorys 1575 # mandatorys
1561 argument_parser.add_argument("-Cstr", "--Cstr", help="Structure constraint using RNA dotbracket notation with fuzzy block constraint. \n(TYPE: %(type)s)\n\n", type=str, required=True) 1576 argument_parser.add_argument("-Cstr", "--Cstr", help="Structure constraint using RNA dotbracket notation with fuzzy block constraint. \n(TYPE: %(type)s)\n\n", type=str, required=True)
1562 argument_parser.add_argument("-tGC", "--tGC", help="Objective target GC content in [0,1].\n(TYPE: %(type)s)\n\n", type=float, required=True) 1577 argument_parser.add_argument("-tGC", "--tGC", help="Objective target GC content in [0,1].\n(TYPE: %(type)s)\n\n", type=float, required=True)
1563 argument_parser.add_argument("-n", "--noOfColonies", help="Number of sequences which shall be produced. \n(TYPE: %(type)s)\n\n\n\n", type=int, required=True) 1578 argument_parser.add_argument("-n", "--noOfColonies", help="Number of sequences which shall be produced. \n(TYPE: %(type)s)\n\n\n\n", type=int, default=1)
1564 argument_parser.add_argument("-GU", "--useGUBasePair", help="Allowing GU base pairs. \n\n", action="store_true") 1579 argument_parser.add_argument("-GU", "--useGUBasePair", help="Allowing GU base pairs. \n\n", action="store_true")
1565 1580
1566 argument_parser.add_argument("-s", "--seed", help = "Provides a seed value for the used pseudo random number generator.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="none") 1581 argument_parser.add_argument("-s", "--seed", help = "Provides a seed value for the used pseudo random number generator.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="none")
1567 argument_parser.add_argument("-ip", "--improve_procedure", help = "Select the improving method. h=hierarchical, s=score_based.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="s") 1582 argument_parser.add_argument("-ip", "--improve_procedure", help = "Select the improving method. h=hierarchical, s=score_based.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="s")
1568 argument_parser.add_argument("-r", "--Resets", help = "Amount of maximal terrain resets, until the best solution is retuned as solution.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=5) 1583 argument_parser.add_argument("-r", "--Resets", help = "Amount of maximal terrain resets, until the best solution is retuned as solution.\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=int, default=5)
1582 argument_parser.add_argument("-P", "--paramFile", help = "Changes the energy parameterfile of RNAfold. If using this explicitly, please provide a suitable energy file delivered by RNAfold. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="") 1597 argument_parser.add_argument("-P", "--paramFile", help = "Changes the energy parameterfile of RNAfold. If using this explicitly, please provide a suitable energy file delivered by RNAfold. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="")
1583 argument_parser.add_argument("-of","--output_file", help="Provide a path and an output file, e.g. \"/path/to/the/target_file\". \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="STDOUT") 1598 argument_parser.add_argument("-of","--output_file", help="Provide a path and an output file, e.g. \"/path/to/the/target_file\". \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="STDOUT")
1584 argument_parser.add_argument("-Cseq", "--Cseq", help="Sequence constraint using RNA nucleotide alphabet {A,C,G,U} and wild-card \"N\". \n(TYPE: %(type)s)\n\n", type=str, default = "") 1599 argument_parser.add_argument("-Cseq", "--Cseq", help="Sequence constraint using RNA nucleotide alphabet {A,C,G,U} and wild-card \"N\". \n(TYPE: %(type)s)\n\n", type=str, default = "")
1585 argument_parser.add_argument("-l", "--level", help="Sets the level of allowed influence of sequence constraint on the structure constraint [0:no influence; 3:extensive influence].\n(TYPE: %(type)s)\n\n", type=int, default = 1) 1600 argument_parser.add_argument("-l", "--level", help="Sets the level of allowed influence of sequence constraint on the structure constraint [0:no influence; 3:extensive influence].\n(TYPE: %(type)s)\n\n", type=int, default = 1)
1586 argument_parser.add_argument("--name", help="Defines a name which is used in the sequence output. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="antaRNA_") 1601 argument_parser.add_argument("--name", help="Defines a name which is used in the sequence output. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=str, default="antaRNA_")
1587 argument_parser.add_argument("-a", "--alpha", help="Sets alpha, probability weight for terrain path influence. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) 1602 argument_parser.add_argument("-a", "--alpha", help="Sets alpha, probability weight for terrain pheromone influence. [0,1] \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0)
1588 argument_parser.add_argument("-b", "--beta", help="Sets beta, probability weight for terrain pheromone influence. [0,1] \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0) 1603 argument_parser.add_argument("-b", "--beta", help="Sets beta, probability weight for terrain path influence. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=1.0)
1589 argument_parser.add_argument("-er", "--ER", help="Pheromone evaporation rate. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.2) 1604 argument_parser.add_argument("-er", "--ER", help="Pheromone evaporation rate. \n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.2)
1590 argument_parser.add_argument("-Cstrw", "--Cstrweight", help="Structure constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.5) 1605 argument_parser.add_argument("-Cstrw", "--Cstrweight", help="Structure constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=0.5)
1591 argument_parser.add_argument("-Cgcw", "--Cgcweight", help="GC content constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=5.0) 1606 argument_parser.add_argument("-Cgcw", "--Cgcweight", help="GC content constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n", type=float, default=5.0)
1592 argument_parser.add_argument("-Cseqw", "--Cseqweight", help="Sequence constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n\n", type=float, default=1.0) 1607 argument_parser.add_argument("-Cseqw", "--Cseqweight", help="Sequence constraint quality weighting factor. [0,1]\n(DEFAULT: %(default)s, TYPE: %(type)s)\n\n\n", type=float, default=1.0)
1593 argument_parser.add_argument("-v", "--verbose", help="Displayes intermediate output.\n\n", action="store_true") 1608 argument_parser.add_argument("-ov", "--output_verbose", help="Displayes intermediate output.\n\n", action="store_true")
1594 argument_parser.add_argument("-ov", "--output_verbose", help="Prints additional output to the headers of the produced sequences.\n\n", action="store_false") 1609 argument_parser.add_argument("-v", "--verbose", help="Prints additional features and stats to the headers of the produced sequences. Also adds the structure of the sequence.\n\n", action="store_true")
1595 1610
1596 args = argument_parser.parse_args() 1611 args = argument_parser.parse_args()
1597 1612
1598 execute(args) 1613 execute(args)
1599 1614