Mercurial > repos > yating-l > regtools_junctions_extract
comparison validator.py @ 2:cf258ca024ff draft
planemo upload commit ca69686dfafcabb815c93fd46d3c4dfe57459e39-dirty
| author | yating-l |
|---|---|
| date | Mon, 19 Dec 2016 14:49:37 -0500 |
| parents | 2626c5b4c665 |
| children | 24fc8a8efe19 |
comparison
equal
deleted
inserted
replaced
| 1:e9dcea52d079 | 2:cf258ca024ff |
|---|---|
| 5 create a column and move the score column to that column. | 5 create a column and move the score column to that column. |
| 6 """ | 6 """ |
| 7 def checkAndFixBed(bedfile, revised_file): | 7 def checkAndFixBed(bedfile, revised_file): |
| 8 # Store the lines that have been removed | 8 # Store the lines that have been removed |
| 9 removedLines = [] | 9 removedLines = [] |
| | 10 scoreLines = [] |
| 10 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 | 11 # Remove the lines with invalid strand, create a score column to store the original scores and change scores in the original score column all to 1000 |
| 11 with open(revised_file, 'w') as tmp: | 12 with open(revised_file, 'w') as tmp: |
| 12 with open(bedfile, 'r') as f: | 13 with open(bedfile, 'r') as f: |
| 13 lines = f.readlines() | 14 lines = f.readlines() |
| 14 i = 1 | 15 i = 1 |
| 15 for line in lines: | 16 for line in lines: |
| 16 fields = line.split() | 17 fields = line.split() |
| 17 strand = fields[5] | 18 strand = fields[5] |
| 18 score = fields[4] | 19 score = fields[4] |
| 19 fields[4] = '1000' | 20 if (int(fields[4]) > 1000): |
| 20 fields.append(score) | 21 scoreLines.append("line" + str(i) + ":" + line) |
| | 22 fields[4] = '1000' |
| 21 if (strand == '+' or strand == '-'): | 23 if (strand == '+' or strand == '-'): |
| 22 tmp.write('\t'.join(map(str, fields))) | 24 tmp.write('\t'.join(map(str, fields))) |
| 23 tmp.write("\n") | 25 tmp.write("\n") |
| 24 else: | 26 else: |
| 25 removedLines.append("line" + str(i) + ": " + line) | 27 removedLines.append("line" + str(i) + ": " + line) |
| 26 i = i+1 | 28 i = i+1 |
| 27 | 29 |
| 28 return removedLines | 30 return removedLines, scoreLines |
| 29 | 31 |
| 30 def main(): | 32 def main(): |
| 31 inputfile = str(sys.argv[1]) | 33 inputfile = str(sys.argv[1]) |
| 32 outputfile = str(sys.argv[2]) | 34 outputfile = str(sys.argv[2]) |
| 33 removed = checkAndFixBed(inputfile, outputfile) | 35 removed, changed = checkAndFixBed(inputfile, outputfile) |
| 34 if (removed != []): | 36 if (removed != []): |
| 35 print "\nRemoved invalid lines: \n" | 37 print "\nRemoved invalid lines: \n" |
| 36 print "\n".join(removed) | 38 print "\n".join(removed) |
| | 39 if (changed != []): |
| | 40 print "\nThe following lines have scores > 1000, so they are changed to 1000:\n" |
| | 41 print "\n".join(changed) |
| 37 | 42 |
| 38 if __name__ == "__main__": | 43 if __name__ == "__main__": |
| 39 main() | 44 main() |
