Mercurial > repos > iuc > cat_prepare
comparison tabpad.py @ 0:95f0873faec1 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
| author | iuc |
|---|---|
| date | Tue, 10 Dec 2019 21:03:35 +0000 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:95f0873faec1 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import re | |
| 5 | |
| 6 | |
def padfile(infile, outfile, fieldcnt=None):
    """Copy ``infile`` to ``outfile``, right-padding lines with TABs.

    A line whose first non-blank character is ``#`` is treated as a comment.
    For a run of comment lines that is followed by a data line, every comment
    except the last is copied verbatim and the last one is padded like a data
    line. Comment lines with no data line after them (including a file made
    up only of comments) are copied verbatim instead of being dropped.

    Padded lines have trailing CR/LF characters removed and are terminated
    with a single ``\\n``.

    :param infile: path of the TAB-separated text file to read
    :param outfile: path of the file to write (truncated if it exists)
    :param fieldcnt: number of fields every padded line should have; lines
        that already have at least this many fields get no padding, and a
        value of ``None`` or ``0`` disables padding altogether
    """
    def pad_line(txtline):
        line = txtline.rstrip('\r\n')
        # A falsy fieldcnt (None or 0) means "no target width". A negative
        # count multiplies out to an empty string, so over-long lines are
        # left untouched.
        missing = fieldcnt - len(line.split('\t')) if fieldcnt else 0
        out.write('%s%s\n' % (line, '\t' * missing))

    # Both handles are managed by the ``with`` statement so the output file
    # is closed even when reading or writing fails part-way through.
    with open(infile, 'r') as fh, open(outfile, 'w') as out:
        pending = []  # comment lines seen since the last data line
        for txtline in fh:
            if txtline.lstrip().startswith('#'):
                pending.append(txtline)
                continue
            if pending:
                out.writelines(pending[:-1])
                pad_line(pending[-1])
                pending = []
            pad_line(txtline)
        # Trailing comments have no data line to trigger the flush above;
        # write them out unchanged so no input is lost.
        out.writelines(pending)
| 31 | |
| 32 | |
def fieldcount(infile):
    """Return the largest number of TAB-separated fields on any line.

    Every line of ``infile`` is considered, comment lines included. Trailing
    CR/LF characters are ignored. An empty file yields ``0``.

    :param infile: path of the text file to scan
    :return: maximum field count over all lines
    """
    fieldcnt = 0
    with open(infile, 'r') as fh:
        for line in fh:
            # N TAB characters separate N + 1 fields; counting them avoids
            # building a list of fields just to measure its length.
            fieldcnt = max(fieldcnt, line.rstrip('\r\n').count('\t') + 1)
    return fieldcnt
| 39 | |
| 40 | |
def tsvname(infile):
    """Return the output name for ``infile``: its name with a ``.tsv`` suffix.

    A trailing ``.txt`` extension is replaced; any other name simply has
    ``.tsv`` appended.

    :param infile: input file name or path
    :return: the derived ``.tsv`` file name
    """
    suffix = '.txt'
    # Compare against the literal suffix: an unescaped regex dot would also
    # strip names such as 'notes_txt', which merely end in "txt".
    if infile.endswith(suffix):
        infile = infile[:-len(suffix)]
    return infile + '.tsv'
| 43 | |
| 44 | |
def __main__():
    """Parse the command line and pad each requested file.

    The ``-i/--input`` file, when given, is handled first and written to
    ``-o/--output`` or, without one, to the name derived by ``tsvname``.
    Every positional file is then written to its ``tsvname``-derived name.
    Each file is padded to its own maximum field count.
    """
    cli = argparse.ArgumentParser(
        description='Pad a file with TABS for equal field size across lines')
    cli.add_argument('-i', '--input', help='input file')
    cli.add_argument('-o', '--output', help='output file')
    cli.add_argument('files', nargs='*', help='.txt files')
    opts = cli.parse_args()

    # Collect (source, target) pairs first, keeping the explicit input ahead
    # of the positional files, then process them all the same way.
    jobs = []
    if opts.input:
        jobs.append((opts.input, opts.output or tsvname(opts.input)))
    jobs.extend((path, tsvname(path)) for path in opts.files)

    for source, target in jobs:
        padfile(source, target, fieldcnt=fieldcount(source))
| 64 | |
| 65 | |
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    __main__()
