annotate column_remove_by_header.py @ 0:a87236651ddb draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 2150a3264364471090b650bdffde9f9c0b47ac39
author iuc
date Wed, 12 Apr 2017 17:17:15 -0400
parents
children 2a5f2bc24ffa
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a87236651ddb planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/column_remove_by_header commit 2150a3264364471090b650bdffde9f9c0b47ac39
iuc
parents:
diff changeset
#!/usr/bin/env python
"""Keep or remove columns of a delimited text file, selected by header name.

Usage:
    column_remove_by_header.py INPUT OUTPUT DELIMITER MODE STRIP [NAME ...]

INPUT      path of the delimited file; its first line is the header
OUTPUT     path the filtered file is written to
DELIMITER  field separator, used both to split the header and as awk FS/OFS
MODE       "--keep" keeps the named columns; any other value removes them
STRIP      characters stripped from the left of the first header field
           before it is compared (pass "" to disable)
NAME ...   header names to keep or remove

Only the header line is read in Python; the actual filtering is delegated
to gawk, whose exit status becomes the exit status of this script.
"""

import subprocess
import sys

AWK_CMD = """BEGIN{FS="%s"; OFS="%s";} {print %s;}"""


def select_columns(header, names, keep_columns, strip_characters=""):
    """Return the 1-based indexes of the header fields that should be output.

    header           -- list of header field strings
    names            -- iterable of column names given by the user
    keep_columns     -- True: output only fields found in names;
                        False: output only fields NOT found in names
    strip_characters -- characters removed from the left of the FIRST field
                        only, before it is compared against names
    """
    wanted = set(names)
    keep_columns = bool(keep_columns)
    columns = []
    for index, key in enumerate(header, 1):
        if index == 1 and strip_characters:
            key = key.lstrip(strip_characters)
        # A field is selected when "is it named?" agrees with the mode.
        if (key in wanted) == keep_columns:
            columns.append(index)
    return columns


def build_awk_command(delimiter, columns):
    """Return the awk program that prints the given 1-based columns.

    NOTE: delimiter is interpolated into an awk string literal unescaped,
    so a delimiter containing a double quote or a backslash is interpreted
    by awk rather than taken literally.
    """
    if columns:
        fields = ",".join("$%s" % column for column in columns)
    else:
        # A bare awk `print` outputs the whole record ($0); when no column
        # is selected, print an empty string so nothing is kept.
        fields = '""'
    return AWK_CMD % (delimiter, delimiter, fields)


def main(argv):
    """Run the tool for the given argument vector and return gawk's status."""
    if len(argv) < 6:
        sys.stderr.write(
            "usage: %s INPUT OUTPUT DELIMITER MODE STRIP [NAME ...]\n" % argv[0]
        )
        return 2

    input_filename = argv[1]
    output_filename = argv[2]
    delimiter = argv[3]
    keep_columns = argv[4] == "--keep"
    strip_characters = argv[5]
    names = argv[6:]

    # Only the first line is needed to work out which columns to print.
    with open(input_filename, 'r') as fh:
        header = fh.readline().strip('\r\n').split(delimiter)

    columns = select_columns(header, names, keep_columns, strip_characters)
    # Single formatted string so the message is the same on Python 2 and 3.
    print("Kept %d of %d columns." % (len(columns), len(header)))

    awk_cmd = build_awk_command(delimiter, columns)
    # Context manager guarantees the output handle is closed after gawk exits.
    with open(output_filename, 'wb+') as out:
        return subprocess.call(
            ['gawk', awk_cmd, input_filename], stdout=out, shell=False
        )


if __name__ == "__main__":
    sys.exit(main(sys.argv))