Mercurial > repos > proteore > proteore_venn_diagram

--- a/venn_diagram.py	Thu Dec 13 04:19:23 2018 -0500
+++ b/venn_diagram.py	Mon May 13 09:55:45 2019 -0400
@@ -47,20 +47,24 @@
         if input_type == "file":
             header = inputs[i][3]
             ncol = inputs[i][4]
-            file_content = open(input_file, "r").readlines()
+            with open(input_file,"r") as handle :
+                file_content = csv.reader(handle,delimiter="\t")
+                file_content = list(file_content)   #csv object to list

-            # Check if column number is in right form
-            if isnumber("int", ncol.replace("c", "")):
-                if header == "true":
-                    file_content = [x.strip() for x in [line.split("\t")[int(ncol.replace("c", ""))-1].split(";")[0] for line in file_content[1:]]]     # take only first IDs
+                # Check if column number is in right form
+                if isnumber("int", ncol.replace("c", "")):
+                    if header == "true":
+                        file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content[1:]]]     # gets ids from defined column
+                    else:
+                        file_content = [x for x in [line[int(ncol.replace("c", ""))-1].split(";") for line in file_content]]
                 else:
-                    file_content = [x.strip() for x in [line.split("\t")[int(ncol.replace("c", ""))-1].split(";")[0] for line in file_content]]     # take only first IDs
-            else:
-                raise ValueError("Please fill in the right format of column number")
+                    raise ValueError("Please fill in the right format of column number")
         else:
             ids = set()
             file_content = inputs[i][0].split()
+            file_content = [x.split(";") for x in file_content]

+        file_content = [item.strip() for sublist in file_content for item in sublist if item != '']   #flat list of list of lists, remove empty items
         ids.update(file_content)
         if 'NA' in ids : ids.remove('NA')
         comp_dict[title] = ids
--- a/venn_diagram.xml	Thu Dec 13 04:19:23 2018 -0500
+++ b/venn_diagram.xml	Mon May 13 09:55:45 2019 -0400
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<tool id="Jvenn" name="Venn diagram" version="2018.12.12">
+<tool id="Jvenn" name="Venn diagram" version="2019.05.13">
 	<description>[JVenn]
 	</description>
 	<command><![CDATA[
@@ -8,41 +8,53 @@
 		#for $i, $s in enumerate($series)
 		    --input
 		    #if $s.se.input == "file"
-		        "$s.se.file" "$s.se.name" "file" "$s.se.header" "$s.se.ncol"
+		        "$s.se.file" "$s.name" "file" "$s.se.header" "$s.se.ncol"
 		    #else
-                "$s.se.list" "$s.se.name" "list"
+                "$s.se.list" "$s.name" "list"
  		    #end if
 		#end for
 		--summary "$output_summary"
 	]]></command>
 	<inputs>
 		<!-- Files -->
-		<repeat name="series" title="Lists to compare" min="2" max="6" >
+		<repeat name="series" title="List to compare" min="2" max="6" >
             <conditional name="se" >
-                    <param type="select" name="input" label="Please provide your identifiers" help="Copy/paste or ID list from a file (e.g. table)" >
-                        <option value="list">Copy/paste your identifiers </option>
-                        <option value="file" selected="true">Input file containing your identifiers</option>
-                    </param>
-                    <when value="file">
-                        <param type="data" name="file" format="txt,tabular" label="Choose a file that contains your list of IDs" />
-                        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does your input file contain header?" />
-		            	<param type="text" name="ncol" value="c1" label="Please specify the column where you would like to apply the comparison" help ='For example, fill in "c1" if you want to filter the first column' />
-		            	<param type="text" name="name" value="" label="Please enter the name of this list" help="This name will be displayed on venn diagram" />
-                    </when>
-                    <when value="list">
-                        <param type="text" name="list" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' >
-                            <sanitizer>
-                                <valid initial="string.printable">
-                                    <remove value="&apos;"/>
-                                </valid>
-                                <mapping initial="none">
-                                    <add source="&apos;" target="__sq__"/>
-                                </mapping>
-                            </sanitizer>
-                        </param>
-                        <param type="text" name="name" value="" label="Please enter the name of this list" help="This name will be displayed on venn diagram" />
-                    </when>
-                </conditional>
+				<param type="select" name="input" label="Enter your list" help="Copy/paste or from a file (e.g. table)" >
+					<option value="list">Copy/paste list </option>
+					<option value="file" selected="true">Input file containing your list</option>
+				</param>
+				<when value="file">
+					<param type="data" name="file" format="txt,tabular" label="Select your file" />
+					<param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+					<param type="text" name="ncol" value="c1" label="Column number on which apply the comparison" help ='For example, fill in "c1" if you want to filter the first column'>
+						<validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+					</param>
+				</when>
+				<when value="list">
+					<param type="text" name="list" label="Copy/paste list" help='IDs must be separated by spaces into the form field, for example: P31946 P62258' >
+						<sanitizer>
+							<valid initial="string.printable">
+								<remove value="&apos;"/>
+								<remove value="/"/>
+							</valid>
+							<mapping initial="none">
+								<add source="&apos;" target="__sq__"/>
+							</mapping>
+						</sanitizer>
+					</param>
+				</when>
+			</conditional>
+			<param type="text" name="name" value="" label="Enter the name of this list" help="This name will be displayed on venn diagram" optional="false">
+				<sanitizer>
+					<valid initial="string.printable">
+						<remove value="&apos;"/>
+					</valid>
+					<mapping initial="none">
+						<add source="&apos;" target="__sq__"/>
+					</mapping>
+				</sanitizer>
+				<validator type="regex" message="Please enter a list name">[a-zA-Z0-9._-]+</validator>
+			</param>
 		</repeat>
 	</inputs>
 	<outputs>
@@ -82,42 +94,67 @@
 	        <output name="output_text" file="Venn_text_output.txt" />
 	    </test>
 	</tests>
-	<help>
+	<help><![CDATA[
+
+**Description**

-This tool draw a venn diagram from lists/files using Jvenn plug-in (http://jvenn.toulouse.inra.fr/app/index.html). It also creates output files that contain common or specific elements between query and each compared lists/files.
+This tool cross-compares several lists sharing a common key (e.g. IDs) and draws a Venn diagram from the lists/files using the Jvenn plug-in (see Authors below).
+It also creates an output text file that contains the common or specific elements between the query and each of the compared lists/files.
+
+-----

 **Inputs**

-* **Query file:** A file containing different information of proteins, could be output of previous components.
+Each input can be either a list entered in copy/paste mode or a single- or multi-column file (txt, tsv, csv, tab, output from other tools); up to six lists/files can be compared.

-* **File of a list of IDs:** .TXT format, each line contains 1 ID
+* **List of IDs in a copy/paste mode:** IDs have to be separated by a space (e.g. AMY1A ALB IGKC CSTA IGHA1 ACTG1)
+
+* **ID list file:** for example a file in .txt format, with 1 ID per line

-  AMY1A
+.. csv-table:: tab1
+    :header: "Ids"
+

-  ALB
-
-  IGKC
+    "AMY1A"
+    "ALB"
+    "IGKC"
+    "CSTA"
+    "IGHA1"
+
+If you use a file as input list, it is necessary to specify the column number on which to apply the comparison.

-  CSTA
+IDs on the same line are split on ";", so if you have this kind of input:

-  IGHA1
+.. csv-table:: tab with multiple ids per line
+    :header: "Ids"

-  ACTG1
-
-* **List of IDs:** IDs separated by a space
+    "P22531"
+    "P04792"
+    "P01834"
+    "Q96KK5"
+    "Q06830;P60709;P13646;P31949"
+    "P06702"
+    "P14923"
+    "Q13835"

-  AMY1A ALB IGKC CSTA IGHA1 ACTG1
+All IDs will be used (Q06830, P60709, P13646 and P31949 will be treated as four separate IDs).

-If you choose a file, it is necessary to specify the column where you would like to perform the comparison.
+-----

-**Outputs**
+**Parameter**
+
+"Enter the name of this list": each list or set should be named; this name is reported in both outputs (see below)
+
+-----

-* **Summary file** (venn_diagram_summary.html):
-    Venn diagram: Could be downloaded as image (PNG, SVG)
+**Output**

-* **Venn text output file**
-    A text file containing common/specific elements among compared lists/files.
-
+Two outputs are generated:
+
+* **Graphical file**: Venn diagram that you can either display (interactive mode) or download as image (PNG, SVG format)
+
+* **Venn text output file**: a text file containing the elements that are specific to, or shared by, the compared lists/files.
+
 -----

 .. class:: infomark
@@ -135,8 +172,7 @@
 Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit,Migale Bioinformatics platform

 Contact support@proteore.org for any questions or concerns about the Galaxy implementation of this tool.
-
-	</help>
-	<citations>
-    </citations>
+]]></help>
+   <citations>
+   </citations>
 </tool>