changeset 0:7f669682f4ac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
author iuc
date Mon, 06 Oct 2025 12:13:07 +0000
parents
children 1090ae5e7b29
files README.rst ena_webin_cli.xml process_input.py test-data/phiX2.agp test-data/phiX2.fasta test-data/phiX2.fasta.gz test-data/phiX3.fasta test-data/phiX3.tsv test-data/receipt_sample.txt test-data/receipt_sample_nophiX2.txt test-data/receipt_sample_phiX3.txt test-data/sample_alias_001.fasta.gz
diffstat 12 files changed, 1069 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,39 @@
+Webin CLI wrapper
+======================
+This tool supports the submission of genome assemblies to ENA using
+`Webin-CLI <https://github.com/enasequence/webin-cli>`__.
+
+This is a Galaxy wrapper for the `Webin-CLI <https://github.com/enasequence/webin-cli>`__ tool from `ENA <https://www.ebi.ac.uk/ena/browser/home>`__.
+
+
+Setting up credentials on Galaxy
+--------------------------------
+
+The admin of the server can set up global credentials through a file
+with the format:
+
+.. code-block:: yaml
+
+   username: webin_id
+   password: webin_password
+
+The path to this file must be exported as an environment variable called
+$GALAXY_ENA_SECRETS
+
+Alternatively, the admin can enable users to set their own credentials
+for this tool. To enable it, make sure the file
+``config/user_preferences_extra_conf.yml`` has the following section:
+
+.. code-block:: yaml
+
+       ena_webin_account:
+           description: Your ENA Webin account details
+           inputs:
+               - name: webin_id
+                 label: ENA Webin ID
+                 type: text
+                 required: False
+               - name: password
+                 label: Password
+                 type:  password
+                 required: False
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ena_webin_cli.xml	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,518 @@
+<tool id="ena_webin_cli" name="ENA Webin CLI" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="MIT" profile="24.2">
+    <description>Submission of consensus sequences to the European Nucleotide Archive (ENA)</description>
+    <macros>
+        <token name="@TOOL_VERSION@">9.0.1</token>
+        <token name="@VERSION_SUFFIX@">0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">ena-webin-cli</requirement>
+        <requirement type="package" version="5.3">pyyaml</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+## -----------------------------------------------------------------------------
+## 1) Initialize log and credentials
+## -----------------------------------------------------------------------------
+
+## Truncate (or create) the Galaxy-captured Webin-CLI log dataset.
+: > "$webin_cli_log";
+
+## Galaxy renders <configfile name="credentials"> to a job-specific path and exposes
+## that path as the Cheetah variable 'credentials'. It must NOT be shadowed with a
+## literal file name, otherwise real submissions read a file that does not exist.
+## In dry-run we override it with a fake file so tests don't require real creds.
+#if $dry_run == "true":
+    #set $credentials = './test_fake_path'
+    echo "username:test_user" > '$credentials';
+    echo "password:test_password" >> '$credentials';
+#end if
+
+## Extract username/password from the credentials file.
+## - The key is anchored at line start (leading blanks allowed) so that, e.g., a
+##   password containing the word "username" cannot be picked up by mistake.
+## - 'cut -f2-' keeps everything after the first ':' so passwords containing ':'
+##   are not truncated.
+## NOTE: the bash variable webin_id is referenced below, so its dollar sign is escaped.
+webin_id=`grep -m 1 '^[[:space:]]*username:' '$credentials' | cut -d':' -f2-`;
+if [ -z "\$webin_id" ]; then
+    ## Fail early with guidance if no username is present.
+    echo "No ENA credentials defined. Set your credentials via: User -> Preferences -> Manage Information" >&2;
+    exit 1;
+else
+  ## Pull the password similarly.
+  password=`grep -m 1 '^[[:space:]]*password:' '$credentials' | cut -d':' -f2-`;
+fi;
+
+## -----------------------------------------------------------------------------
+## 2) Create a base manifest (fields common to all submissions)
+## -----------------------------------------------------------------------------
+
+## Name of the base manifest template (we append more fields later).
+#set $manifest_base = 'manifest_base.tab'
+
+## Working dirs: per-sample manifests and input sequences.
+mkdir -p manifests;
+mkdir -p fasta;
+
+## Write assembly-level fields to the base manifest.
+echo -e 'ASSEMBLY_TYPE\t$assembly_type' > $manifest_base;
+echo -e 'COVERAGE\t$coverage' >> $manifest_base;
+echo -e 'PROGRAM\t$assembly_program' >> $manifest_base;
+#if $min_gap_length:
+    echo -e 'MINGAPLENGTH\t$min_gap_length' >> $manifest_base;
+#end if
+echo -e 'MOLECULETYPE\t$molecule_type' >> $manifest_base;
+
+## -----------------------------------------------------------------------------
+## 3) Build per-sample manifests depending on metadata workflow
+## -----------------------------------------------------------------------------
+
+#if $metadata_file_or_form.metadata_format == "file":
+    ## --------------------------- FILE-DRIVEN WORKFLOW ------------------------
+    ## For each selected FASTA:
+    ##   - if uncompressed (.fasta), gzip it into ./fasta/<name>.fasta.gz
+    ##   - if already .fasta.gz, symlink it into ./fasta
+    #import re
+    #for $file in $metadata_file_or_form.genome_fasta:
+        #if $file.is_of_type('fasta'):
+            #set $full_name = $file.element_identifier + '.gz'
+            gzip -c '$file' > './fasta/$full_name';
+        #else:
+            ln -s '$file' './fasta/$file.element_identifier';
+        #end if
+    #end for
+
+    ## Optionally link AGP and chromosome list files if provided (one per sample).
+    #if $metadata_file_or_form.agp_file:
+        #for $file in $metadata_file_or_form.agp_file:
+            ln -s '$file' './fasta/$file.element_identifier';
+        #end for
+    #end if
+    #if $metadata_file_or_form.chr_list_file:
+        #for $file in $metadata_file_or_form.chr_list_file:
+            #set $chr_name = $file.element_identifier + '.gz'
+            gzip -c '$file' > './fasta/$chr_name';
+        #end for
+    #end if
+
+    ## Helper script:
+    ##   - parses ENA receipt (study/sample accessions, platform),
+    ##   - writes per-sample manifests into ./manifests using $manifest_base,
+    ##   - emits "submit_list.tab" (one manifest path per line).
+    python3 '$__tool_directory__/process_input.py' $metadata_file_or_form.ena_receipt $genome_fasta_files './manifests' $manifest_base >> "$webin_cli_log" 2>&1;
+
+    ## Extract center name from the receipt (used as -centerName).
+    center_name=`grep 'center_name' $metadata_file_or_form.ena_receipt | cut -f2,2 | tr -d '\n'`;
+
+    ## Log if submit_list.tab exists, and dump its content for debugging.
+    if [ -s submit_list.tab ]; then
+        echo "submit_list.tab present:" >> "$webin_cli_log" 2>&1;
+        cat submit_list.tab >> "$webin_cli_log" 2>&1;
+    else
+        echo "submit_list.tab is missing or empty" >> "$webin_cli_log" 2>&1;
+    fi;
+
+#else:
+    ## --------------------------- FORM-DRIVEN WORKFLOW ------------------------
+    ## Single-manifest flow: copy base, then append form fields.
+    ## All data files referenced by the manifest are staged in the job working
+    ## directory, because Webin-CLI is later invoked with "-inputDir ./".
+    #set $generated_manifest='./manifests/generated_manifest.txt'
+    cp $manifest_base $generated_manifest;
+
+    ## Use local Cheetah vars for readability.
+    #set $study_id = $metadata_file_or_form.study_accession
+    #set $sample_id = $metadata_file_or_form.sample_accession
+
+    ## Required accessions.
+    echo -e 'STUDY\t$study_id' >> $generated_manifest;
+    echo -e 'SAMPLE\t$sample_id' >> $generated_manifest;
+
+    ## center_name is given by the user in this path.
+    center_name='$metadata_file_or_form.center_name';
+
+    ## Assembly name and platform.
+    echo -e 'NAME\t$metadata_file_or_form.assembly_name' >> $generated_manifest;
+    echo -e 'PLATFORM\t$metadata_file_or_form.sequencing_platform' >> $generated_manifest;
+
+    ## Optional free-text description: only written when the user filled it in.
+    #if $metadata_file_or_form.description and str($metadata_file_or_form.description).strip():
+        echo -e 'DESCRIPTION\t$metadata_file_or_form.description' >> $generated_manifest;
+    #end if
+
+    ## Stage the FASTA under the name referenced by the manifest:
+    ##   - plain FASTA is gzipped into <name>.gz
+    ##   - already-gzipped FASTA is symlinked so Webin-CLI can find it in -inputDir
+    #if $metadata_file_or_form.genome_fasta.is_of_type('fasta'):
+        #set $fasta_file_name = $metadata_file_or_form.genome_fasta.element_identifier + '.gz'
+        gzip -c '$metadata_file_or_form.genome_fasta' > '$fasta_file_name';
+    #else:
+        #set $fasta_file_name = $metadata_file_or_form.genome_fasta.element_identifier
+        ln -s '$metadata_file_or_form.genome_fasta' '$fasta_file_name';
+    #end if
+    echo -e 'FASTA\t$fasta_file_name' >> $generated_manifest;
+
+    ## Optional extras for chromosome-scale assemblies.
+    #if $metadata_file_or_form.agp_file:
+        ## Symlink the AGP next to the FASTA so the manifest entry resolves.
+        ln -s '$metadata_file_or_form.agp_file' '$metadata_file_or_form.agp_file.element_identifier';
+        echo -e 'AGP\t$metadata_file_or_form.agp_file.element_identifier' >> $generated_manifest;
+    #end if
+    #if $metadata_file_or_form.chr_list_file:
+        ## The chromosome list is always gzipped and referenced as <name>.gz.
+        #set $chr_file_name = $metadata_file_or_form.chr_list_file.element_identifier + '.gz'
+        gzip -c '$metadata_file_or_form.chr_list_file' > '$chr_file_name';
+        echo -e 'CHROMOSOME_LIST\t$chr_file_name' >> $generated_manifest;
+    #end if
+#end if
+
+## -----------------------------------------------------------------------------
+## 4) Prepare output directory and build CLI flags safely with Cheetah
+## -----------------------------------------------------------------------------
+
+## Webin-CLI will write receipts/logs under this directory (we later tar it).
+#set $outputs_dir = 'outputs'
+mkdir -p "$outputs_dir";
+
+## Build flags using #set (safer than inline #if within a single shell line).
+#set $test_flag = ''
+#if $submit_test == "true":
+    #set $test_flag = ' -test'
+#end if
+
+## By default we submit; in dry_run we validate instead.
+#set $action_flag = ' -submit'
+#if $dry_run == "true":
+    #set $action_flag = ' -validate'
+#end if
+
+## -----------------------------------------------------------------------------
+## 5) Execute Webin-CLI
+## -----------------------------------------------------------------------------
+
+#if $metadata_file_or_form.metadata_format == "file":
+    ## Loop over each manifest written by process_input.py (submit_list.tab).
+    while IFS= read -r line; do
+        ## Extract the manifest path (first whitespace-delimited field).
+        manifest=`echo "\$line" | cut -d' ' -f1`;
+
+        ## Log which manifest we are submitting.
+        echo "Submitting manifest \$manifest" >> "$webin_cli_log" 2>&1;
+
+        ## Invoke Webin-CLI with computed flags.
+        ena-webin-cli -context genome -manifest "\$manifest" -userName "'\$webin_id'" -password "'\$password'" -centerName "'\$center_name'"  -inputDir './fasta' $test_flag $action_flag -outputDir $outputs_dir >> '$webin_cli_log' 2>&1 || true;
+    done < submit_list.tab;
+
+#else:
+    ## Single run in "form" mode with the one generated manifest.
+    ena-webin-cli -context genome -manifest $generated_manifest -userName "'\$webin_id'" -password "'\$password'" -centerName "'\$center_name'" -inputDir ./  $test_flag $action_flag -outputDir $outputs_dir >> "$webin_cli_log" 2>&1 || true;
+#end if
+
+## -----------------------------------------------------------------------------
+## 6) Package outputs for Galaxy
+## -----------------------------------------------------------------------------
+
+## Tar up the Webin-CLI output directory so Galaxy can collect a single dataset.
+tar -cf $webin_cli_outputs $outputs_dir ;
+]]></command>
+
+    <!--
+      Config files rendered by Galaxy *before* the command runs.
+      They are plain text files placed in the job working directory and referenced above.
+    -->
+    <configfiles>
+        <!-- Credentials file:
+             Pulls stored ENA Webin details (if set) from the Galaxy user preferences and writes
+             simple "username:..." and "password:..." lines. The command reads from this file. -->
+        <configfile name="credentials"><![CDATA[
+#set $webin_id = $__user__.extra_preferences.get('ena_webin_account|webin_id', "").strip()
+#set $password = $__user__.extra_preferences.get('ena_webin_account|password', "").strip()
+#if $webin_id != "":
+    username:$webin_id
+    password:$password
+#end if
+        ]]></configfile>
+
+        <!-- genome_fasta_files:
+             In "file" mode, build a JSON array containing the *element_identifier* (dataset name)
+             for each selected FASTA. process_input.py uses these names to derive sample aliases. -->
+        <configfile name="genome_fasta_files">
+#import json
+#import re
+#if $metadata_file_or_form.metadata_format == "file":
+    #set $fasta_files_list = list()
+    #for $file in $metadata_file_or_form.genome_fasta:
+        $fasta_files_list.append(str($file.element_identifier))
+    #end for
+    #echo json.dumps($fasta_files_list)
+#end if
+        </configfile>
+    </configfiles>
+
+    <!--
+      User-facing inputs:
+      - Assembly-level parameters
+      - Choice of metadata workflow (file vs form) with corresponding fields
+      - Submission toggles for ENA test server and validation-only
+    -->
+    <inputs>
+        <param name="assembly_type" type="select" label="Assembly type">
+            <option value="clone">Clone</option>
+            <option value="isolate">Isolate</option>
+            <option value="COVID-19 outbreak">COVID-19 outbreak</option>
+        </param>
+        <param name="assembly_program" type="text" optional="False" label="Assembly program"/>
+        <param name="molecule_type" type="select" label="Molecule type">
+            <option value="genomic RNA" selected="True">genomic RNA</option>
+            <option value="viral cRNA">viral cRNA</option>
+            <option value="genomic DNA">genomic DNA</option>
+        </param>
+        <param name="coverage" type="float" optional="False" value="10000" label="Coverage"/>
+        <param name="min_gap_length" type="text" optional="True" label="Minimum gap length (optional)"/>
+
+        <conditional name="metadata_file_or_form">
+            <param name="metadata_format" type="select" label="Select the method to load study and sample metadata">
+                <option value="form" selected="True">Fill in required submission metadata</option>
+                <option value="file">I used the Galaxy ENA upload tool to submit the raw data: parse the received submission receipt</option>
+            </param>
+
+            <!-- FILE workflow: receipt + multiple FASTA (+ optional AGP/TSV) -->
+            <when value="file">
+                <param type="data" format="txt" name="ena_receipt" optional="False" label="Submission receipt obtained from ENA upload tool"/>
+                <param name="genome_fasta" type="data" optional="False" label="Select the consensus sequence assembly files or a collection of them. Use following syntax: sample_alias.fasta or sample_alias.fasta.gz" format="fasta,fasta.gz" multiple="true"/>
+                <param name="agp_file" type="data" optional="True" label="Sequences in AGP format. Use following syntax: sample_alias.agp" format="agp" multiple="true"/>
+                <param name="chr_list_file" type="data" optional="True" label="Chromosome List File, must be provided when the submission contains assembled chromosomes. Use following syntax: sample_alias.tsv" format="tsv" multiple="true"/>
+            </when>
+
+            <!-- FORM workflow: single, user-specified submission -->
+            <when value="form">
+                <param name="assembly_name" type="text" optional="False" label="Assembly name"/>
+                <param name="study_accession" type="text" optional="False" label="Study accession"/>
+                <param name="sample_accession" type="text" optional="False" label="Sample accession"/>
+                <param name="sequencing_platform" type="text" optional="False" label="Sequencing platform"/>
+                <param name="description" type="text" optional="True" value="" label="Description" help="Free text description of the genome assembly (optional)"/>
+                <param name="center_name" type="text" optional="False" label="Center name"/>
+                <param name="genome_fasta" type="data" optional="False" label="Select the consensus sequence assembly file" format="fasta,fasta.gz"/>
+                <param name="agp_file" type="data" optional="True" label="Sequences in AGP format." format="agp"/>
+                <param name="chr_list_file" type="data" optional="True" label="Chromosome List File, must be provided when the submission contains assembled chromosomes." format="tsv"/>
+            </when>
+        </conditional>
+
+        <!-- Submission controls -->
+        <param name="submit_test" type="boolean" truevalue="true" falsevalue="false" label="Submit to ENA test server" help="Uploads to the test server of ENA will not be made public and will be removed automatically in 24 hours. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find these uploads at https://wwwdev.ebi.ac.uk/ena/." />
+        <param name="dry_run" type="boolean" truevalue="true" falsevalue="false" label="Validate files and metadata but do not submit" help="Generate input files and run Webin-CLI with -validate option. If 'No' is selected then it will validate and submit (-submit flag)"/>
+    </inputs>
+
+    <!--
+      Outputs:
+      - generated_manifests: discovered in manifests/ (via regex) for transparency
+      - webin_cli_log: combined stdout/stderr + helper echo statements
+      - webin_cli_outputs: tar archive of the Webin-CLI output directory
+    -->
+    <outputs>
+        <collection name="generated_manifests" type="list" label="Generated manifests">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" ext="txt" directory="manifests/"/>
+        </collection>
+        <data name="webin_cli_log" label="ENA submission log" format="txt"/>
+        <data name="webin_cli_outputs" label="Webin cli outputs" format="tar"/>
+        
+    </outputs>
+
+    <tests>
+        <!-- Test 1: FORM workflow, no chr/AGP -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="viral cRNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="form"/>
+                <param name="assembly_name" value="Test assembly name"/>
+                <param name="study_accession" value="PRJEB49173"/>
+                <param name="sample_accession" value="SAMEA11953908"/>
+                <param name="sequencing_platform" value="Nanopore 0011"/>
+                <param name="description" value="Test Description"/>
+                <param name="center_name" value="Test center name"/>
+                <param name="genome_fasta" value="phiX2.fasta"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\."/>
+                </assert_contents>
+            </output>
+            <output_collection name="generated_manifests">
+                <element name="generated_manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
+                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 2: FORM workflow, chr list present -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="genomic DNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="form"/>
+                <param name="assembly_name" value="Test assembly name"/>
+                <param name="study_accession" value="PRJEB49173"/>
+                <param name="sample_accession" value="SAMEA11953908"/>
+                <param name="sequencing_platform" value="Nanopore 0011"/>
+                <param name="description" value="Test Description"/>
+                <param name="center_name" value="Test center name"/>
+                <param name="genome_fasta" value="phiX3.fasta"/>
+                <param name="chr_list_file" value="phiX3.tsv"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\."/>
+                </assert_contents>
+            </output>
+            <output_collection name="generated_manifests">
+                <element name="generated_manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tphiX3\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\tphiX3\.tsv\.gz$"/>
+                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 3: FILE workflow, two FASTAs; one missing metadata -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="viral cRNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="file"/>
+                <param name="ena_receipt" value="receipt_sample_nophiX2.txt"/>
+                <param name="genome_fasta" value="phiX2.fasta.gz,sample_alias_001.fasta.gz"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="Processing phiX2"/>
+                    <has_text_matching expression="No metadata found for sample phiX2"/>
+                    <has_text_matching expression="Processing sample_alias_001"/>
+                    <has_text_matching expression="Submitting manifest .*manifests/sample_alias_001\.manifest\.txt"/>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
+                </assert_contents>
+            </output>
+            <output_collection name="generated_manifests">
+                <element name="sample_alias_001.manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tsample_alias_001\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
+                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 4: FILE workflow, single FASTA with metadata -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="viral cRNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="file"/>
+                <param name="ena_receipt" value="receipt_sample.txt"/>
+                <param name="genome_fasta" value="sample_alias_001.fasta.gz"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="Processing sample_alias_001"/>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- Test 5: FILE workflow, AGP for phiX2 -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="genomic DNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="file"/>
+                <param name="ena_receipt" value="receipt_sample.txt"/>
+                <param name="genome_fasta" value="phiX2.fasta"/>
+                <param name="agp_file" value="phiX2.agp"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="Processing phiX2"/>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
+                </assert_contents>
+            </output>
+            <output_collection name="generated_manifests">
+                <element name="phiX2.manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^AGP\tphiX2\.agp$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+
+        <!-- Test 6: FILE workflow, chr list for phiX3 + extra fasta -->
+        <test>
+            <param name="submit_test" value="true" />
+            <param name="dry_run" value="true" />
+            <param name="assembly_type" value="isolate"/>
+            <param name="assembly_program" value="Test assembly program"/>
+            <param name="molecule_type" value="genomic DNA"/>
+            <param name="coverage" value="10000"/>
+            <conditional name="metadata_file_or_form">
+                <param name="metadata_format" value="file"/>
+                <param name="ena_receipt" value="receipt_sample_phiX3.txt"/>
+                <param name="genome_fasta" value="phiX3.fasta,phiX2.fasta.gz"/>
+                <param name="chr_list_file" value="phiX3.tsv"/>
+            </conditional>
+            <param name="min_gap_length" value="30"/>
+            <output name="webin_cli_log">
+                <assert_contents>
+                    <has_text_matching expression="Processing phiX3"/>
+                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
+                </assert_contents>
+            </output>
+           <output_collection name="generated_manifests">
+                <element name="phiX2.manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
+                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
+                    </assert_contents>
+                </element>
+                <element name="phiX3.manifest">
+                    <assert_contents>
+                        <has_text_matching expression="(?m)^FASTA\tphiX3\.fasta\.gz$"/>
+                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\tphiX3\.tsv\.gz$"/>
+                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+
+
+
+    <!-- Help text + citation -->
+    <help><![CDATA[
+        This tool is a wrapper for the ENA Webin CLI submission tool (https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html).
+    
+        .. class:: warningmark
+    
+            This tool won't work unless you have provided your ENA Webin ID and password in User > Preferences > Manage Information > ENA Webin account details.]]></help>
+
+    <citations>
+        <citation type="doi">10.1093/nar/gkac1051</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/process_input.py	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,132 @@
+import json
+import os
+import sys
+
+import yaml
+
+
+def get_section_string(f, start_line, end_line, return_string=False):
+    """Return the lines found between two delimiter lines of an open text file.
+
+    Reading starts at the current position of ``f``. Everything up to and
+    including the first line equal to ``start_line`` is discarded; the lines
+    after it are collected until a line equal to ``end_line`` (which is not
+    included) or the end of the file is reached.
+
+    Delimiters are compared against whole lines, trailing newline included.
+    If ``start_line`` never occurs, the result is empty.
+
+    :param f: text file object opened for reading
+    :param start_line: full line (with newline) that opens the section
+    :param end_line: full line (with newline) that closes the section
+    :param return_string: when true, return the section as one string
+        instead of a list of lines
+    :return: list of lines, or their concatenation if ``return_string``
+    """
+    # Iterate over the file rather than using iter(f.readline, sentinel):
+    # readline() keeps returning '' once the file is exhausted, so the
+    # sentinel form never terminates when a delimiter is missing.
+    for line in f:
+        if line == start_line:
+            break
+
+    section_lines = []
+    for line in f:
+        if line == end_line:
+            break
+        section_lines.append(line)
+
+    if return_string:
+        return ''.join(section_lines)
+    return section_lines
+
+
+def fill_from_yaml_data(yaml_only_dict, studies_samples_dict):
+    """Record the platform of every experiment under its study and sample.
+
+    ``studies_samples_dict`` is updated in place so that
+    ``studies_samples_dict[study_alias][sample_alias]['experiments']`` is a
+    list with one ``{'platform': ...}`` entry per experiment listed in
+    ``yaml_only_dict['ENA_experiment']``.
+    """
+    for experiment in yaml_only_dict['ENA_experiment'].values():
+        study_key = experiment['study_alias']
+        sample_key = experiment['sample_alias']
+        platform_entry = {'platform': experiment['platform']}
+
+        # create the study / sample levels on first sight, then append
+        samples_of_study = studies_samples_dict.setdefault(study_key, {})
+        sample_record = samples_of_study.setdefault(sample_key, {'experiments': []})
+        sample_record['experiments'].append(platform_entry)
+
+
+def load_receipt_data(input_file_path):
+    """Load study/sample metadata and accessions from a submission receipt.
+
+    The receipt is a text file with three sections that are read here:
+
+    * a YAML block enclosed between two ``YAML -------------`` lines, from
+      which the experiments (study alias, sample alias, platform) are taken;
+    * a ``Study accession details:`` block of tab-separated
+      ``alias<TAB>accession...`` lines, terminated by an empty line;
+    * a ``Sample accession details:`` block with the same layout.
+
+    :param input_file_path: path of the receipt file
+    :return: dict of the form
+        ``{study_alias: {'accession': str,
+                         sample_alias: {'accession': str,
+                                        'experiments': [{'platform': str}]}}}``
+    :raises ValueError: if the receipt contains no YAML mapping
+    """
+    yaml_delimiter = 'YAML -------------\n'
+    study_delimiter = 'Study accession details:\n'
+    samples_delimiter = 'Sample accession details:\n'
+    end_line = '\n'
+
+    loaded_data = {}
+
+    # load yaml section
+    with open(input_file_path) as input_file:
+        yaml_only_section = yaml.safe_load(
+            get_section_string(input_file, start_line=yaml_delimiter, end_line=yaml_delimiter, return_string=True)
+        )
+    # safe_load returns None for an empty string; fail with a clear message
+    # instead of an obscure TypeError further down.
+    if not isinstance(yaml_only_section, dict):
+        raise ValueError(f"No YAML section found in receipt file {input_file_path}")
+    fill_from_yaml_data(yaml_only_section, loaded_data)
+
+    # read study accessions (the file is reopened for every section so that
+    # the order of the sections in the receipt does not matter)
+    with open(input_file_path) as input_file:
+        studies_accession_lines = get_section_string(input_file, start_line=study_delimiter, end_line=end_line)
+
+    for study_line in studies_accession_lines:
+        # strip the line ending first, otherwise a two-column line would
+        # leave a newline inside the accession
+        fields = study_line.rstrip('\n').split('\t')
+        if len(fields) < 2:
+            continue
+        alias, accession = fields[0], fields[1]
+        try:
+            loaded_data[alias]['accession'] = accession
+        except KeyError:
+            print(f"Experiment {alias} has unknown study or sample")
+
+    # read sample accessions
+    with open(input_file_path) as input_file:
+        samples_accession_lines = get_section_string(input_file, start_line=samples_delimiter, end_line=end_line)
+
+    for sample_line in samples_accession_lines:
+        fields = sample_line.rstrip('\n').split('\t')
+        if len(fields) < 2:
+            continue
+        alias, accession = fields[0], fields[1]
+        # attach the accession to the first study that lists this sample
+        for study in loaded_data.keys():
+            if alias in loaded_data[study].keys():
+                loaded_data[study][alias]['accession'] = accession
+                break
+
+    return loaded_data
+
+
+def _sample_alias_from_filename(fasta_file):
+    """Strip a trailing ``.fasta.gz`` or ``.fasta`` from a FASTA file name."""
+    for suffix in ('.fasta.gz', '.fasta'):
+        if fasta_file.endswith(suffix):
+            return fasta_file[:-len(suffix)]
+    # Unknown extension: keep the name unchanged instead of blindly chopping
+    # six characters off it.
+    return fasta_file
+
+
+def main():
+    """Write one Webin-CLI manifest per FASTA file that has receipt metadata.
+
+    Command line arguments:
+
+    1. path of the submission receipt (see ``load_receipt_data``)
+    2. path of a JSON file holding the list of FASTA file names
+    3. directory in which the ``<sample_alias>.manifest.txt`` files are written
+    4. path of a manifest template whose content is copied to the top of
+       every manifest
+
+    The paths of the manifests that were written are listed, one per line,
+    in ``submit_list.tab`` in the current working directory. ``AGP`` and
+    ``CHROMOSOME_LIST`` entries are added only when ``./fasta/<alias>.agp``
+    or ``./fasta/<alias>.tsv.gz`` exist.
+    """
+    if len(sys.argv) < 5:
+        sys.exit(f'Usage: {sys.argv[0]} <receipt> <fasta_names.json> <manifest_dir> <manifest_template>')
+    input_file_path, fasta_names_list_path, out_manifest_base, manifest_template = sys.argv[1:5]
+
+    # load submitted data from receipt file
+    data_dict = load_receipt_data(input_file_path)
+
+    # iterate over the list of fasta files
+    with open(fasta_names_list_path, 'r') as fasta_files_json_file:
+        fasta_files_list = json.load(fasta_files_json_file)
+
+    with open('submit_list.tab', 'w') as written_manifests_out:
+        for fasta_file in fasta_files_list:
+            sample_alias = _sample_alias_from_filename(fasta_file)
+
+            print(f'Processing {sample_alias}')
+            found_metadata = False
+
+            for study_alias, study_data in data_dict.items():
+                sample_data = study_data.get(sample_alias)
+                # A study entry stores its own 'accession' string next to the
+                # per-sample dicts, so make sure a sample record was found.
+                if not isinstance(sample_data, dict):
+                    continue
+
+                sample_accession = sample_data.get('accession')
+                study_accession = study_data.get('accession')
+                if not sample_accession or not study_accession:
+                    # listed in the YAML section but absent from the
+                    # accession tables: no valid manifest can be written
+                    print(f'Missing accession for sample {sample_alias} or study {study_alias}')
+                    continue
+
+                # TODO: get a string that concatenates platform information from multiple experiments
+                platform = sample_data['experiments'][0]['platform']
+                manifest_path = os.path.join(out_manifest_base, sample_alias + '.manifest.txt')
+
+                with open(manifest_path, "w") as output_handle:
+                    # dump the contents of manifest template containing global vars
+                    with open(manifest_template) as m_template:
+                        output_handle.write(m_template.read())
+
+                    output_handle.write("ASSEMBLYNAME\tconsensus_" + sample_alias + "\n")
+                    output_handle.write("PLATFORM\t" + platform + "\n")
+                    output_handle.write("STUDY\t" + study_accession + "\n")
+                    output_handle.write("SAMPLE\t" + sample_accession + "\n")
+                    output_handle.write("FASTA\t" + sample_alias + '.fasta.gz' + "\n")
+                    agp_path = os.path.join("./fasta", sample_alias + ".agp")
+                    if os.path.exists(agp_path):
+                        output_handle.write("AGP\t" + sample_alias + ".agp\n")
+                    chr_list_path = os.path.join("./fasta", sample_alias + ".tsv.gz")
+                    if os.path.exists(chr_list_path):
+                        output_handle.write("CHROMOSOME_LIST\t" + sample_alias + ".tsv.gz\n")
+
+                found_metadata = True
+                written_manifests_out.write(manifest_path + '\n')
+                break
+
+            if not found_metadata:
+                print(f'No metadata found for sample {sample_alias}')
+
+
+if __name__ == '__main__':
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX2.agp	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,5 @@
+##agp-version 2.0
+# ORGANISM: Bacteriophage phiX174
+# ASSEMBLY NAME: phiX2
+# OBJECT: phiX2 is represented by a single component spanning 1..5386
+phiX2	1	5386	1	W	phiX174	1	5386	+
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX2.fasta	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,90 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
+>phi2174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
Binary file test-data/phiX2.fasta.gz has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX3.fasta	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,78 @@
+>phiX174
+GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT
+GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA
+GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC
+TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT
+TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT
+CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT
+TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG
+TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC
+GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
+CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG
+TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT
+AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC
+CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA
+TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC
+TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA
+CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA
+GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT
+GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA
+ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC
+TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT
+TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC
+ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC
+CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC
+CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC
+TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG
+TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT
+TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA
+AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT
+TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT
+ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC
+GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC
+TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT
+TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA
+TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG
+TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC
+CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG
+AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC
+CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT
+TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG
+CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA
+AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT
+GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG
+GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT
+CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG
+TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA
+GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC
+CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA
+TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC
+TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT
+CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA
+TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG
+TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT
+CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT
+TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC
+ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG
+TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA
+ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG
+GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC
+CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT
+GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG
+GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT
+ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG
+CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC
+CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC
+GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT
+CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG
+CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA
+TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT
+TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG
+TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC
+AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC
+TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/phiX3.tsv	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,1 @@
+phiX174		1		segmented
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/receipt_sample.txt	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,76 @@
+YAML -------------
+ENA_experiment:
+  0:
+    alias: exp_test_alias_001
+    design_description: Lot's of coffe and magic
+    insert_size: 250.0
+    instrument_model: NextSeq 500
+    library_construction_protocol: Illumina COVIDSeq Test Kit
+    library_layout: PAIRED
+    library_name: Cov51
+    library_selection: RT-PCR
+    library_source: VIRAL RNA
+    library_strategy: AMPLICON
+    platform: ILLUMINA
+    sample_alias: sample_alias_001
+    study_alias: study_alias_001
+    title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test
+  1:
+    alias: exp_test_alias_002
+    design_description: Lot's of coffe and magic
+    insert_size: 250.0
+    instrument_model: NextSeq 500
+    library_construction_protocol: Illumina COVIDSeq Test Kit
+    library_layout: PAIRED
+    library_name: Cov51
+    library_selection: RT-PCR
+    library_source: VIRAL RNA
+    library_strategy: AMPLICON
+    platform: ILLUMINA
+    sample_alias: phiX2
+    study_alias: study_alias_001
+    title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test
+ENA_run:
+  2:
+    alias: run_alias_001
+    experiment_alias: exp_test_alias_001
+    file_format: FASTQ
+    file_name: run001.fastq.gz
+ENA_sample:
+  2:
+    alias: sample_alias_001
+    collecting institution: Umbrella Corp.
+    collection date: '2021-05-03'
+    collector name: "John Doe"
+    definition for seropositive sample: ''
+  2:
+    alias: phiX2
+    collecting institution: Umbrella Corp.
+    collection date: '2021-05-03'
+    collector name: "John Doe"
+    definition for seropositive sample: ''
+ENA_study:
+  2:
+    alias: study_alias_001
+    study_abstract: "Help"
+    study_type: Whole Genome Sequencing
+    title: Whole genome sequencing of SARS-CoV-2
+YAML -------------
+
+Printing receipt to ./receipt.xml
+
+Submission was done successfully
+
+Study accession details:
+study_alias_001	FAKE0001	2011-01-16T10:52:06.497+01:00	added
+
+Sample accession details:
+sample_alias_001	FAKESAMP001	2011-01-16T10:52:06.497+01:00	added
+phiX2	FAKESAMP002	2011-01-16T10:52:06.497+01:00	added
+
+Saving updates in new tsv tables::
+save updates in ./submission_files/studies_updated.tsv
+save updates in ./submission_files/samples_updated.tsv
+save updates in ./submission_files/experiments_updated.tsv
+save updates in ./submission_files/runs_updated.tsv
+action_option	add
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/receipt_sample_nophiX2.txt	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,54 @@
+YAML -------------
+ENA_experiment:
+  0:
+    alias: exp_test_alias_001
+    design_description: Lot's of coffe and magic
+    insert_size: 250.0
+    instrument_model: NextSeq 500
+    library_construction_protocol: Illumina COVIDSeq Test Kit
+    library_layout: PAIRED
+    library_name: Cov51
+    library_selection: RT-PCR
+    library_source: VIRAL RNA
+    library_strategy: AMPLICON
+    platform: ILLUMINA
+    sample_alias: sample_alias_001
+    study_alias: study_alias_001
+    title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test
+ENA_run:
+  2:
+    alias: run_alias_001
+    experiment_alias: exp_test_alias_001
+    file_format: FASTQ
+    file_name: run001.fastq.gz
+ENA_sample:
+  2:
+    alias: sample_alias_001
+    collecting institution: Umbrella Corp.
+    collection date: '2021-05-03'
+    collector name: "John Doe"
+    definition for seropositive sample: ''
+ENA_study:
+  2:
+    alias: study_alias_001
+    study_abstract: "Help"
+    study_type: Whole Genome Sequencing
+    title: Whole genome sequencing of SARS-CoV-2
+YAML -------------
+
+Printing receipt to ./receipt.xml
+
+Submission was done successfully
+
+Study accession details:
+study_alias_001	FAKE0001	2011-01-16T10:52:06.497+01:00	added
+
+Sample accession details:
+sample_alias_001	FAKESAMP001	2011-01-16T10:52:06.497+01:00	added
+
+Saving updates in new tsv tables::
+save updates in ./submission_files/studies_updated.tsv
+save updates in ./submission_files/samples_updated.tsv
+save updates in ./submission_files/experiments_updated.tsv
+save updates in ./submission_files/runs_updated.tsv
+action_option	add
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/receipt_sample_phiX3.txt	Mon Oct 06 12:13:07 2025 +0000
@@ -0,0 +1,76 @@
+YAML -------------
+ENA_experiment:
+  0:
+    alias: exp_test_alias_001
+    design_description: Lot's of coffe and magic
+    insert_size: 250.0
+    instrument_model: NextSeq 500
+    library_construction_protocol: Illumina COVIDSeq Test Kit
+    library_layout: PAIRED
+    library_name: Cov51
+    library_selection: RT-PCR
+    library_source: VIRAL RNA
+    library_strategy: AMPLICON
+    platform: ILLUMINA
+    sample_alias: phiX2
+    study_alias: study_alias_001
+    title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test
+  1:
+    alias: exp_test_alias_002
+    design_description: Lot's of coffe and magic
+    insert_size: 250.0
+    instrument_model: NextSeq 500
+    library_construction_protocol: Illumina COVIDSeq Test Kit
+    library_layout: PAIRED
+    library_name: Cov51
+    library_selection: RT-PCR
+    library_source: VIRAL RNA
+    library_strategy: AMPLICON
+    platform: ILLUMINA
+    sample_alias: phiX3
+    study_alias: study_alias_001
+    title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test
+ENA_run:
+  2:
+    alias: run_alias_001
+    experiment_alias: exp_test_alias_001
+    file_format: FASTQ
+    file_name: run001.fastq.gz
+ENA_sample:
+  2:
+    alias: phiX2
+    collecting institution: Umbrella Corp.
+    collection date: '2021-05-03'
+    collector name: "John Doe"
+    definition for seropositive sample: ''
+  2:
+    alias: phiX3
+    collecting institution: Umbrella Corp.
+    collection date: '2021-05-03'
+    collector name: "John Doe"
+    definition for seropositive sample: ''
+ENA_study:
+  2:
+    alias: study_alias_001
+    study_abstract: "Help"
+    study_type: Whole Genome Sequencing
+    title: Whole genome sequencing of SARS-CoV-2
+YAML -------------
+
+Printing receipt to ./receipt.xml
+
+Submission was done successfully
+
+Study accession details:
+study_alias_001	FAKE0001	2011-01-16T10:52:06.497+01:00	added
+
+Sample accession details:
+phiX2	FAKESAMP001	2011-01-16T10:52:06.497+01:00	added
+phiX3	FAKESAMP002	2011-01-16T10:52:06.497+01:00	added
+
+Saving updates in new tsv tables::
+save updates in ./submission_files/studies_updated.tsv
+save updates in ./submission_files/samples_updated.tsv
+save updates in ./submission_files/experiments_updated.tsv
+save updates in ./submission_files/runs_updated.tsv
+action_option	add
Binary file test-data/sample_alias_001.fasta.gz has changed