Mercurial > repos > iuc > ena_webin_cli
changeset 0:7f669682f4ac draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ena_webin_cli commit abb15194a196267142d88b9348facf9e85e601ef
| author | iuc |
|---|---|
| date | Mon, 06 Oct 2025 12:13:07 +0000 |
| parents | |
| children | 1090ae5e7b29 |
| files | README.rst ena_webin_cli.xml process_input.py test-data/phiX2.agp test-data/phiX2.fasta test-data/phiX2.fasta.gz test-data/phiX3.fasta test-data/phiX3.tsv test-data/receipt_sample.txt test-data/receipt_sample_nophiX2.txt test-data/receipt_sample_phiX3.txt test-data/sample_alias_001.fasta.gz |
| diffstat | 12 files changed, 1069 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,39 @@ +Webin CLI wrapper +====================== +This tool supports the submission of genome assemblies using +`Webin-CLI <https://github.com/enasequence/webin-cli>`__. + +This is a Galaxy wrapper for the `Webin-CLI <https://github.com/enasequence/webin-cli>`__ tool from `ENA <https://www.ebi.ac.uk/ena/browser/home>`__. + + +Setting up credentials on Galaxy +-------------------------------- + +The admin of the server can set up global credentials through a file +with the format: + +.. code-block:: yaml + + username: webin_id + password: webin_password + +The path to this file must be exported as an environment variable called +``$GALAXY_ENA_SECRETS``. + +Alternatively, the admin can enable users to set their own credentials +for this tool. To enable it, make sure the file +``config/user_preferences_extra_conf.yml`` has the following section: + +.. code-block:: yaml + + ena_webin_account: + description: Your ENA Webin account details + inputs: + - name: webin_id + label: ENA Webin ID + type: text + required: False + - name: password + label: Password + type: password + required: False
<!-- ena_webin_cli.xml: Galaxy tool wrapper around ENA Webin-CLI (genome context) -->
<tool id="ena_webin_cli" name="ENA Webin CLI" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="MIT" profile="24.2">
    <description>Submission of consensus sequences to the European Nucleotide Archive (ENA)</description>
    <macros>
        <token name="@TOOL_VERSION@">9.0.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">ena-webin-cli</requirement>
        <requirement type="package" version="5.3">pyyaml</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
## -----------------------------------------------------------------------------
## 1) Initialize log and credentials
## -----------------------------------------------------------------------------

## Truncate (or create) the Galaxy-captured Webin-CLI log dataset.
: > "$webin_cli_log";

## $credentials is the path Galaxy assigned to the rendered "credentials" configfile.
## It must not be overwritten with a literal file name: the configfile declares no
## fixed filename, so a literal name would not point at the rendered file.
## In dry-run we use a fake credentials file instead so tests do not require real creds.
#set $credentials_path = str($credentials)
#if $dry_run == "true":
    #set $credentials_path = './test_fake_path'
    touch '$credentials_path';
    echo "username:test_user" >> '$credentials_path';
    echo "password:test_password" >> '$credentials_path';
#end if

## Extract username/password from the credentials file.
## The patterns are anchored so a value containing the word "username"/"password"
## cannot match the wrong line, and "-f2-" keeps values that themselves contain ':'.
## NOTE: $webin_id appears in bash, so we escape it as \$webin_id.
webin_id=`grep '^[[:space:]]*username:' '$credentials_path' | cut -d':' -f2-`;
if [ -z "\$webin_id" ]; then
    ## Fail early with guidance if no username is present.
    echo "No ENA credentials defined. Set your credentials via: User -> Preferences -> Manage Information" >&2;
    exit 1;
else
    ## Pull the password similarly.
    password=`grep '^[[:space:]]*password:' '$credentials_path' | cut -d':' -f2-`;
fi;

## -----------------------------------------------------------------------------
## 2) Create a base manifest (fields common to all submissions)
## -----------------------------------------------------------------------------

## Name of the base manifest template (we append more fields later).
#set $manifest_base = 'manifest_base.tab'

## Working dirs: per-sample manifests and input sequences.
mkdir -p manifests;
mkdir -p fasta;

## Write assembly-level fields to the base manifest.
echo -e 'ASSEMBLY_TYPE\t$assembly_type' > $manifest_base;
echo -e 'COVERAGE\t$coverage' >> $manifest_base;
echo -e 'PROGRAM\t$assembly_program' >> $manifest_base;
#if $min_gap_length:
    echo -e 'MINGAPLENGTH\t$min_gap_length' >> $manifest_base;
#end if
echo -e 'MOLECULETYPE\t$molecule_type' >> $manifest_base;

## -----------------------------------------------------------------------------
## 3) Build per-sample manifests depending on metadata workflow
## -----------------------------------------------------------------------------

#if $metadata_file_or_form.metadata_format == "file":
    ## --------------------------- FILE-DRIVEN WORKFLOW ------------------------
    ## For each selected FASTA:
    ##  - if uncompressed (.fasta), gzip it into ./fasta/<name>.fasta.gz
    ##  - if already .fasta.gz, symlink it into ./fasta
    #for $file in $metadata_file_or_form.genome_fasta:
        #if $file.is_of_type('fasta'):
            #set $full_name = $file.element_identifier + '.gz'
            gzip -c '$file' > './fasta/$full_name';
        #else:
            ln -s '$file' './fasta/$file.element_identifier';
        #end if
    #end for

    ## Optionally stage AGP files (symlinked) and chromosome list files (gzipped),
    ## one per sample, next to the FASTA files.
    #if $metadata_file_or_form.agp_file:
        #for $file in $metadata_file_or_form.agp_file:
            ln -s '$file' './fasta/$file.element_identifier';
        #end for
    #end if
    #if $metadata_file_or_form.chr_list_file:
        #for $file in $metadata_file_or_form.chr_list_file:
            #set $chr_name = $file.element_identifier + '.gz'
            gzip -c '$file' > './fasta/$chr_name';
        #end for
    #end if

    ## Helper script:
    ##  - parses ENA receipt (study/sample accessions, platform),
    ##  - writes per-sample manifests into ./manifests using $manifest_base,
    ##  - emits "submit_list.tab" (one manifest path per line).
    python3 '$__tool_directory__/process_input.py' '$metadata_file_or_form.ena_receipt' '$genome_fasta_files' './manifests' '$manifest_base' >> "$webin_cli_log" 2>&1;

    ## Extract center name from the receipt (used as -centerName).
    center_name=`grep 'center_name' '$metadata_file_or_form.ena_receipt' | cut -f2,2 | tr -d '\n'`;

    ## Log if submit_list.tab exists, and dump its content for debugging.
    if [ -s submit_list.tab ]; then
        echo "submit_list.tab present:" >> "$webin_cli_log" 2>&1;
        cat submit_list.tab >> "$webin_cli_log" 2>&1;
    else
        echo "submit_list.tab is missing or empty" >> "$webin_cli_log" 2>&1;
    fi;

#else:
    ## --------------------------- FORM-DRIVEN WORKFLOW ------------------------
    ## Single-manifest flow: copy base, then append form fields.
    #set $generated_manifest = './manifests/generated_manifest.txt'
    cp $manifest_base $generated_manifest;

    ## Use local Cheetah vars for readability.
    #set $study_id = $metadata_file_or_form.study_accession
    #set $sample_id = $metadata_file_or_form.sample_accession

    ## Required accessions.
    echo -e 'STUDY\t$study_id' >> $generated_manifest;
    echo -e 'SAMPLE\t$sample_id' >> $generated_manifest;

    ## center_name is given by the user in this path.
    center_name='$metadata_file_or_form.center_name';

    ## Assembly name and platform.
    echo -e 'NAME\t$metadata_file_or_form.assembly_name' >> $generated_manifest;
    echo -e 'PLATFORM\t$metadata_file_or_form.sequencing_platform' >> $generated_manifest;

    ## Optional free-text description (only written when the user filled it in).
    #if str($metadata_file_or_form.description).strip():
        echo -e 'DESCRIPTION\t$metadata_file_or_form.description' >> $generated_manifest;
    #end if

    ## Stage the FASTA in the working directory (the -inputDir used in form mode):
    ## gzip uncompressed input, symlink input that is already compressed.
    #if $metadata_file_or_form.genome_fasta.is_of_type('fasta'):
        #set $fasta_file_name = $metadata_file_or_form.genome_fasta.element_identifier + '.gz'
        gzip -c '$metadata_file_or_form.genome_fasta' > '$fasta_file_name';
    #else:
        #set $fasta_file_name = $metadata_file_or_form.genome_fasta.element_identifier
        ln -s '$metadata_file_or_form.genome_fasta' '$fasta_file_name';
    #end if
    echo -e 'FASTA\t$fasta_file_name' >> $generated_manifest;

    ## Optional extras for chromosome-scale assemblies.
    #if $metadata_file_or_form.agp_file:
        ## The manifest references the AGP by name, so it has to exist in the input dir.
        ln -s '$metadata_file_or_form.agp_file' '$metadata_file_or_form.agp_file.element_identifier';
        echo -e 'AGP\t$metadata_file_or_form.agp_file.element_identifier' >> $generated_manifest;
    #end if
    #if $metadata_file_or_form.chr_list_file:
        ## The chromosome list is always gzipped and referenced by its .gz name.
        #set $chr_file_name = $metadata_file_or_form.chr_list_file.element_identifier + '.gz'
        gzip -c '$metadata_file_or_form.chr_list_file' > '$chr_file_name';
        echo -e 'CHROMOSOME_LIST\t$chr_file_name' >> $generated_manifest;
    #end if
#end if

## -----------------------------------------------------------------------------
## 4) Prepare output directory and build CLI flags safely with Cheetah
## -----------------------------------------------------------------------------

## Webin-CLI will write receipts/logs under this directory (we later tar it).
#set $outputs_dir = 'outputs'
mkdir -p "$outputs_dir";

## Build flags using #set (safer than inline #if within a single shell line).
#set $test_flag = ''
#if $submit_test == "true":
    #set $test_flag = ' -test'
#end if

## By default we submit; in dry_run we validate instead.
#set $action_flag = ' -submit'
#if $dry_run == "true":
    #set $action_flag = ' -validate'
#end if

## -----------------------------------------------------------------------------
## 5) Execute Webin-CLI
## -----------------------------------------------------------------------------

#if $metadata_file_or_form.metadata_format == "file":
    ## Loop over each manifest written by process_input.py (submit_list.tab).
    while IFS= read -r line; do
        ## Extract the manifest path (first space-delimited field).
        manifest=`echo "\$line" | cut -d' ' -f1`;

        ## Log which manifest we are submitting.
        echo "Submitting manifest \$manifest" >> "$webin_cli_log" 2>&1;

        ## Invoke Webin-CLI with computed flags. Failures are logged, not fatal ("|| true").
        ena-webin-cli -context genome -manifest "\$manifest" -userName "'\$webin_id'" -password "'\$password'" -centerName "'\$center_name'" -inputDir './fasta' $test_flag $action_flag -outputDir $outputs_dir >> '$webin_cli_log' 2>&1 || true;
    done < submit_list.tab;

#else:
    ## Single run in "form" mode with the one generated manifest.
    ena-webin-cli -context genome -manifest $generated_manifest -userName "'\$webin_id'" -password "'\$password'" -centerName "'\$center_name'" -inputDir ./ $test_flag $action_flag -outputDir $outputs_dir >> "$webin_cli_log" 2>&1 || true;
#end if

## -----------------------------------------------------------------------------
## 6) Package outputs for Galaxy
## -----------------------------------------------------------------------------

## Tar up the Webin-CLI output directory so Galaxy can collect a single dataset.
tar -cf '$webin_cli_outputs' '$outputs_dir';
]]></command>

    <!--
        Config files rendered by Galaxy *before* the command runs.
        They are plain text files referenced from the command by their name
        ($credentials, $genome_fasta_files).
    -->
    <configfiles>
        <!-- Credentials file:
             Pulls stored ENA Webin details (if set) from the Galaxy user preferences and writes
             simple "username:..." and "password:..." lines. The command reads from this file. -->
        <configfile name="credentials"><![CDATA[
#set $webin_id = $__user__.extra_preferences.get('ena_webin_account|webin_id', "").strip()
#set $password = $__user__.extra_preferences.get('ena_webin_account|password', "").strip()
#if $webin_id != "":
    username:$webin_id
    password:$password
#end if
        ]]></configfile>

        <!-- genome_fasta_files:
             In "file" mode, build a JSON array containing the *element_identifier* (dataset name)
             for each selected FASTA. process_input.py uses these names to derive sample aliases. -->
        <configfile name="genome_fasta_files">
#import json
#if $metadata_file_or_form.metadata_format == "file":
    #set $fasta_files_list = list()
    #for $file in $metadata_file_or_form.genome_fasta:
        #silent $fasta_files_list.append(str($file.element_identifier))
    #end for
    #echo json.dumps($fasta_files_list)
#end if
        </configfile>
    </configfiles>

    <!--
        User-facing inputs:
          - Assembly-level parameters
          - Choice of metadata workflow (file vs form) with corresponding fields
          - Submission toggles for ENA test server and validation-only
    -->
    <inputs>
        <param name="assembly_type" type="select" label="Assembly type">
            <option value="clone">Clone</option>
            <option value="isolate">Isolate</option>
            <option value="COVID-19 outbreak">COVID-19 outbreak</option>
        </param>
        <param name="assembly_program" type="text" optional="False" label="Assembly program"/>
        <param name="molecule_type" type="select" label="Molecule type">
            <option value="genomic RNA" selected="True">genomic RNA</option>
            <option value="viral cRNA">viral cRNA</option>
            <option value="genomic DNA">genomic DNA</option>
        </param>
        <param name="coverage" type="float" optional="False" value="10000" label="Coverage"/>
        <param name="min_gap_length" type="text" optional="True" label="Minimum gap length (optional)"/>

        <conditional name="metadata_file_or_form">
            <param name="metadata_format" type="select" label="Select the method to load study and sample metadata">
                <option value="form" selected="True">Fill in required submission metadata</option>
                <option value="file">I used the Galaxy ENA upload tool to submit the raw data: parse the received submission receipt</option>
            </param>

            <!-- FILE workflow: receipt + multiple FASTA (+ optional AGP/TSV) -->
            <when value="file">
                <param type="data" format="txt" name="ena_receipt" optional="False" label="Submission receipt obtained from ENA upload tool"/>
                <param name="genome_fasta" type="data" optional="False" label="Select the consensus sequence assembly files or a collection of them. Use following syntax: sample_alias.fasta or sample_alias.fasta.gz" format="fasta,fasta.gz" multiple="true"/>
                <param name="agp_file" type="data" optional="True" label="Sequences in AGP format. Use following syntax: sample_alias.agp" format="agp" multiple="true"/>
                <param name="chr_list_file" type="data" optional="True" label="Chromosome List File, must be provided when the submission contains assembled chromosomes. Use following syntax: sample_alias.tsv" format="tsv" multiple="true"/>
            </when>

            <!-- FORM workflow: single, user-specified submission -->
            <when value="form">
                <param name="assembly_name" type="text" optional="False" label="Assembly name"/>
                <param name="study_accession" type="text" optional="False" label="Study accession"/>
                <param name="sample_accession" type="text" optional="False" label="Sample accession"/>
                <param name="sequencing_platform" type="text" optional="False" label="Sequencing platform"/>
                <param name="description" type="text" optional="True" value="" label="Description" help="Free text description of the genome assembly (optional)"/>
                <param name="center_name" type="text" optional="False" label="Center name"/>
                <param name="genome_fasta" type="data" optional="False" label="Select the consensus sequence assembly file" format="fasta,fasta.gz"/>
                <param name="agp_file" type="data" optional="True" label="Sequences in AGP format." format="agp"/>
                <param name="chr_list_file" type="data" optional="True" label="Chromosome List File, must be provided when the submission contains assembled chromosomes." format="tsv"/>
            </when>
        </conditional>

        <!-- Submission controls -->
        <param name="submit_test" type="boolean" truevalue="true" falsevalue="false" label="Submit to ENA test server" help="Uploads to the test server of ENA will not be made public and will be removed automatically in 24 hours. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find these uploads at https://wwwdev.ebi.ac.uk/ena/." />
        <param name="dry_run" type="boolean" truevalue="true" falsevalue="false" label="Validate files and metadata but do not submit" help="Generate input files and run Webin-CLI with -validate option. If 'No' is selected then it will validate and submit (-submit flag)"/>
    </inputs>

    <!--
        Outputs:
          - generated_manifests: discovered in manifests/ (via regex) for transparency
          - webin_cli_log: combined stdout/stderr + helper echo statements
          - webin_cli_outputs: tar archive of the Webin-CLI output directory
    -->
    <outputs>
        <collection name="generated_manifests" type="list" label="Generated manifests">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt" ext="txt" directory="manifests/"/>
        </collection>
        <data name="webin_cli_log" label="ENA submission log" format="txt"/>
        <data name="webin_cli_outputs" label="Webin cli outputs" format="tar"/>
    </outputs>

    <tests>
        <!-- Test 1: FORM workflow, no chr/AGP -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="viral cRNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="form"/>
                <param name="assembly_name" value="Test assembly name"/>
                <param name="study_accession" value="PRJEB49173"/>
                <param name="sample_accession" value="SAMEA11953908"/>
                <param name="sequencing_platform" value="Nanopore 0011"/>
                <param name="description" value="Test Description"/>
                <param name="center_name" value="Test center name"/>
                <param name="genome_fasta" value="phiX2.fasta"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\."/>
                </assert_contents>
            </output>
            <output_collection name="generated_manifests">
                <element name="generated_manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- Test 2: FORM workflow, chr list present -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="genomic DNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="form"/>
                <param name="assembly_name" value="Test assembly name"/>
                <param name="study_accession" value="PRJEB49173"/>
                <param name="sample_accession" value="SAMEA11953908"/>
                <param name="sequencing_platform" value="Nanopore 0011"/>
                <param name="description" value="Test Description"/>
                <param name="center_name" value="Test center name"/>
                <param name="genome_fasta" value="phiX3.fasta"/>
                <param name="chr_list_file" value="phiX3.tsv"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\."/>
                </assert_contents>
            </output>
            <output_collection name="generated_manifests">
                <element name="generated_manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tphiX3\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\tphiX3\.tsv\.gz$"/>
                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- Test 3: FILE workflow, two FASTAs; one missing metadata -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="viral cRNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="file"/>
                <param name="ena_receipt" value="receipt_sample_nophiX2.txt"/>
                <param name="genome_fasta" value="phiX2.fasta.gz,sample_alias_001.fasta.gz"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="Processing phiX2"/>
                    <has_text_matching expression="No metadata found for sample phiX2"/>
                    <has_text_matching expression="Processing sample_alias_001"/>
                    <has_text_matching expression="Submitting manifest .*manifests/sample_alias_001\.manifest\.txt"/>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
                </assert_contents>
            </output>
            <output_collection name="generated_manifests">
                <element name="sample_alias_001.manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tsample_alias_001\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- Test 4: FILE workflow, single FASTA with metadata -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="viral cRNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="file"/>
                <param name="ena_receipt" value="receipt_sample.txt"/>
                <param name="genome_fasta" value="sample_alias_001.fasta.gz"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="Processing sample_alias_001"/>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
                </assert_contents>
            </output>
        </test>

        <!-- Test 5: FILE workflow, AGP for phiX2 -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="genomic DNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="file"/>
                <param name="ena_receipt" value="receipt_sample.txt"/>
                <param name="genome_fasta" value="phiX2.fasta"/>
                <param name="agp_file" value="phiX2.agp"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="Processing phiX2"/>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
                </assert_contents>
            </output>
            <output_collection name="generated_manifests">
                <element name="phiX2.manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^AGP\tphiX2\.agp$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- Test 6: FILE workflow, chr list for phiX3 + extra fasta -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="genomic DNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="file"/>
                <param name="ena_receipt" value="receipt_sample_phiX3.txt"/>
                <param name="genome_fasta" value="phiX3.fasta,phiX2.fasta.gz"/>
                <param name="chr_list_file" value="phiX3.tsv"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output name="webin_cli_log">
                <assert_contents>
                    <has_text_matching expression="Processing phiX3"/>
                    <has_text_matching expression="ERROR: Invalid submission account user name or password\. Please try enclosing your password in single quotes\."/>
                </assert_contents>
            </output>
            <output_collection name="generated_manifests">
                <element name="phiX2.manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
                    </assert_contents>
                </element>
                <element name="phiX3.manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^FASTA\tphiX3\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\tphiX3\.tsv\.gz$"/>
                        <has_text_matching expression="(?m)^AGP\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>

        <!-- Test 7: FORM workflow, pre-compressed FASTA plus AGP (both must be staged and referenced) -->
        <test>
            <param name="submit_test" value="true" />
            <param name="dry_run" value="true" />
            <param name="assembly_type" value="isolate"/>
            <param name="assembly_program" value="Test assembly program"/>
            <param name="molecule_type" value="genomic DNA"/>
            <param name="coverage" value="10000"/>
            <conditional name="metadata_file_or_form">
                <param name="metadata_format" value="form"/>
                <param name="assembly_name" value="Test assembly name"/>
                <param name="study_accession" value="PRJEB49173"/>
                <param name="sample_accession" value="SAMEA11953908"/>
                <param name="sequencing_platform" value="Nanopore 0011"/>
                <param name="description" value="Test Description"/>
                <param name="center_name" value="Test center name"/>
                <param name="genome_fasta" value="phiX2.fasta.gz"/>
                <param name="agp_file" value="phiX2.agp"/>
            </conditional>
            <param name="min_gap_length" value="30"/>
            <output_collection name="generated_manifests">
                <element name="generated_manifest">
                    <assert_contents>
                        <has_text_matching expression="(?m)^DESCRIPTION\tTest Description$"/>
                        <has_text_matching expression="(?m)^FASTA\tphiX2\.fasta\.gz$"/>
                        <has_text_matching expression="(?m)^AGP\tphiX2\.agp$"/>
                        <has_text_matching expression="(?m)^CHROMOSOME_LIST\t" negate="true"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
    </tests>

    <!-- Help text + citation -->
    <help><![CDATA[
This tool is a wrapper for the ENA Webin CLI submission tool (https://ena-docs.readthedocs.io/en/latest/submit/general-guide/webin-cli.html).

.. class:: warningmark

This tool won't work unless you have provided an ENA Webin ID in User > Preferences > Manage Information > ENA Webin account details.
    ]]></help>

    <citations>
        <citation type="doi">10.1093/nar/gkac1051</citation>
    </citations>
</tool>
# process_input.py: builds per-sample Webin-CLI manifests from an ENA submission receipt.
#
# Usage: process_input.py <receipt> <fasta_names.json> <manifest_out_dir> <manifest_template>
import json
import os
import sys


def _read_until(f, sentinel):
    """Read lines from ``f`` until ``sentinel`` or end of file is reached.

    The sentinel line itself is consumed but not returned.
    """
    lines = []
    while True:
        line = f.readline()
        # readline() keeps returning '' once EOF is hit, so EOF must be tested
        # explicitly; iter(f.readline, sentinel) would otherwise loop forever
        # whenever the sentinel is absent from the file.
        if line == '' or line == sentinel:
            return lines
        lines.append(line)


def get_section_string(f, start_line, end_line, return_string=False):
    """Extract the lines found between ``start_line`` and ``end_line`` in ``f``.

    :param f: open text file object, read from its current position
    :param start_line: exact line (including newline) that precedes the section
    :param end_line: exact line (including newline) that terminates the section
    :param return_string: if True return the section as one string, else a list of lines
    :return: the section content; empty if ``start_line`` is never found. If
        ``end_line`` is never found the section extends to the end of the file.
    """
    # consume everything up to and including the starting delimiter
    _read_until(f, start_line)
    section_lines = _read_until(f, end_line)
    if return_string:
        return ''.join(section_lines)
    return section_lines


def fill_from_yaml_data(yaml_only_dict, studies_samples_dict):
    """Record the platform of every experiment under its study and sample alias.

    ``studies_samples_dict`` is updated in place to the shape
    ``{study_alias: {sample_alias: {'experiments': [{'platform': ...}, ...]}}}``.
    """
    for exp in yaml_only_dict['ENA_experiment'].values():
        study_samples = studies_samples_dict.setdefault(exp['study_alias'], {})
        sample_entry = study_samples.setdefault(exp['sample_alias'], {'experiments': []})
        sample_entry['experiments'].append({'platform': exp['platform']})


def _parse_accession_lines(lines):
    """Yield ``(alias, accession)`` from tab-separated accession table lines."""
    for line in lines:
        # strip the newline first so a two-column line does not leave it on the accession
        fields = line.rstrip('\n').split('\t')
        if len(fields) < 2:
            print(f'Skipping malformed accession line: {line!r}')
            continue
        yield fields[0], fields[1]


def load_receipt_data(input_file_path):
    """Parse an ENA submission receipt.

    :param input_file_path: path to the receipt text file
    :return: dict ``{study_alias: {'accession': ..., sample_alias: {'accession': ...,
        'experiments': [...]}}}``
    :raises ValueError: if the receipt has no usable YAML section
    """
    # Imported here so the pure-Python helpers of this module stay importable
    # without PyYAML.
    import yaml

    loaded_data = {}
    # load yaml section
    yaml_delimiter = 'YAML -------------\n'
    with open(input_file_path) as input_file:
        yaml_text = get_section_string(
            input_file, start_line=yaml_delimiter, end_line=yaml_delimiter, return_string=True
        )
    yaml_only_section = yaml.safe_load(yaml_text)
    if not isinstance(yaml_only_section, dict) or 'ENA_experiment' not in yaml_only_section:
        raise ValueError(
            f"No 'ENA_experiment' entries found in the YAML section of receipt {input_file_path}"
        )
    fill_from_yaml_data(yaml_only_section, loaded_data)

    # the accession tables are terminated by an empty line
    end_line = '\n'

    # read study accessions
    study_delimiter = 'Study accession details:\n'
    with open(input_file_path) as input_file:
        studies_accession_lines = get_section_string(input_file, start_line=study_delimiter, end_line=end_line)

    for alias, accession in _parse_accession_lines(studies_accession_lines):
        try:
            loaded_data[alias]['accession'] = accession
        except KeyError:
            print(f"Experiment {alias} has unknown study or sample")

    # read sample accessions
    samples_delimiter = 'Sample accession details:\n'
    with open(input_file_path) as input_file:
        samples_accession_lines = get_section_string(input_file, start_line=samples_delimiter, end_line=end_line)

    for alias, accession in _parse_accession_lines(samples_accession_lines):
        for study_entry in loaded_data.values():
            sample_entry = study_entry.get(alias)
            # the study-level 'accession' string shares this dict with the samples
            if isinstance(sample_entry, dict):
                sample_entry['accession'] = accession
                break

    return loaded_data


def _sample_alias_from_name(fasta_name):
    """Derive the sample alias by removing a ``.fasta.gz`` or ``.fasta`` suffix."""
    for suffix in ('.fasta.gz', '.fasta'):
        if fasta_name.endswith(suffix):
            return fasta_name[:-len(suffix)]
    # unknown extension: use the name unchanged instead of chopping characters blindly
    return fasta_name


def main():
    """Write one manifest per FASTA name with receipt metadata, plus ``submit_list.tab``."""
    input_file_path = sys.argv[1]
    fasta_names_list_path = sys.argv[2]
    out_manifest_base = sys.argv[3]
    manifest_template = sys.argv[4]

    # load submitted data from receipt file
    data_dict = load_receipt_data(input_file_path)

    # iterate over the list of fasta files
    with open(fasta_names_list_path, 'r') as fasta_files_json_file:
        fasta_files_list = json.load(fasta_files_json_file)

    # manifest template containing the global vars, identical for every sample
    with open(manifest_template) as m_template:
        template_text = m_template.read()

    with open('submit_list.tab', 'w') as written_manifests_out:
        for fasta_file in fasta_files_list:
            sample_alias = _sample_alias_from_name(fasta_file)

            print(f'Processing {sample_alias}')
            found_metadata = False

            for study_entry in data_dict.values():
                sample_entry = study_entry.get(sample_alias)
                if not isinstance(sample_entry, dict):
                    continue
                sample_accession = sample_entry.get('accession')
                study_accession = study_entry.get('accession')
                if not sample_accession or not study_accession:
                    # report and keep going rather than aborting every remaining sample
                    print(f'Missing study or sample accession for sample {sample_alias}')
                    continue
                # TODO: get a string that concatenates platform information from multiple experiments
                platform = sample_entry['experiments'][0]['platform']
                manifest_path = os.path.join(out_manifest_base, sample_alias + '.manifest.txt')

                with open(manifest_path, "w") as output_handle:
                    output_handle.write(template_text)
                    output_handle.write("ASSEMBLYNAME\tconsensus_" + sample_alias + "\n")
                    output_handle.write("PLATFORM\t" + platform + "\n")
                    output_handle.write("STUDY\t" + study_accession + "\n")
                    output_handle.write("SAMPLE\t" + sample_accession + "\n")
                    output_handle.write("FASTA\t" + sample_alias + '.fasta.gz' + "\n")
                    # optional per-sample files are looked up under ./fasta
                    agp_path = os.path.join("./fasta", sample_alias + ".agp")
                    if os.path.exists(agp_path):
                        output_handle.write("AGP\t" + sample_alias + ".agp\n")
                    chr_list_path = os.path.join("./fasta", sample_alias + ".tsv.gz")
                    if os.path.exists(chr_list_path):
                        output_handle.write("CHROMOSOME_LIST\t" + sample_alias + ".tsv.gz\n")

                found_metadata = True
                written_manifests_out.write(manifest_path + '\n')
                break

            if not found_metadata:
                print(f'No metadata found for sample {sample_alias}')


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX2.agp Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,5 @@ +##agp-version 2.0 +# ORGANISM: Bacteriophage phiX174 +# ASSEMBLY NAME: phiX2 +# OBJECT: phiX2 is represented by a single component spanning 1..5386 +phiX2 1 5386 1 W phiX174 1 5386 + \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX2.fasta Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,90 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA +>phi2174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA 
+ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX3.fasta Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,78 @@ +>phiX174 +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCGTGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT 
+GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAATGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTACTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCATTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/phiX3.tsv Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,1 @@ +phiX174 1 segmented \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/receipt_sample.txt Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,76 @@ +YAML ------------- +ENA_experiment: + 0: + alias: exp_test_alias_001 + design_description: Lot's of coffe and magic + insert_size: 250.0 + instrument_model: NextSeq 500 + library_construction_protocol: Illumina COVIDSeq Test Kit + library_layout: PAIRED + library_name: Cov51 + library_selection: RT-PCR + library_source: VIRAL RNA + library_strategy: AMPLICON + platform: ILLUMINA + sample_alias: sample_alias_001 + study_alias: study_alias_001 + title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test + 1: + alias: exp_test_alias_002 + design_description: Lot's of coffe and magic + insert_size: 250.0 + instrument_model: NextSeq 500 + library_construction_protocol: Illumina COVIDSeq Test Kit + library_layout: PAIRED + library_name: Cov51 + library_selection: RT-PCR + library_source: VIRAL RNA + library_strategy: AMPLICON + platform: ILLUMINA + sample_alias: phiX2 + study_alias: study_alias_001 + title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test +ENA_run: + 2: + alias: run_alias_001 + experiment_alias: exp_test_alias_001 + file_format: FASTQ + file_name: run001.fastq.gz +ENA_sample: + 2: + alias: sample_alias_001 + collecting institution: Umbrella Corp. + collection date: '2021-05-03' + collector name: "John Doe" + definition for seropositive sample: '' + 2: + alias: phiX2 + collecting institution: Umbrella Corp. 
+ collection date: '2021-05-03' + collector name: "John Doe" + definition for seropositive sample: '' +ENA_study: + 2: + alias: study_alias_001 + study_abstract: "Help" + study_type: Whole Genome Sequencing + title: Whole genome sequencing of SARS-CoV-2 +YAML ------------- + +Printing receipt to ./receipt.xml + +Submission was done successfully + +Study accession details: +study_alias_001 FAKE0001 2011-01-16T10:52:06.497+01:00 added + +Sample accession details: +sample_alias_001 FAKESAMP001 2011-01-16T10:52:06.497+01:00 added +phiX2 FAKESAMP002 2011-01-16T10:52:06.497+01:00 added + +Saving updates in new tsv tables:: +save updates in ./submission_files/studies_updated.tsv +save updates in ./submission_files/samples_updated.tsv +save updates in ./submission_files/experiments_updated.tsv +save updates in ./submission_files/runs_updated.tsv +action_option add
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/receipt_sample_nophiX2.txt Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,54 @@ +YAML ------------- +ENA_experiment: + 0: + alias: exp_test_alias_001 + design_description: Lot's of coffe and magic + insert_size: 250.0 + instrument_model: NextSeq 500 + library_construction_protocol: Illumina COVIDSeq Test Kit + library_layout: PAIRED + library_name: Cov51 + library_selection: RT-PCR + library_source: VIRAL RNA + library_strategy: AMPLICON + platform: ILLUMINA + sample_alias: sample_alias_001 + study_alias: study_alias_001 + title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test +ENA_run: + 2: + alias: run_alias_001 + experiment_alias: exp_test_alias_001 + file_format: FASTQ + file_name: run001.fastq.gz +ENA_sample: + 2: + alias: sample_alias_001 + collecting institution: Umbrella Corp. + collection date: '2021-05-03' + collector name: "John Doe" + definition for seropositive sample: '' +ENA_study: + 2: + alias: study_alias_001 + study_abstract: "Help" + study_type: Whole Genome Sequencing + title: Whole genome sequencing of SARS-CoV-2 +YAML ------------- + +Printing receipt to ./receipt.xml + +Submission was done successfully + +Study accession details: +study_alias_001 FAKE0001 2011-01-16T10:52:06.497+01:00 added + +Sample accession details: +sample_alias_001 FAKESAMP001 2011-01-16T10:52:06.497+01:00 added + +Saving updates in new tsv tables:: +save updates in ./submission_files/studies_updated.tsv +save updates in ./submission_files/samples_updated.tsv +save updates in ./submission_files/experiments_updated.tsv +save updates in ./submission_files/runs_updated.tsv +action_option add
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/receipt_sample_phiX3.txt Mon Oct 06 12:13:07 2025 +0000 @@ -0,0 +1,76 @@ +YAML ------------- +ENA_experiment: + 0: + alias: exp_test_alias_001 + design_description: Lot's of coffe and magic + insert_size: 250.0 + instrument_model: NextSeq 500 + library_construction_protocol: Illumina COVIDSeq Test Kit + library_layout: PAIRED + library_name: Cov51 + library_selection: RT-PCR + library_source: VIRAL RNA + library_strategy: AMPLICON + platform: ILLUMINA + sample_alias: phiX2 + study_alias: study_alias_001 + title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test + 1: + alias: exp_test_alias_002 + design_description: Lot's of coffe and magic + insert_size: 250.0 + instrument_model: NextSeq 500 + library_construction_protocol: Illumina COVIDSeq Test Kit + library_layout: PAIRED + library_name: Cov51 + library_selection: RT-PCR + library_source: VIRAL RNA + library_strategy: AMPLICON + platform: ILLUMINA + sample_alias: phiX3 + study_alias: study_alias_001 + title: Illumina NextSeq paired end sequencing; Illumina COVIDSeq Test +ENA_run: + 2: + alias: run_alias_001 + experiment_alias: exp_test_alias_001 + file_format: FASTQ + file_name: run001.fastq.gz +ENA_sample: + 2: + alias: phiX2 + collecting institution: Umbrella Corp. + collection date: '2021-05-03' + collector name: "John Doe" + definition for seropositive sample: '' + 2: + alias: phiX3 + collecting institution: Umbrella Corp. 
+ collection date: '2021-05-03' + collector name: "John Doe" + definition for seropositive sample: '' +ENA_study: + 2: + alias: study_alias_001 + study_abstract: "Help" + study_type: Whole Genome Sequencing + title: Whole genome sequencing of SARS-CoV-2 +YAML ------------- + +Printing receipt to ./receipt.xml + +Submission was done successfully + +Study accession details: +study_alias_001 FAKE0001 2011-01-16T10:52:06.497+01:00 added + +Sample accession details: +phiX2 FAKESAMP001 2011-01-16T10:52:06.497+01:00 added +phiX3 FAKESAMP002 2011-01-16T10:52:06.497+01:00 added + +Saving updates in new tsv tables:: +save updates in ./submission_files/studies_updated.tsv +save updates in ./submission_files/samples_updated.tsv +save updates in ./submission_files/experiments_updated.tsv +save updates in ./submission_files/runs_updated.tsv +action_option add
