Mercurial > repos > bgruening > markitdown
diff markitdown.xml @ 0:c7467d9d0b2b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/markitdown commit 1df47411ce8651c1d4f68cd032b2afe7d5a721de
| author | bgruening |
|---|---|
| date | Mon, 13 Oct 2025 13:22:04 +0000 |
| parents | |
| children | bb65bcc725f0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/markitdown.xml Mon Oct 13 13:22:04 2025 +0000 @@ -0,0 +1,145 @@ +<tool id="markitdown" name="Markitdown" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>Convert documents to Markdown</description> + <macros> + <token name="@TOOL_VERSION@">0.1.3</token> + <token name="@VERSION_SUFFIX@">0</token> + <token name="@PROFILE@">23.0</token> + </macros> + <requirements> + <requirement type="package" version="3.12">python</requirement> + <requirement type="package" version="@TOOL_VERSION@">markitdown</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ + #set ext_map = { + 'pdf': 'pdf', 'docx': 'docx', 'pptx': 'pptx', 'xlsx': 'xlsx', + 'html': 'html', 'txt': 'txt', 'ipynb': 'ipynb', + 'markdown': 'md', 'zip': 'zip', 'tabular': 'csv', 'csv': 'csv' + } + + #set file_ext = ext_map.get($input.ext, '') + #set final_ext = $ext_hint if $ext_hint else $file_ext + + markitdown + ${input} + -x $final_ext + #if $mime_type: + -m $mime_opt + #end if + #if $charset: + -c "$charset_opt" + #end if + $keep_data_uris + -o '$output' + ]]></command> + + <inputs> + <param name="input" type="data" format="pdf,docx,pptx,xlsx,html,txt,ipynb,markdown,zip,tabular" + label="Input file"/> + <param name="ext_hint" type="text" optional="true" label="Extension override"/> + <param name="mime_type" type="text" optional="true" label="MIME type hint"/> + <param name="charset" type="text" optional="true" label="Character set (e.g. UTF-8)"/> + <param name="keep_data_uris" type="boolean" truevalue="--keep-data-uris" falsevalue="" label="Keep embedded data URIs"/> + </inputs> + + <outputs> + <data name="output" format="markdown" label="Converted Markdown output"/> + </outputs> + + <tests> + <test> + <param name="input" value="EAR.pdf" ftype="pdf"/> + <output name="output"> + <assert_contents> + <has_text text="Tags: ERGA-BGE"/> + <has_text text="Lineage: mammalia_odb10"/> + </assert_contents> + </output> + </test> + + <test> + <param name="input" value="example.docx" ftype="docx"/> + <output name="output"> + <assert_contents> + <has_text text="# Lorem ipsum dolor sit amet, consectetur adipiscing elit."/> + </assert_contents> + </output> + </test> + + <!--test> + <param name="input" value="example.odt"/> + <param name="ext_hint" value="odt"/> + <output name="output"> + <assert_contents> + <has_text text="This is a Word document"/> + </assert_contents> + </output> + </test--> + + <test> + <param name="input" value="report_4.html" ftype="html"/> + <param name="keep_data_uris" value="true"/> + <output name="output"> + <assert_contents> + <has_text text="is the contig length such that using longer or equal length contigs produces"/> + </assert_contents> + </output> + </test> + + <test> + <param name="input" value="example.txt" ftype="txt"/> + <param name="ext_hint" value="txt"/> + <output name="output"> + <assert_contents> + <has_text text="This is a plain text file"/> + </assert_contents> + </output> + </test> + + <test> + <param name="input" value="example.ipynb" ftype="ipynb"/> + <output name="output"> + <assert_contents> + <has_text text="print("Hello, world!")"/> + </assert_contents> + </output> + </test> + </tests> + + <help format="markdown"><![CDATA[ + +**Markitdown** converts rich document formats (PDF, DOCX, HTML, etc.) to Markdown. + +--- + +### Supported Formats: + +- PDF, DOCX, PPTX, XLSX +- HTML, TXT, Markdown +- Jupyter Notebooks (IPYNB) +- ZIP containing supported formats +- Tabular (CSV) + +--- + +### Options: + +- **Extension override** (`-x`): hint for file type if not obvious +- **MIME type** (`-m`): manual MIME hint +- **Charset** (`-c`): text encoding hint +- **Keep data URIs**: retain base64-encoded images + +Project: https://github.com/microsoft/markitdown + ]]></help> + + <citations> + <citation type="bibtex"> +@misc{markitdown2024, + author = {Microsoft}, + title = {markitdown: Convert documents to markdown}, + year = {2024}, + howpublished = {\url{https://github.com/microsoft/markitdown}} +} + </citation> + </citations> +</tool>
