diff markitdown.xml @ 0:c7467d9d0b2b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/markitdown commit 1df47411ce8651c1d4f68cd032b2afe7d5a721de
author bgruening
date Mon, 13 Oct 2025 13:22:04 +0000
parents
children bb65bcc725f0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/markitdown.xml	Mon Oct 13 13:22:04 2025 +0000
@@ -0,0 +1,145 @@
+<tool id="markitdown" name="Markitdown" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Convert documents to Markdown</description>
+    <macros> <!-- tokens substituted into the tool element's attributes and the requirements -->
+        <token name="@PROFILE@">23.0</token> <!-- value of the tool's profile attribute -->
+        <token name="@TOOL_VERSION@">0.1.3</token> <!-- markitdown version; also pins the markitdown requirement -->
+        <token name="@VERSION_SUFFIX@">0</token> <!-- appended after "+galaxy" in the tool version -->
+    </macros>
+    <requirements> <!-- software dependencies of the command below -->
+        <requirement type="package" version="3.12">python</requirement> <!-- interpreter version is pinned explicitly -->
+        <requirement type="package" version="@TOOL_VERSION@">markitdown</requirement> <!-- version comes from the @TOOL_VERSION@ token -->
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## Extension hint for -x: a user override wins, otherwise the Galaxy datatype is mapped to a file extension.
+        #set ext_map = {
+            'pdf': 'pdf', 'docx': 'docx', 'pptx': 'pptx', 'xlsx': 'xlsx',
+            'html': 'html', 'txt': 'txt', 'ipynb': 'ipynb',
+            'markdown': 'md', 'zip': 'zip', 'tabular': 'csv', 'csv': 'csv'
+        }
+        #set final_ext = $ext_hint if $ext_hint else ext_map.get($input.ext, '')
+        ## Optional flags are emitted only when set (an empty -x would swallow the next argument); values are single-quoted.
+        markitdown '$input'
+            #if $final_ext:
+                -x '$final_ext'
+            #end if
+            #if $mime_type:
+                -m '$mime_type'
+            #end if
+            #if $charset:
+                -c '$charset'
+            #end if
+            $keep_data_uris
+            -o '$output'
+    ]]></command>
+
+    <inputs> <!-- csv is accepted because the command's ext_map already maps it to an extension hint -->
+        <param name="input" type="data" format="pdf,docx,pptx,xlsx,html,txt,ipynb,markdown,zip,tabular,csv"
+               label="Input file" help="Document to convert; its datatype selects the default extension hint."/>
+        <param name="ext_hint" type="text" optional="true" label="Extension override" help="Passed as -x, e.g. pdf. Leave empty to use the extension derived from the input datatype."/>
+        <param name="mime_type" type="text" optional="true" label="MIME type hint" help="Passed as -m, e.g. text/html. Leave empty to omit the option."/>
+        <param name="charset" type="text" optional="true" label="Character set (e.g. UTF-8)" help="Passed as -c. Leave empty to omit the option."/>
+        <param name="keep_data_uris" type="boolean" truevalue="--keep-data-uris" falsevalue="" checked="false" label="Keep embedded data URIs" help="Adds --keep-data-uris to the command line."/>
+    </inputs>
+
+    <outputs> <!-- label includes the input's history reference so outputs of repeated runs stay distinguishable -->
+        <data name="output" format="markdown" label="${tool.name} on ${on_string}: Markdown"/>
+    </outputs>
+
+    <tests> <!-- NOTE(review): no test sets mime_type or charset, so those command branches are never exercised -->
+        <test> <!-- PDF input; no ext_hint, so the extension hint comes from the datatype mapping -->
+            <param name="input" value="EAR.pdf" ftype="pdf"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="Tags: ERGA-BGE"/>
+                    <has_text text="Lineage: mammalia_odb10"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <test> <!-- DOCX input; expects the text prefixed with "# " (a Markdown heading) in the output -->
+            <param name="input" value="example.docx" ftype="docx"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="# Lorem ipsum dolor sit amet, consectetur adipiscing elit."/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <!-- NOTE(review): disabled ODT test; "odt" is not in the input parameter's format list, confirm before enabling. <test>
+            <param name="input" value="example.odt"/>
+            <param name="ext_hint" value="odt"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="This is a Word document"/>
+                </assert_contents>
+            </output>
+        </test-->
+
+        <test> <!-- HTML input with the keep_data_uris option switched on -->
+            <param name="input" value="report_4.html" ftype="html"/>
+            <param name="keep_data_uris" value="true"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="is the contig length such that using longer or equal length contigs produces"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <test> <!-- plain text input with an explicit ext_hint override -->
+            <param name="input" value="example.txt" ftype="txt"/>
+            <param name="ext_hint" value="txt"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="This is a plain text file"/>
+                </assert_contents>
+            </output>
+        </test>
+
+        <test> <!-- Jupyter notebook input; expects the print statement to appear in the output -->
+            <param name="input" value="example.ipynb" ftype="ipynb"/>
+            <output name="output">
+                <assert_contents>
+                    <has_text text="print(&quot;Hello, world!&quot;)"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+
+    <help format="markdown"><![CDATA[
+
+**Markitdown** converts rich document formats (PDF, DOCX, HTML, etc.) to Markdown.
+
+---
+
+### Supported Formats:
+
+- PDF, DOCX, PPTX, XLSX
+- HTML, TXT, Markdown
+- Jupyter Notebooks (IPYNB)
+- ZIP containing supported formats
+- Tabular (CSV)
+
+---
+
+### Options:
+
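+- **Extension override** (`-x`): file extension hint (e.g. `pdf`); when left empty, the hint is derived from the input's Galaxy datatype
+- **MIME type** (`-m`): MIME type hint (e.g. `text/html`)
+- **Charset** (`-c`): character encoding hint (e.g. `UTF-8`)
+- **Keep data URIs** (`--keep-data-uris`): retain embedded data URIs, such as base64-encoded images, in the output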
+
+Project: https://github.com/microsoft/markitdown
+    ]]></help>
+
+    <citations> <!-- cites the upstream GitHub repository as a BibTeX @misc entry -->
+        <citation type="bibtex">
+@misc{markitdown2024,
+  author       = {Microsoft},
+  title        = {markitdown: Convert documents to markdown},
+  year         = {2024},
+  howpublished = {\url{https://github.com/microsoft/markitdown}}
+}
+        </citation>
+    </citations>
+</tool>