Mercurial > repos > bgruening > markitdown
comparison markitdown.xml @ 0:c7467d9d0b2b draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/markitdown commit 1df47411ce8651c1d4f68cd032b2afe7d5a721de
| author | bgruening |
|---|---|
| date | Mon, 13 Oct 2025 13:22:04 +0000 |
| parents | |
| children | bb65bcc725f0 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c7467d9d0b2b |
|---|---|
| 1 <tool id="markitdown" name="Markitdown" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Convert documents to Markdown</description> | |
| 3 <macros> | |
| 4 <token name="@TOOL_VERSION@">0.1.3</token> | |
| 5 <token name="@VERSION_SUFFIX@">0</token> | |
| 6 <token name="@PROFILE@">23.0</token> | |
| 7 </macros> | |
| 8 <requirements> | |
| 9 <requirement type="package" version="3.12">python</requirement> | |
| 10 <requirement type="package" version="@TOOL_VERSION@">markitdown</requirement> | |
| 11 </requirements> | |
| 12 <!-- Builds the markitdown command line. Text values are single-quoted so an empty or unusual value cannot shift the remaining arguments. --> | |
| 13 <command detect_errors="exit_code"><![CDATA[ | |
| 14 #set ext_map = { | |
| 15 'pdf': 'pdf', 'docx': 'docx', 'pptx': 'pptx', 'xlsx': 'xlsx', | |
| 16 'html': 'html', 'txt': 'txt', 'ipynb': 'ipynb', | |
| 17 'markdown': 'md', 'zip': 'zip', 'tabular': 'csv', 'csv': 'csv' | |
| 18 } | |
| 19 ## ext_map translates the Galaxy datatype of the input into the extension hint passed with -x; unknown datatypes give an empty hint. | |
| 20 #set file_ext = ext_map.get($input.ext, '') | |
| 21 #set final_ext = $ext_hint if $ext_hint else $file_ext | |
| 22 ## A non-empty "Extension override" wins over the datatype-derived hint. The result may still be empty, hence the quoting after -x. | |
| 23 markitdown | |
| 24 '$input' | |
| 25 -x '$final_ext' | |
| 26 #if $mime_type: | |
| 27 -m '$mime_type' | |
| 28 #end if | |
| 29 #if $charset: | |
| 30 -c '$charset' | |
| 31 #end if | |
| 32 $keep_data_uris | |
| 33 -o '$output' | |
| 34 ]]></command> | |
| 35 <!-- Input dataset plus optional text hints and a flag read by the command template. csv is accepted because the template maps the csv datatype explicitly. --> | |
| 36 <inputs> | |
| 37 <param name="input" type="data" format="pdf,docx,pptx,xlsx,html,txt,ipynb,markdown,zip,tabular,csv" | |
| 38 label="Input file"/> | |
| 39 <param name="ext_hint" type="text" optional="true" label="Extension override"/> | |
| 40 <param name="mime_type" type="text" optional="true" label="MIME type hint"/> | |
| 41 <param name="charset" type="text" optional="true" label="Character set (e.g. UTF-8)"/> | |
| 42 <param name="keep_data_uris" type="boolean" truevalue="--keep-data-uris" falsevalue="" label="Keep embedded data URIs"/> | |
| 43 </inputs> | |
| 44 | |
| 45 <outputs> | |
| 46 <data name="output" format="markdown" label="Converted Markdown output"/> | |
| 47 </outputs> | |
| 48 | |
| 49 <tests> | |
| 50 <test> | |
| 51 <param name="input" value="EAR.pdf" ftype="pdf"/> | |
| 52 <output name="output"> | |
| 53 <assert_contents> | |
| 54 <has_text text="Tags: ERGA-BGE"/> | |
| 55 <has_text text="Lineage: mammalia_odb10"/> | |
| 56 </assert_contents> | |
| 57 </output> | |
| 58 </test> | |
| 59 | |
| 60 <test> | |
| 61 <param name="input" value="example.docx" ftype="docx"/> | |
| 62 <output name="output"> | |
| 63 <assert_contents> | |
| 64 <has_text text="# Lorem ipsum dolor sit amet, consectetur adipiscing elit."/> | |
| 65 </assert_contents> | |
| 66 </output> | |
| 67 </test> | |
| 68 | |
| 69 <!--test> | |
| 70 <param name="input" value="example.odt"/> | |
| 71 <param name="ext_hint" value="odt"/> | |
| 72 <output name="output"> | |
| 73 <assert_contents> | |
| 74 <has_text text="This is a Word document"/> | |
| 75 </assert_contents> | |
| 76 </output> | |
| 77 </test--> | |
| 78 | |
| 79 <test> | |
| 80 <param name="input" value="report_4.html" ftype="html"/> | |
| 81 <param name="keep_data_uris" value="true"/> | |
| 82 <output name="output"> | |
| 83 <assert_contents> | |
| 84 <has_text text="is the contig length such that using longer or equal length contigs produces"/> | |
| 85 </assert_contents> | |
| 86 </output> | |
| 87 </test> | |
| 88 | |
| 89 <test> | |
| 90 <param name="input" value="example.txt" ftype="txt"/> | |
| 91 <param name="ext_hint" value="txt"/> | |
| 92 <output name="output"> | |
| 93 <assert_contents> | |
| 94 <has_text text="This is a plain text file"/> | |
| 95 </assert_contents> | |
| 96 </output> | |
| 97 </test> | |
| 98 <!-- Notebook conversion test. The expected text contains double quotes, so the attribute value is delimited with single quotes to stay well-formed. --> | |
| 99 <test> | |
| 100 <param name="input" value="example.ipynb" ftype="ipynb"/> | |
| 101 <output name="output"> | |
| 102 <assert_contents> | |
| 103 <has_text text='print("Hello, world!")'/> | |
| 104 </assert_contents> | |
| 105 </output> | |
| 106 </test> | |
| 107 </tests> | |
| 108 | |
| 109 <help format="markdown"><![CDATA[ | |
| 110 | |
| 111 **Markitdown** converts rich document formats (PDF, DOCX, HTML, etc.) to Markdown. | |
| 112 | |
| 113 --- | |
| 114 | |
| 115 ### Supported Formats: | |
| 116 | |
| 117 - PDF, DOCX, PPTX, XLSX | |
| 118 - HTML, TXT, Markdown | |
| 119 - Jupyter Notebooks (IPYNB) | |
| 120 - ZIP containing supported formats | |
| 121 - Tabular (CSV) | |
| 122 | |
| 123 --- | |
| 124 | |
| 125 ### Options: | |
| 126 | |
| 127 - **Extension override** (`-x`): hint for file type if not obvious | |
| 128 - **MIME type** (`-m`): manual MIME hint | |
| 129 - **Charset** (`-c`): text encoding hint | |
| 130 - **Keep data URIs**: retain base64-encoded images | |
| 131 | |
| 132 Project: https://github.com/microsoft/markitdown | |
| 133 ]]></help> | |
| 134 | |
| 135 <citations> | |
| 136 <citation type="bibtex"> | |
| 137 @misc{markitdown2024, | |
| 138 author = {Microsoft}, | |
| 139 title = {markitdown: Convert documents to markdown}, | |
| 140 year = {2024}, | |
| 141 howpublished = {\url{https://github.com/microsoft/markitdown}} | |
| 142 } | |
| 143 </citation> | |
| 144 </citations> | |
| 145 </tool> |
