comparison markitdown.xml @ 0:c7467d9d0b2b draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/markitdown commit 1df47411ce8651c1d4f68cd032b2afe7d5a721de
author bgruening
date Mon, 13 Oct 2025 13:22:04 +0000
parents
children bb65bcc725f0
comparison
equal deleted inserted replaced
-1:000000000000 0:c7467d9d0b2b
1 <tool id="markitdown" name="Markitdown" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2     <description>Convert documents to Markdown</description>
3     <macros>
4         <!-- Version of the wrapped markitdown package; also used as the requirement version below. -->
5         <token name="@TOOL_VERSION@">0.1.3</token>
6         <!-- Wrapper revision, appended to the tool version after "+galaxy". -->
7         <token name="@VERSION_SUFFIX@">0</token>
8         <token name="@PROFILE@">23.0</token>
9     </macros>
10     <requirements>
11         <requirement type="package" version="3.12">python</requirement>
12         <requirement type="package" version="@TOOL_VERSION@">markitdown</requirement>
13     </requirements>
14
15     <command detect_errors="exit_code"><![CDATA[
16         ## Galaxy datatype extension -> file extension hint handed to markitdown via -x.
17         #set ext_map = {
18             'pdf': 'pdf', 'docx': 'docx', 'pptx': 'pptx', 'xlsx': 'xlsx',
19             'html': 'html', 'txt': 'txt', 'ipynb': 'ipynb',
20             'markdown': 'md', 'zip': 'zip', 'tabular': 'csv', 'csv': 'csv'
21         }
22
23         ## An explicit override wins; otherwise use the mapped extension, which is
24         ## empty when the input datatype has no entry in ext_map.
25         #set file_ext = ext_map.get($input.ext, '')
26         #set final_ext = $ext_hint if $ext_hint else $file_ext
27
28         markitdown
29             '$input'
30             ## Emit -x only with a value: a bare -x has no argument and breaks the command line.
31             #if $final_ext:
32                 -x '$final_ext'
33             #end if
34             ## Each guard and the value it emits must reference the same input parameter.
35             #if $mime_type:
36                 -m '$mime_type'
37             #end if
38             #if $charset:
39                 -c '$charset'
40             #end if
41             $keep_data_uris
42             -o '$output'
43     ]]></command>
44
45     <inputs>
46         <param name="input" type="data" format="pdf,docx,pptx,xlsx,html,txt,ipynb,markdown,zip,tabular"
47                label="Input file"/>
48         <param name="ext_hint" type="text" optional="true" label="Extension override" help="Passed to markitdown as -x. Leave empty to derive the extension from the input datatype."/>
49         <param name="mime_type" type="text" optional="true" label="MIME type hint" help="Passed to markitdown as -m. Leave empty to omit the option."/>
50         <param name="charset" type="text" optional="true" label="Character set (e.g. UTF-8)" help="Passed to markitdown as -c. Leave empty to omit the option."/>
51         <param name="keep_data_uris" type="boolean" truevalue="--keep-data-uris" falsevalue="" checked="false" label="Keep embedded data URIs" help="Adds --keep-data-uris to the command line."/>
52     </inputs>
53
54     <outputs>
55         <data name="output" format="markdown" label="Converted Markdown output"/>
56     </outputs>
57
58     <tests>
59         <!-- PDF input, all options left at their defaults. -->
60         <test>
61             <param name="input" value="EAR.pdf" ftype="pdf"/>
62             <output name="output">
63                 <assert_contents>
64                     <has_text text="Tags: ERGA-BGE"/>
65                     <has_text text="Lineage: mammalia_odb10"/>
66                 </assert_contents>
67             </output>
68         </test>
69
70         <!-- DOCX input. -->
71         <test>
72             <param name="input" value="example.docx" ftype="docx"/>
73             <output name="output">
74                 <assert_contents>
75                     <has_text text="# Lorem ipsum dolor sit amet, consectetur adipiscing elit."/>
76                 </assert_contents>
77             </output>
78         </test>
79
80         <!-- Disabled test kept for reference; odt is not listed in the input param's format attribute. -->
81         <!--test>
82         <param name="input" value="example.odt"/>
83         <param name="ext_hint" value="odt"/>
84         <output name="output">
85         <assert_contents>
86         <has_text text="This is a Word document"/>
87         </assert_contents>
88         </output>
89         </test-->
90
91         <!-- HTML input with the keep-data-uris flag switched on. -->
92         <test>
93             <param name="input" value="report_4.html" ftype="html"/>
94             <param name="keep_data_uris" value="true"/>
95             <output name="output">
96                 <assert_contents>
97                     <has_text text="is the contig length such that using longer or equal length contigs produces"/>
98                 </assert_contents>
99             </output>
100         </test>
101
102         <!-- Plain text input with an explicit extension override. -->
103         <test>
104             <param name="input" value="example.txt" ftype="txt"/>
105             <param name="ext_hint" value="txt"/>
106             <output name="output">
107                 <assert_contents>
108                     <has_text text="This is a plain text file"/>
109                 </assert_contents>
110             </output>
111         </test>
112
113         <!-- Plain text input exercising the MIME type and charset hints (-m / -c). -->
114         <test>
115             <param name="input" value="example.txt" ftype="txt"/>
116             <param name="mime_type" value="text/plain"/>
117             <param name="charset" value="UTF-8"/>
118             <output name="output">
119                 <assert_contents>
120                     <has_text text="This is a plain text file"/>
121                 </assert_contents>
122             </output>
123         </test>
124
125         <!-- Jupyter notebook input. -->
126         <test>
127             <param name="input" value="example.ipynb" ftype="ipynb"/>
128             <output name="output">
129                 <assert_contents>
130                     <has_text text="print(&quot;Hello, world!&quot;)"/>
131                 </assert_contents>
132             </output>
133         </test>
134     </tests>
135
136     <help format="markdown"><![CDATA[
137
138 **Markitdown** converts rich document formats (PDF, DOCX, HTML, etc.) to Markdown.
139
140 ---
141
142 ### Supported Formats:
143
144 - PDF, DOCX, PPTX, XLSX
145 - HTML, TXT, Markdown
146 - Jupyter Notebooks (IPYNB)
147 - ZIP containing supported formats
148 - Tabular (CSV)
149
150 ---
151
152 ### Options:
153
154 - **Extension override** (`-x`): hint for file type if not obvious; when left empty, the extension is derived from the input datatype
155 - **MIME type** (`-m`): manual MIME hint
156 - **Charset** (`-c`): text encoding hint
157 - **Keep data URIs** (`--keep-data-uris`): retain base64-encoded images
158
159 Project: https://github.com/microsoft/markitdown
160     ]]></help>
161
162     <citations>
163         <citation type="bibtex">
164 @misc{markitdown2024,
165 author = {Microsoft},
166 title = {markitdown: Convert documents to markdown},
167 year = {2024},
168 howpublished = {\url{https://github.com/microsoft/markitdown}}
169 }
170         </citation>
171     </citations>
172 </tool>