Mercurial > repos > bgruening > text_processing
comparison ansi2html.sh @ 3:7068d1548234 draft
Uploaded
| author | bgruening |
|---|---|
| date | Sun, 06 Oct 2013 08:22:36 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:fc862d5bccaf | 3:7068d1548234 |
|---|---|
| 1 #!/bin/sh | |
| 2 | |
| 3 # Convert ANSI (terminal) colours and attributes to HTML | |
| 4 | |
| 5 # Author: | |
| 6 # http://www.pixelbeat.org/docs/terminal_colours/ | |
| 7 # Examples: | |
| 8 # ls -l --color=always | ansi2html.sh > ls.html | |
| 9 # git show --color | ansi2html.sh > last_change.html | |
| 10 # Generally one can use the `script` util to capture full terminal output. | |
| 11 # Changes: | |
| 12 # V0.1, 24 Apr 2008, Initial release | |
| 13 # V0.2, 01 Jan 2009, Phil Harnish <philharnish@gmail.com> | |
| 14 # Support `git diff --color` output by | |
| 15 # matching ANSI codes that specify only | |
| 16 # bold or background colour. | |
| 17 # P@draigBrady.com | |
| 18 # Support `ls --color` output by stripping | |
| 19 # redundant leading 0s from ANSI codes. | |
| 20 # Support `grep --color=always` by stripping | |
| 21 # unhandled ANSI codes (specifically ^[[K). | |
| 22 # V0.3, 20 Mar 2009, http://eexpress.blog.ubuntu.org.cn/ | |
| 23 # Remove cat -v usage which mangled non ascii input. | |
| 24 # Cleanup regular expressions used. | |
| 25 # Support other attributes like reverse, ... | |
| 26 # P@draigBrady.com | |
| 27 # Correctly nest <span> tags (even across lines). | |
| 28 # Add a command line option to use a dark background. | |
| 29 # Strip more terminal control codes. | |
| 30 # V0.4, 17 Sep 2009, P@draigBrady.com | |
| 31 # Handle codes with combined attributes and color. | |
| 32 # Handle isolated <bold> attributes with css. | |
| 33 # Strip more terminal control codes. | |
| 34 # V0.12, 12 Jul 2011 | |
| 35 # http://github.com/pixelb/scripts/commits/master/scripts/ansi2html.sh | |
| 36 | |
| 37 if [ "$1" = "--version" ]; then | |
| 38 echo "0.12" && exit | |
| 39 fi | |
| 40 | |
| 41 if [ "$1" = "--help" ]; then | |
| 42 echo "This utility converts ANSI codes in data passed to stdin" >&2 | |
| 43 echo "It has 2 optional parameters:" >&2 | |
| 44 echo " --bg=dark --palette=linux|solarized|tango|xterm" >&2 | |
| 45 echo "E.g.: ls -l --color=always | ansi2html.sh --bg=dark > ls.html" >&2 | |
| 46 exit | |
| 47 fi | |
| 48 | |
| 49 [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } | |
| 50 | |
| 51 if [ "$1" = "--palette=solarized" ]; then | |
| 52 # See http://ethanschoonover.com/solarized | |
| 53 P0=073642; P1=D30102; P2=859900; P3=B58900; | |
| 54 P4=268BD2; P5=D33682; P6=2AA198; P7=EEE8D5; | |
| 55 P8=002B36; P9=CB4B16; P10=586E75; P11=657B83; | |
| 56 P12=839496; P13=6C71C4; P14=93A1A1; P15=FDF6E3; | |
| 57 shift; | |
| 58 elif [ "$1" = "--palette=solarized-xterm" ]; then | |
| 59 # Above mapped onto the xterm 256 color palette | |
| 60 P0=262626; P1=AF0000; P2=5F8700; P3=AF8700; | |
| 61 P4=0087FF; P5=AF005F; P6=00AFAF; P7=E4E4E4; | |
| 62 P8=1C1C1C; P9=D75F00; P10=585858; P11=626262; | |
| 63 P12=808080; P13=5F5FAF; P14=8A8A8A; P15=FFFFD7; | |
| 64 shift; | |
| 65 elif [ "$1" = "--palette=tango" ]; then | |
| 66 # Gnome default | |
| 67 P0=000000; P1=CC0000; P2=4E9A06; P3=C4A000; | |
| 68 P4=3465A4; P5=75507B; P6=06989A; P7=D3D7CF; | |
| 69 P8=555753; P9=EF2929; P10=8AE234; P11=FCE94F; | |
| 70 P12=729FCF; P13=AD7FA8; P14=34E2E2; P15=EEEEEC; | |
| 71 shift; | |
| 72 elif [ "$1" = "--palette=xterm" ]; then | |
| 73 P0=000000; P1=CD0000; P2=00CD00; P3=CDCD00; | |
| 74 P4=0000EE; P5=CD00CD; P6=00CDCD; P7=E5E5E5; | |
| 75 P8=7F7F7F; P9=FF0000; P10=00FF00; P11=FFFF00; | |
| 76 P12=5C5CFF; P13=FF00FF; P14=00FFFF; P15=FFFFFF; | |
| 77 shift; | |
| 78 else # linux console | |
| 79 P0=000000; P1=AA0000; P2=00AA00; P3=AA5500; | |
| 80 P4=0000AA; P5=AA00AA; P6=00AAAA; P7=AAAAAA; | |
| 81 P8=555555; P9=FF5555; P10=55FF55; P11=FFFF55; | |
| 82 P12=5555FF; P13=FF55FF; P14=55FFFF; P15=FFFFFF; | |
| 83 [ "$1" = "--palette=linux" ] && shift | |
| 84 fi | |
| 85 | |
| 86 [ "$1" = "--bg=dark" ] && { dark_bg=yes; shift; } | |
| 87 | |
| 88 echo -n "<html> | |
| 89 <head> | |
| 90 <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/> | |
| 91 <style type=\"text/css\"> | |
| 92 .ef0,.f0 { color: #$P0; } .eb0,.b0 { background-color: #$P0; } | |
| 93 .ef1,.f1 { color: #$P1; } .eb1,.b1 { background-color: #$P1; } | |
| 94 .ef2,.f2 { color: #$P2; } .eb2,.b2 { background-color: #$P2; } | |
| 95 .ef3,.f3 { color: #$P3; } .eb3,.b3 { background-color: #$P3; } | |
| 96 .ef4,.f4 { color: #$P4; } .eb4,.b4 { background-color: #$P4; } | |
| 97 .ef5,.f5 { color: #$P5; } .eb5,.b5 { background-color: #$P5; } | |
| 98 .ef6,.f6 { color: #$P6; } .eb6,.b6 { background-color: #$P6; } | |
| 99 .ef7,.f7 { color: #$P7; } .eb7,.b7 { background-color: #$P7; } | |
| 100 .ef8, .f0 > .bold,.bold > .f0 { color: #$P8; font-weight: normal; } | |
| 101 .ef9, .f1 > .bold,.bold > .f1 { color: #$P9; font-weight: normal; } | |
| 102 .ef10,.f2 > .bold,.bold > .f2 { color: #$P10; font-weight: normal; } | |
| 103 .ef11,.f3 > .bold,.bold > .f3 { color: #$P11; font-weight: normal; } | |
| 104 .ef12,.f4 > .bold,.bold > .f4 { color: #$P12; font-weight: normal; } | |
| 105 .ef13,.f5 > .bold,.bold > .f5 { color: #$P13; font-weight: normal; } | |
| 106 .ef14,.f6 > .bold,.bold > .f6 { color: #$P14; font-weight: normal; } | |
| 107 .ef15,.f7 > .bold,.bold > .f7 { color: #$P15; font-weight: normal; } | |
| 108 .eb8 { background-color: #$P8; } | |
| 109 .eb9 { background-color: #$P9; } | |
| 110 .eb10 { background-color: #$P10; } | |
| 111 .eb11 { background-color: #$P11; } | |
| 112 .eb12 { background-color: #$P12; } | |
| 113 .eb13 { background-color: #$P13; } | |
| 114 .eb14 { background-color: #$P14; } | |
| 115 .eb15 { background-color: #$P15; } | |
| 116 " | |
| 117 | |
| 118 # The default xterm 256 colour palette | |
| 119 for red in $(seq 0 5); do | |
| 120 for green in $(seq 0 5); do | |
| 121 for blue in $(seq 0 5); do | |
| 122 c=$((16 + ($red * 36) + ($green * 6) + $blue)) | |
| 123 r=$((($red * 40 + 55) * ($red > 0))) | |
| 124 g=$((($green * 40 + 55) * ($green > 0))) | |
| 125 b=$((($blue * 40 + 55) * ($blue > 0))) | |
| 126 printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $r $g $b | |
| 127 printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $r $g $b | |
| 128 done | |
| 129 done | |
| 130 done | |
| 131 for gray in $(seq 0 23); do | |
| 132 c=$(($gray+232)) | |
| 133 l=$(($gray*10 + 8)) | |
| 134 printf ".ef%d { color: #%2.2x%2.2x%2.2x; } " $c $l $l $l | |
| 135 printf ".eb%d { background-color: #%2.2x%2.2x%2.2x; }\n" $c $l $l $l | |
| 136 done | |
| 137 | |
| 138 echo -n ' | |
| 139 .f9 { color: '`[ "$dark_bg" ] && echo "#$P7;" || echo "#$P0;"`' } | |
| 140 .b9 { background-color: #'`[ "$dark_bg" ] && echo $P0 || echo $P15`'; } | |
| 141 .f9 > .bold,.bold > .f9, body.f9 > pre > .bold { | |
| 142 /* Bold is heavy black on white, or bright white | |
| 143 depending on the default background */ | |
| 144 color: '`[ "$dark_bg" ] && echo "#$P15;" || echo "#$P0;"`' | |
| 145 font-weight: '`[ "$dark_bg" ] && echo 'normal;' || echo 'bold;'`' | |
| 146 } | |
| 147 .reverse { | |
| 148 /* CSS doesnt support swapping fg and bg colours unfortunately, | |
| 149 so just hardcode something that will look OK on all backgrounds. */ | |
| 150 '"color: #$P0; background-color: #$P7;"' | |
| 151 } | |
| 152 .underline { text-decoration: underline; } | |
| 153 .line-through { text-decoration: line-through; } | |
| 154 .blink { text-decoration: blink; } | |
| 155 | |
| 156 </style> | |
| 157 </head> | |
| 158 | |
| 159 <body class="f9 b9"> | |
| 160 <pre> | |
| 161 ' | |
| 162 | |
| 163 p='\x1b\[' #shortcut to match escape codes | |
| 164 P="\(^[^°]*\)¡$p" #expression to match prepended codes below | |
| 165 | |
| 166 # Handle various xterm control sequences. | |
| 167 # See /usr/share/doc/xterm-*/ctlseqs.txt | |
| 168 sed " | |
| 169 s#\x1b[^\x1b]*\x1b\\\##g # strip anything between \e and ST | |
| 170 s#\x1b][0-9]*;[^\a]*\a##g # strip any OSC (xterm title etc.) | |
| 171 | |
| 172 #handle carriage returns | |
| 173 s#^.*\r\{1,\}\([^$]\)#\1# | |
| 174 s#\r\$## # strip trailing \r | |
| 175 | |
| 176 # strip other non SGR escape sequences | |
| 177 s#[\x07]##g | |
| 178 s#\x1b[]>=\][0-9;]*##g | |
| 179 s#\x1bP+.\{5\}##g | |
| 180 s#${p}[0-9;?]*[^0-9;?m]##g | |
| 181 | |
| 182 #remove backspace chars and what they're backspacing over | |
| 183 :rm_bs | |
| 184 s#[^\x08]\x08##g; t rm_bs | |
| 185 " | | |
| 186 | |
| 187 # Normalize the input before transformation | |
| 188 sed " | |
| 189 # escape HTML | |
| 190 s#\&#\&amp;#g; s#>#\&gt;#g; s#<#\&lt;#g; s#\"#\&quot;#g | |
| 191 | |
| 192 # normalize SGR codes a little | |
| 193 | |
| 194 # split 256 colors out and mark so that they're not | |
| 195 # recognised by the following 'split combined' line | |
| 196 :e | |
| 197 s#${p}\([0-9;]\{1,\}\);\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m${p}¬\2m#g; t e | |
| 198 s#${p}\([34]8;5;[0-9]\{1,3\}\)m#${p}¬\1m#g; | |
| 199 | |
| 200 :c | |
| 201 s#${p}\([0-9]\{1,\}\);\([0-9;]\{1,\}\)m#${p}\1m${p}\2m#g; t c # split combined | |
| 202 s#${p}0\([0-7]\)#${p}\1#g #strip leading 0 | |
| 203 s#${p}1m\(\(${p}[4579]m\)*\)#\1${p}1m#g #bold last (with clr) | |
| 204 s#${p}m#${p}0m#g #add leading 0 to norm | |
| 205 | |
| 206 # undo any 256 color marking | |
| 207 s#${p}¬\([34]8;5;[0-9]\{1,3\}\)m#${p}\1m#g; | |
| 208 | |
| 209 # map 16 color codes to color + bold | |
| 210 s#${p}9\([0-7]\)m#${p}3\1m${p}1m#g; | |
| 211 s#${p}10\([0-7]\)m#${p}4\1m${p}1m#g; | |
| 212 | |
| 213 # change 'reset' code to a single char, and prepend a single char to | |
| 214 # other codes so that we can easily do negative matching, as sed | |
| 215 # does not support look behind expressions etc. | |
| 216 s#°#\&deg;#g; s#${p}0m#°#g | |
| 217 s#¡#\&iexcl;#g; s#${p}[0-9;]*m#¡&#g | |
| 218 " | | |
| 219 | |
| 220 # Convert SGR sequences to HTML | |
| 221 sed " | |
| 222 :ansi_to_span # replace ANSI codes with CSS classes | |
| 223 t ansi_to_span # hack so t commands below only apply to preceding s cmd | |
| 224 | |
| 225 /^[^¡]*°/ { b span_end } # replace 'reset code' if no preceding code | |
| 226 | |
| 227 # common combinations to minimise html (optional) | |
| 228 s#${P}3\([0-7]\)m¡${p}4\([0-7]\)m#\1<span class=\"f\2 b\3\">#;t span_count | |
| 229 s#${P}4\([0-7]\)m¡${p}3\([0-7]\)m#\1<span class=\"f\3 b\2\">#;t span_count | |
| 230 | |
| 231 s#${P}1m#\1<span class=\"bold\">#; t span_count | |
| 232 s#${P}4m#\1<span class=\"underline\">#; t span_count | |
| 233 s#${P}5m#\1<span class=\"blink\">#; t span_count | |
| 234 s#${P}7m#\1<span class=\"reverse\">#; t span_count | |
| 235 s#${P}9m#\1<span class=\"line-through\">#; t span_count | |
| 236 s#${P}3\([0-9]\)m#\1<span class=\"f\2\">#; t span_count | |
| 237 s#${P}4\([0-9]\)m#\1<span class=\"b\2\">#; t span_count | |
| 238 | |
| 239 s#${P}38;5;\([0-9]\{1,3\}\)m#\1<span class=\"ef\2\">#; t span_count | |
| 240 s#${P}48;5;\([0-9]\{1,3\}\)m#\1<span class=\"eb\2\">#; t span_count | |
| 241 | |
| 242 s#${P}[0-9;]*m#\1#g; t ansi_to_span # strip unhandled codes | |
| 243 | |
| 244 b # next line of input | |
| 245 | |
| 246 # add a corresponding span end flag | |
| 247 :span_count | |
| 248 x; s/^/s/; x | |
| 249 b ansi_to_span | |
| 250 | |
| 251 # replace 'reset code' with correct number of </span> tags | |
| 252 :span_end | |
| 253 x | |
| 254 /^s/ { | |
| 255 s/^.// | |
| 256 x | |
| 257 s#°#</span>°# | |
| 258 b span_end | |
| 259 } | |
| 260 x | |
| 261 s#°## | |
| 262 b ansi_to_span | |
| 263 " | | |
| 264 | |
| 265 # Convert alternative character set | |
| 266 # Note we convert here, as if we do at start we have to worry about avoiding | |
| 267 # conversion of SGR codes etc., whereas doing here we only have to | |
| 268 # avoid conversions of stuff between &...; or <...> | |
| 269 # | |
| 270 # Note we could use sed to do this based around: | |
| 271 # sed 'y/abcdefghijklmnopqrstuvwxyz{}`~/▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·/' | |
| 272 # However that would be very awkward as we need to only conv some input. | |
| 273 # The basic scheme that we do in the python script below is: | |
| 274 # 1. enable transliterate once ¡ char seen | |
| 275 # 2. disable once µ char seen (may be on diff line to ¡) | |
| 276 # 3. never transliterate between &; or <> chars | |
| 277 sed " | |
| 278 # change 'smacs' and 'rmacs' to a single char so that we can easily do | |
| 279 # negative matching, as sed does not support look behind expressions etc. | |
| 280 # Note we don't use ° like above as that's part of the alternate charset. | |
| 281 s#\x1b(0#¡#g; | |
| 282 s#µ#\&micro;#g; s#\x1b(B#µ#g | |
| 283 " | | |
| 284 ( | |
| 285 python -c " | |
| 286 # vim:fileencoding=utf8 | |
| 287 | |
| 288 import sys | |
| 289 import locale | |
| 290 encoding=locale.getpreferredencoding() | |
| 291 | |
| 292 old='abcdefghijklmnopqrstuvwxyz{}\`~' | |
| 293 new='▒␉␌␍␊°±␋┘┐┌└┼⎺⎻─⎼⎽├┤┴┬│≤≥π£◆·' | |
| 294 new=unicode(new, 'utf-8') | |
| 295 table=range(128) | |
| 296 for o,n in zip(old, new): table[ord(o)]=n | |
| 297 | |
| 298 (STANDARD, ALTERNATIVE, HTML_TAG, HTML_ENTITY) = (0, 1, 2, 3) | |
| 299 | |
| 300 state = STANDARD | |
| 301 last_mode = STANDARD | |
| 302 for c in unicode(sys.stdin.read(), encoding): | |
| 303 if state == HTML_TAG: | |
| 304 if c == '>': | |
| 305 state = last_mode | |
| 306 elif state == HTML_ENTITY: | |
| 307 if c == ';': | |
| 308 state = last_mode | |
| 309 else: | |
| 310 if c == '<': | |
| 311 state = HTML_TAG | |
| 312 elif c == '&': | |
| 313 state = HTML_ENTITY | |
| 314 elif c == u'¡' and state == STANDARD: | |
| 315 state = ALTERNATIVE | |
| 316 last_mode = ALTERNATIVE | |
| 317 continue | |
| 318 elif c == u'µ' and state == ALTERNATIVE: | |
| 319 state = STANDARD | |
| 320 last_mode = STANDARD | |
| 321 continue | |
| 322 elif state == ALTERNATIVE: | |
| 323 c = c.translate(table) | |
| 324 sys.stdout.write(c.encode(encoding)) | |
| 325 " 2>/dev/null || | |
| 326 sed 's/[¡µ]//g' # just strip alternative flag chars | |
| 327 ) | |
| 328 | |
| 329 echo "</pre> | |
| 330 </body> | |
| 331 </html>" |
