# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2023 # # @@ Meta Begin # Application punk::ansi 999999.0a1.0 # Meta platform tcl # Meta license # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::ansi 0 999999.0a1.0] #[copyright "2023"] #[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] #[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] #[require punk::ansi] #[keywords module ansi terminal console string] #[description] #[para]Ansi based terminal control string functions #[para]See [package punk::ansi::console] for related functions for controlling a console # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::ansi #[para]punk::ansi functions return their values - no implicit emission to console/stdout #[subsection Concepts] #[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner #[para]There are many differences in terminal implementations - but most should support a core set of features #[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. #[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::ansi #[list_begin itemized] package require Tcl 8.6 #*** !doctools #[item] [package {Tcl 8.6}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::ansi { #*** !doctools #[subsection {Namespace punk::ansi}] #[para] Core API functions for punk::ansi #[list_begin definitions] #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control variable test "blah\033\[1;33mETC\033\[0;mOK" #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. namespace export\ {a?} {a+} a \ ansistring\ convert*\ clear*\ cursor_*\ detect*\ get_*\ move*\ reset*\ strip*\ test_decaln\ titleset\ variable escape_terminals #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals dict set escape_terminals DCS [list \007 \033\\ \u009c] dict set escape_terminals MISC [list \007 \033\\ \u009c] #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? variable standalone_codes set standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] #review - there doesn't seem to be an \x1b#7 # https://espterm.github.io/docs/VT100%20escape%20codes.html #self-contained 2 byte ansi escape sequences - review more? set ansi_2byte_codes_dict [dict create\ "reset_terminal" "\u001bc"\ "save_cursor_posn" "\u001b7"\ "restore_cursor_posn" "\u001b8"\ "cursor_up_one" "\u001bM"\ "NEL - Next Line" "\u001bE"\ "IND - Down one line" "\u001bD"\ "HTS - Set Tab Stop" "\u001bH"\ "DECPAM app keypad" "\x1b="\ "DECPNM norm keypad" "\x1b>"\ ] #control strings #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf # #A control string is a string of bit combinations which may occur in the data stream as a logical entity for #control purposes. A control string consists of an opening delimiter, a command string or a character string, #and a terminating delimiter, the STRING TERMINATOR (ST). #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. #A character string is a sequence of any bit combination, except those representing START OF STRING #(SOS) or STRING TERMINATOR (ST). #The interpretation of the command string or the character string is not defined by this Standard, but instead #requires prior agreement between the sender and the recipient of the data. #The opening delimiters defined in this Standard are #a) APPLICATION PROGRAM COMMAND (APC) #b) DEVICE CONTROL STRING (DCS) #c) OPERATING SYSTEM COMMAND (OSC) #d) PRIVACY MESSAGE (PM) #e) START OF STRING (SOS) # #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. #review - can terminals handle SGR codes within a PM? #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) proc controlstring_PM {text} { return "\x1b^${text}\033\\" } proc controlstring_PM8 {text} { return "\x9e${text}\x9c" } proc controlstring_SOS {text} { return "\x1bX${text}\033\\" } proc controlstring_SOS8 {text} { return "\x98${text}\x9c" } proc controlstring_APC {text} { return "\x1b_${text}\033\\" } proc controlstring_APC8 {text} { return "\x9f${text}\x9c" } #candidate for zig/c implementation? proc stripansi {text} { #*** !doctools #[call [fun stripansi] [arg text] ] #[para]Return a string with ansi codes stripped out #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW variable escape_terminals ;#dict variable standalone_codes ;#map to empty string set text [convert_g0 $text] #we should just map away the 2-byte sequences too #standalone 3 byte VT100 sequences - some of these work in wezterm #\x1b#3 double-height letters top half #\x1b#4 double-height letters bottom half #\x1b#5 single-width line #\x1b#6 double-width line #\x1b#8 dec test fill screen set text [string map $standalone_codes $text] #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. #line endings can theoretically occur within an ansi escape sequence payload (review e.g title?) set inputlist [split $text ""] set outputlist [list] set in_escapesequence 0 #assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls set i 0 foreach u $inputlist { set v [lindex $inputlist $i+1] set uv ${u}${v} if {$in_escapesequence eq "2b"} { #2nd byte - done. set in_escapesequence 0 } elseif {$in_escapesequence != 0} { set endseq [dict get $escape_terminals $in_escapesequence] if {$u in $endseq} { set in_escapesequence 0 } elseif {$uv in $endseq} { set in_escapesequence 2b ;#flag next byte as last in sequence } } else { #handle both 7-bit and 8-bit CSI and OSC if {[regexp {^(?:\033\[|\u009b)} $uv]} { set in_escapesequence CSI } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { set in_escapesequence OSC } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { set in_escapesequence DCS } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { #SOS,PM,APC - all terminated with ST set in_escapesequence MISC } else { lappend outputlist $u } } incr i } return [join $outputlist ""] } #review - what happens when no terminator? #todo - map other chars to unicode equivs proc convert_g0 {text} { #using not \033 inside to stop greediness - review how does it compare to ".*?" set re {\033\(0[^\033]*\033\(B} set re2 {\033\(0(.*)\033\(B} ;#capturing set parts [::punk::ansi::ta::_perlish_split $re $text] set out "" foreach {pt g} $parts { append out $pt if {$g ne ""} { #puts --$g-- #box sample #lqk #x x #mqj #m = boxd_lur #set map [list l \u250f k \u2513] ;#heavy set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light regexp $re2 $g _match contents append out [string map $map $contents] } } return $out } #todo - convert esc(0 graphics sequences to single char unicode equivalents e.g box drawing set # esc) ?? proc stripansi_gx {text} { #e.g "\033(0" - select VT100 graphics for character set G0 #e.g "\033(B" - reset #e.g "\033)0" - select VT100 graphics for character set G1 #e.g "\033)X" - where X is any char other than 0 to reset ?? return [convert_g0 $text] } #CSI m = SGR (Select Graphic Rendition) variable SGR_setting_map { bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 reverse 7 noreverse 27 defaultfg 39 defaultbg 49 overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 } variable SGR_colour_map { black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 } variable SGR_map set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] proc colourmap1 {{bgname White}} { package require textblock set bg [textblock::block 33 3 "[a+ $bgname] [a]"] set colormap "" for {set i 0} {$i <= 7} {incr i} { append colormap "_[a+ white bold 48\;5\;$i] $i [a]" } set map1 [overtype::left -transparent _ $bg "\n$colormap"] return $map1 } proc colourmap2 {{bgname White}} { package require textblock set bg [textblock::block 39 3 "[a+ $bgname] [a]"] set colormap "" for {set i 8} {$i <= 15} {incr i} { append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey } set map2 [overtype::left -transparent _ $bg "\n$colormap"] return $map2 } proc a? {args} { #*** !doctools #[call [fun a?] [opt {ansicode...}]] #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map if {![llength $args]} { set out "" append out $SGR_setting_map \n append out $SGR_colour_map \n try { package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try set bgname "White" set map1 [colourmap1 $bgname] set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] set map2 [colourmap2 $bgname] set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] append out [textblock::join $map1 " " $map2] \n #append out $map1[a] \n #append out $map2[a] \n } on error {result options} { puts stderr "Failed to draw colormap" puts stderr "$result" } finally { return $out } } else { set result [list] set rmap [lreverse $map] foreach i $args { if {[string is integer -strict $i]} { if {[dict exists $rmap $i]} { lappend result $i [dict get $rmap $i] } } else { if {[dict exists $map $i]} { lappend result $i [dict get $map $i] } } } return $result } } proc a+ {args} { #*** !doctools #[call [fun a+] [opt {ansicode...}]] #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- if {[string match -nocase "256f*" $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } elseif {[string match -nocase 256b* $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } elseif {[string match -nocase rgbf* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } elseif {[string match -nocase rgbb* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation if {![llength $t]} { return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) } return "\x1b\[[join $t {;}]m" } proc a {args} { #*** !doctools #[call [fun a] [opt {ansicode...}]] #[para]Returns the ansi code to reset any current settings and apply those from the supplied list #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- if {[string match -nocase "256f*" $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } elseif {[string match -nocase 256b* $i]} { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } elseif {[string match -nocase rgbf* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } elseif {[string match -nocase rgbb* $i]} { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } proc get_code_name {code} { #*** !doctools #[call [fun get_code_name] [arg code]] #[para]for example #[para] get_code_name red will return 31 #[para] get_code_name 31 will return red variable SGR_map set res [list] foreach i [split $code ";"] { set ix [lsearch -exact $SGR_map $i] if {[string is digit -strict $code]} { if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} } else { #reverse lookup code from name if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} } } set res } proc reset {} { #*** !doctools #[call [fun reset]] #[para]reset console return "\x1bc" } proc reset_soft {} { #*** !doctools #[call [fun reset_soft]] return \x1b\[!p } proc reset_colour {} { #*** !doctools #[call [fun reset_colour]] #[para]reset colour only return "\x1b\[0m" } # -- --- --- --- --- proc clear {} { #*** !doctools #[call [fun clear]] return "\033\[2J" } proc clear_above {} { #*** !doctools #[call [fun clear_above]] return \033\[1J } proc clear_below {} { #*** !doctools #[call [fun clear_below]] return \033\[0J } proc clear_all {} { # - doesn't work?? return \033\[3J } #see also erase_ functions # -- --- --- --- --- proc cursor_on {} { #*** !doctools #[call [fun cursor_on]] return "\033\[?25h" } proc cursor_off {} { #*** !doctools #[call [fun cursor_off]] return "\033\[?25l" } # -- --- --- --- --- proc move {row col} { #*** !doctools #[call [fun move] [arg row] [arg col]] #[para]Return an ansi sequence to move to row,col #[para]aka cursor home return \033\[${row}\;${col}H } proc move_emit {row col data args} { #*** !doctools #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] #[para]Return an ansi string representing a move to row col with data appended #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout #[para]punk::console::move_emit_return will also return the cursor to the original position #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. set out "" append out \033\[${row}\;${col}H$data foreach {row col data} $args { append out \033\[${row}\;${col}H$data } return $out } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] return \033\[${n}C } proc move_back {{n 1}} { #*** !doctools #[call [fun move_back] [arg n]] return \033\[${n}D } proc move_up {{n 1}} { #*** !doctools #[call [fun move_up] [arg n]] return \033\[${n}A } proc move_down {{n 1}} { #*** !doctools #[call [fun move_down] [arg n]] return \033\[${n}B } # -- --- --- --- --- # -- --- --- --- --- proc erase_line {} { #*** !doctools #[call [fun erase_line]] return \033\[2K } proc erase_sol {} { #*** !doctools #[call [fun erase_sol]] #[para]Erase to start of line, leaving cursor position alone. return \033\[1K } proc erase_eol {} { #*** !doctools #[call [fun erase_eol]] return \033\[K } #see also clear_above clear_below # -- --- --- --- --- proc cursor_pos {} { #*** !doctools #[call [fun cursor_pos]] #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin #[para]The output on screen will look something like ^[lb][lb]47;3R #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list return \033\[6n } #alternative to string terminator is \007 - proc titleset {windowtitle} { #*** !doctools #[call [fun titleset] [arg windowtitles]] #[para]Returns the code to set the title of the terminal window to windowtitle #[para]This may not work on terminals which have multiple panes/windows return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives } #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title #no cross-platform ansi-only mechanism ? proc test_decaln {} { #Screen Alignment Test #Reset margins, move cursor to the top left, and fill the screen with 'E' #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) return \x1b#8 } #length of text for printing characters only #review - unicode and other non-printing chars and combining sequences? #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names #review - is there an existing library or better method? print to a terminal and query cursor position? #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. proc printing_length {line} { if {[string first \n $line] >= 0} { error "line_print_length must not contain newline characters" } #review - set line [punk::ansi::stripansi $line] set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter #(* more correctly - moves cursor back) #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces #normalize tabs to an appropriate* width #*todo - handle terminal/context where tabwidth != the default 8 spaces set line [textutil::tabify::untabify2 $line] set bs [format %c 0x08] #set line [string map [list "\r${bs}" "\r"] $line] ;#backsp following a \r will have no effect set line [string trim $line $bs] set n 0 set chars [split $line ""] #build an output set idx 0 set outchars [list] set outsizes [list] foreach c $chars { if {$c eq $bs} { if {$idx > 0} { incr idx -1 } } elseif {$c eq "\r"} { set idx 0 } else { punk::ansi::internal::printing_length_addchar $idx $c incr idx } } set line2 [join $outchars ""] return [punk::char::string_width $line2] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi ---}] } namespace eval punk::ansi { # -- --- --- --- --- --- #XTGETTCAP # xterm responds with # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and # the value of the corresponding string that xterm would send, # or # DCS 0 + r ST for invalid requests. # The strings are encoded in hexadecimal (2 digits per # character). If more than one name is given, xterm replies # with each name/value pair in the same response. An invalid # name (one not found in xterm's tables) ends processing of the # list of names. proc xtgetcap {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\x1bP+q$payload\x1b\\" } proc xtgetcap2 {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\u0090+q$payload\u009c" } namespace eval codetype { #Functions that operate on a single ansi code sequence - not a sequence, and not codes embedded in another string proc is_sgr {code} { #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline #we will accept and pass through the less common colon separator (ITU Open Document Architecture) #Terminals should generally ignore it if they don't use it regexp {\033\[[0-9;:]*m$} $code } proc is_cursor_move_in_line {code} { #review - what about CSI n : m H where row n happens to be current line? regexp {\033\[[0-9]*(:?C|D|G)$} } #pure SGR reset with no other functions proc is_sgr_reset {code} { #todo 8-bit csi regexp {\033\[0*m$} $code } #whether this code has 0 (or equivalently empty) parameter (but may set others) #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes #it generally only makes sense for the reset to be the first entry - otherwise the code has ineffective portions #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. #We will only look at initial parameter as this is the well-formed normal case. #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code proc has_sgr_leadingreset {code} { set params "" regexp {\033\[(.*)m} $code _match params set plist [split $params ";"] if {[string trim [lindex $plist 0] 0] eq ""} { #e.g \033\[m \033\[0\;...m \033\[0000...m return 1 } else { return 0 } } #has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? } namespace eval sequence_type { proc is_Fe {code} { # C1 control codes if {[regexp {^\033\[[\u0040-\u005F]}]} { #7bit - typical case return 1 } #8bit #review - all C1 escapes ? 0x80-0x90F #This is possibly problematic as it is affected by encoding. #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." return 0 } proc is_Fs {code} { puts stderr "is_Fs unimplemented" } } # -- --- --- --- --- --- --- --- --- --- --- #todo - implement colour resets like the perl module: #https://metacpan.org/pod/Text::ANSI::Util #(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) } namespace eval punk::ansi::ta { #*** !doctools #[subsection {Namespace punk::ansi::ta}] #[para] text ansi functions #[para] based on but not identical to the Perl Text Ansi module: #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm #[list_begin definitions] namespace path ::punk::ansi #handle both 7-bit and 8-bit csi #review - does codepage affect this? e.g ebcdic has 8bit csi in different position #CSI #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m variable re_csi_open {(?:\033\[|\u009b)} #colour and style variable re_csi_colour {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@^_|~`]} #OSC - termnate with BEL (\a \007) or ST (string terminator \033\\) # 8-byte string terminator is \x9c (\u009c) #non-greedy via "*?" doesn't seem to work like this.. #variable re_esc_osc1 {(?:\033\]).*?\007} #variable re_esc_osc2 {(?:\033\]).*?\033\\} #variable re_esc_osc3 {(?:\u009d).*?\u009c} #non-greedy by excluding ST terminators #TODO - FIX? see re_ST below variable re_esc_osc1 {(?:\033\])(?:[^\007]*)\007} variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} variable re_osc_open {(?:\033\]|\u009d).*} #standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} #see stripansi set re_start_ST {^(?:\033X|\u0098|\033\^|\u009e|\033_|\u009f)} #ST terminators [list \007 \033\\ \u009c] #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) #non-greedy by exclusion of ST terminators in body #!!! #TODO - fix. we need to match \033\\ not just \033 ! could be colour codes nested in a privacy msg/string #This will currently terminate the code too early in this case #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) variable re_ST {(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)(?:[^\033\007\u009c]*)(?:\033\\|\007|\u009c)} variable re_ansi_detect "${re_csi_open}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_start_ST}" #detect any ansi escapes #review - only detect 'complete' codes - or just use the opening escapes for performance? proc detect {text} { #*** !doctools #[call [fun detect] [arg text]] #[para]Return a boolean indicating whether Ansi codes were detected in text #[para] variable re_ansi_detect #variable re_csi_open #variable re_esc_osc1 #variable re_esc_osc2 #todo - other escape sequences #expr {[regexp $re_csi_open $text] || [regexp $re_esc_osc1 $text] || [regexp $re_esc_osc2 $text]} expr {[regexp $re_ansi_detect $text]} } #not in perl ta proc detect_csi {text} { #*** !doctools #[call [fun detect_csi] [arg text]] #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation #[para]There is also a multi-byte escape sequence \u009b #[para]This is less commonly used but is also detected here #[para](This function is not in perl ta) variable re_csi_open expr {[regexp $re_csi_open $text]} } proc detect_sgr {text} { #*** !doctools #[call [fun detect_sgr] [arg text]] #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. #[para]This is the set of CSI sequences ending in 'm' #[para]This is most commonly an Ansi colour code - but also things such as underline and italics #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. #[para](This function is not in perl ta) variable re_csi_colour expr {[regexp $re_csi_colour $text]} } proc strip {text} { #*** !doctools #[call [fun strip] [arg text]] #[para]Return text stripped of Ansi codes #[para]This is a tailcall to punk::ansi::stripansi tailcall stripansi $text } proc length {text} { #*** !doctools #[call [fun length] [arg text]] #[para]Return the character length after stripping ansi codes - not the printing length string length [stripansi $text] } #todo - handle newlines #not in perl ta #proc printing_length {text} { # #} proc trunc {text width args} { } #not in perl ta #returns just the plaintext portions in a list proc split_at_codes {text} { variable re_esc_osc1 variable re_esc_osc2 variable re_csi_code variable re_standalones variable re_ST punk::ansi::internal::splitx $text "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_ST}" } # -- --- --- --- --- --- #Split $text to a list containing alternating ANSI color codes and text. #ANSI color codes are always on the second element, fourth, and so on. #(ie plaintext on odd list-indices ansi on even indices) # Example: #ta_split_codes "" # => "" #ta_split_codes "a" # => "a" #ta_split_codes "a\e[31m" # => {"a" "\e[31m"} #ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} #ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} #ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} #ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} # proc split_codes {text} { variable re_esc_osc1 variable re_esc_osc2 variable re_csi_code variable re_standalones variable re_ST set re "(?:${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2})+" return [_perlish_split $re $text] } #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) proc split_codes_single {text} { variable re_esc_osc1 variable re_esc_osc2 variable re_csi_code variable re_standalones variable re_ST set re "${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2}" return [_perlish_split $re $text] } #review - tcl greedy expressions may match multiple in one element proc _perlish_split {re text} { if {[string length $text] == 0} { return {} } set list [list] set start 0 #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW while {[regexp -start $start -indices -- $re $text match]} { lassign $match matchStart matchEnd #puts "->start $start ->match $matchStart $matchEnd" if {$matchEnd < $matchStart} { lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] incr start if {$start >= [string length $text]} { break } continue } lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] set start [expr {$matchEnd+1}] #? if {$start >= [string length $text]} { break } } lappend list [string range $text $start end] return $list } proc _ws_split {text} { regexp -all -inline {(?:\S+)|(?:\s+)} $text } # -- --- --- --- --- --- #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } # -- --- --- --- --- --- --- --- --- --- --- namespace eval punk::ansi::ansistring { #*** !doctools #[subsection {Namespace punk::ansi::ansistring}] #[para]punk::ansi::ansistring ensemble - ansi-aware string operations #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. #[list_begin definitions] namespace path [list ::punk::ansi ::punk::ansi::ta] namespace ensemble create namespace export length trim trimleft trimright index VIEW #todo - expose _splits_ methods so caller can work efficiently with the splits themselves #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single #\UFFFD - replacement char or \U2426 #using ISO 2047 graphical representations of control characters #00 NUL Null ⎕ U+2395 NU #01 TC1, SOH Start of Heading ⌈ U+2308 SH #02 TC2, STX Start of Text ⊥ U+22A5 SX #03 TC3, ETX End of Text ⌋ U+230B EX #04 TC4, EOT End of Transmission ⌁ U+2301[9] ET #05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ #06 TC6, ACK Acknowledge ✓ U+2713 AK #07 BEL Bell ⍾ U+237E[9] BL #08 FE0, BS Backspace ⤺ —[b] BS #09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT #0A FE2, LF Line Feed ≡ U+2261 LF #0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT #0C FE4, FF Form Feed ↡ U+21A1 FF #0D FE5, CR Carriage Return ⪪ U+2AAA CR #0E SO Shift Out ⊗ U+2297 SO #0F SI Shift In ⊙ U+2299 SI #10 TC7, DLE Data Link Escape ⊟ U+229F DL #11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 #12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 #13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 #14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 #15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK #16 TC9, SYN Synchronization ⎍ U+238D SY #17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB #18 CAN Cancel ⧖ U+29D6 CN #19 EM End of Medium ⍿ U+237F[9] EM #1A SUB Substitute Character ␦ U+2426[12] SB #1B ESC Escape ⊖ U+2296 EC #1C IS4, FS File Separator ◰ U+25F0 FS #1D IS3, GS Group Separator ◱ U+25F1 GS #1E IS2, RS Record Separator ◲ U+25F2 RS #1F IS1 US Unit Separator ◳ U+25F3 US #20 SP Space △ U+25B3 SP #7F DEL Delete ▨ —[d] DT proc VIEW {string} { return [string map [list \033 \U2296 \007 \U237E] $string] } proc length {string} { #*** !doctools #[call [fun length] [arg string]] #[para]Returns the length of the string without ansi codes #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. #[para]This is equivalent to calling string length on the result of stripansi $string #[para]Note that this returns the number of characters in the payload, and is not always the same as the width of the string as rendered on a terminal. #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. string length [stripansi $string] } proc trimleft {string args} { set intext 0 set out "" #for split_codes only first or last pt can be empty string foreach {pt ansiblock} [split_codes $string] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { append out $ansiblock } else { append out [string trimleft $pt]$ansiblock set intext 1 } } else { append out $pt$ansiblock } } return $out } proc trimright {string} { if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] return [join $rtrimmed_list ""] } proc trim {string} { #make sure we do our ansi-scanning split only once - so use list-based trim operations #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length #we save a single function call by calling both here rather than _splits_trim join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" } proc index {string index} { #*** !doctools #[call [fun index] [arg string] [arg index]] #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) #[para]Returns the character (with applied ansi effect) at position index #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. #[para]todo: SGR codes within ST-terminated strings not yet ignored properly #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. #[para]As any operation using end-+ will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible #[para]Notes: #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal set splits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run #todo - end-x +/-x+/-x etc set original_index $index set index [string map [list _ ""] $index] #short-circuit some trivial cases if {[string is integer -strict $index]} { if {$index < 0} {return ""} #this only short-circuits an index greater than length including ansi-chars #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length if {$index > [string length $string]} {return ""} } else { if {[string match end* $index]} { #for end- we will probably have to blow a few cycles stripping first and calculate the length if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { return "" } } else { set offset 0 } #by now, if op = + then offset = 0 so we only need to handle the minus case set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal if {$offset == 0} { set index [expr {$payload_len-1}] } else { set index [expr {($payload_len-1) - $offset}] } if {$index < 0} { #don't waste time splitting and looping the string return "" } } else { #we are trying to avoid evaluating unbraced expr of potentially insecure origin regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string if {[string is integer -strict $tail]} { #plain +- if {$op eq "-"} { #return nothing for negative indices as per Tcl's lindex etc return "" } set index $tail } else { if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } } } #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) set low -1 set high -1 set pt_index -2 set pt_found -1 set char "" set codes_in_effect "" #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) foreach {pt code} $splits { incr pt_index 2 if {$pt ne ""} { set low [expr {$high + 1}] ;#last high incr high [string length $pt] } if {$pt ne "" && ($index >= $low && $index <= $high)} { set pt_found $pt_index set char [string index $pt $index-$low] break } if {[punk::ansi::codetype::is_sgr_reset $code]} { #we can throw away previous codes_in_effect set codes_in_effect "" } else { #may have partial resets - but we don't want to track individual states of SGR features #A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. #Review - consider if any other types of code make sense to retain in the output in this context. if {[punk::ansi::codetype::is_sgr $code]} { append codes_in_effect $code } } } if {$pt_found >= 0} { return $codes_in_effect$char } else { return "" } } proc _splits_trimleft {sclist} { set intext 0 set outlist [list] foreach {pt ansiblock} $sclist { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimleft $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } return $outlist } proc _splits_trimright {sclist} { set intext 0 set outlist [list] foreach {pt ansiblock} [lreverse $sclist] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimright $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } return [lreverse $outlist] } proc _splits_trim {sclist} { return [_splits_trimright [_splits_trimleft $sclist]] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } namespace eval punk::ansi::internal { proc splitn {str {len 1}} { #from textutil::split::splitn if {$len <= 0} { return -code error "len must be > 0" } if {$len == 1} { return [split $str {}] } set result [list] set max [string length $str] set i 0 set j [expr {$len -1}] while {$i < $max} { lappend result [string range $str $i $j] incr i $len incr j $len } return $result } proc splitx {str {regexp {[\t \r\n]+}}} { #from textutil::split::splitx # Bugfix 476988 if {[string length $str] == 0} { return {} } if {[string length $regexp] == 0} { return [::split $str ""] } if {[regexp $regexp {}]} { return -code error \ "splitting on regexp \"$regexp\" would cause infinite loop" } set list {} set start 0 while {[regexp -start $start -indices -- $regexp $str match submatch]} { foreach {subStart subEnd} $submatch break foreach {matchStart matchEnd} $match break incr matchStart -1 incr matchEnd lappend list [string range $str $start $matchStart] if {$subStart >= $start} { lappend list [string range $str $subStart $subEnd] } set start $matchEnd } lappend list [string range $str $start end] return $list } proc printing_length_addchar {i c} { upvar outchars outc upvar outsizes outs set nxt [llength $outc] if {$i < $nxt} { lset outc $i $c } else { lappend outc $c } } #string to 2digit hex - e.g used by XTGETTCAP proc str2hex {input} { set 2hex "" foreach ch [split $input ""] { append 2hex [format %02X [scan $ch %c]] } return $2hex } proc hex2str {2digithexchars} { set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) if {$2digithexchars eq ""} { return "" } if {[string length $2digithexchars] % 2 != 0} { error "hex2str requires an even number of hex digits (2 per character)" } set 2str "" foreach pair [splitn $2digithexchars 2] { append 2str [format %c 0x$pair] } return $2str } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::ansi [namespace eval punk::ansi { variable version set version 999999.0a1.0 }] return #*** !doctools #[manpage_end]