# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2023 # # @@ Meta Begin # Application punk::ansi 0.1.1 # Meta platform tcl # Meta license # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::ansi 0 0.1.1] #[copyright "2023"] #[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] #[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] #[require punk::ansi] #[keywords module ansi terminal console string] #[description] #[para]Ansi based terminal control string functions #[para]See [package punk::ansi::console] for related functions for controlling a console # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::ansi #[para]punk::ansi functions return their values - no implicit emission to console/stdout #[subsection Concepts] #[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner #[para]There are many differences in terminal implementations - but most should support a core set of features #[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. #[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::ansi #[list_begin itemized] package require Tcl 8.6- package require punk::char #*** !doctools #[item] [package {Tcl 8.6-}] #[item] [package {punk::char}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] namespace eval punk::ansi::class { if {![llength [info commands class_ansi]]} { oo::class create class_ansi { variable o_raw variable o_render_dimensions ;#last dimensions at which we rendered variable o_rendered variable o_rendered_what constructor {ansitext {dimensions 80x25}} { if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } set o_rendered_what "" set o_render_dimensions $dimensions set o_raw $ansitext } method rawdata {} { return $o_raw } method render {{dimensions ""}} { if {$dimensions eq ""} { set dimensions $o_render_dimensions } if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } if {$o_rendered_what ne $o_raw || $dimensions ne $o_render_dimensions} { set b [textblock::block $w $h " "] set o_rendered [overtype::left $b $o_raw] set o_rendered_what $o_raw set o_render_dimensions $dimensions } #todo - store rendered and allow partial rendering of new data lines? return $o_rendered } method viewlines {} { return [ansistring VIEW $o_raw] } method viewcodes {} { return [ansistring VIEWCODES $o_raw] } method viewchars {} { return [punk::ansi::stripansiraw $o_raw] } } } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::ansi { #*** !doctools #[subsection {Namespace punk::ansi}] #[para] Core API functions for punk::ansi #[list_begin definitions] #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control variable test "blah\033\[1;33mETC\033\[0;mOK" #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. namespace export\ {a?} {a+} a \ ansistring\ convert*\ clear*\ cursor_*\ detect*\ get_*\ move*\ reset*\ strip*\ test_decaln\ titleset\ variable escape_terminals #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals dict set escape_terminals DCS [list \007 \033\\ \u009c] dict set escape_terminals MISC [list \007 \033\\ \u009c] #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? #review - there doesn't seem to be an \x1b#7 # https://espterm.github.io/docs/VT100%20escape%20codes.html #self-contained 2 byte ansi escape sequences - review more? set ansi_2byte_codes_dict [dict create\ "reset_terminal" "\u001bc"\ "save_cursor_posn" "\u001b7"\ "restore_cursor_posn" "\u001b8"\ "cursor_up_one" "\u001bM"\ "NEL - Next Line" "\u001bE"\ "IND - Down one line" "\u001bD"\ "HTS - Set Tab Stop" "\u001bH"\ "DECPAM app keypad" "\x1b="\ "DECPNM norm keypad" "\x1b>"\ ] #review - We have file possibly encoded directly in another codepage such as 437 - or utf8,utf16 etc, but then still needing post conversion to e.g cp437? proc readfile {fname} { #todo #1- look for BOM - read according to format given by BOM #2- assume utf-8 #3- if errors - assume cp437? set data [fcat $fname] if {[file extension $fname] eq ".ans"} { set ansidata [encoding convertfrom cp437 $data] } else { set ansidata $data } set obj [punk::ansi::class::class_ansi new $ansidata] return $obj } proc is_utf8_char {char} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) } $char } proc get_utf8 {text} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) \A ( [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) ) + } $text completeChars return $completeChars } #control strings #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf # #A control string is a string of bit combinations which may occur in the data stream as a logical entity for #control purposes. A control string consists of an opening delimiter, a command string or a character string, #and a terminating delimiter, the STRING TERMINATOR (ST). #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. #A character string is a sequence of any bit combination, except those representing START OF STRING #(SOS) or STRING TERMINATOR (ST). #The interpretation of the command string or the character string is not defined by this Standard, but instead #requires prior agreement between the sender and the recipient of the data. #The opening delimiters defined in this Standard are #a) APPLICATION PROGRAM COMMAND (APC) #b) DEVICE CONTROL STRING (DCS) #c) OPERATING SYSTEM COMMAND (OSC) #d) PRIVACY MESSAGE (PM) #e) START OF STRING (SOS) # #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. #review - can terminals handle SGR codes within a PM? #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) proc controlstring_PM {text} { return "\x1b^${text}\033\\" } proc controlstring_PM8 {text} { return "\x9e${text}\x9c" } proc controlstring_SOS {text} { return "\x1bX${text}\033\\" } proc controlstring_SOS8 {text} { return "\x98${text}\x9c" } proc controlstring_APC {text} { return "\x1b_${text}\033\\" } proc controlstring_APC8 {text} { return "\x9f${text}\x9c" } #there is also the SGR hide code (8) which has intermittent terminal support #This doesn't change the output length - so support is tricky to detec. (terminal checksum report?) #candidate for zig/c implementation? proc stripansi {text} { #*** !doctools #[call [fun stripansi] [arg text] ] #[para]Return a string with ansi codes stripped out #[para]Alternate graphics chars are replaced with modern unicode equivalents (e.g boxdrawing glyphs) #using detect costs us a couple of uS - but saves time on plain text #we should probably leave this for caller - otherwise it ends up being called more than necessary #if {![::punk::ansi::ta::detect $text]} { # return $text #} set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } proc stripansiraw {text} { #*** !doctools #[call [fun stripansi] [arg text] ] #[para]Return a string with ansi codes stripped out #[para]Alternate graphics modes will be stripped - exposing the raw characters as they appear without graphics mode. #[para]ie instead of a horizontal line you may see: qqqqqq join [::punk::ansi::ta::split_at_codes $text] "" } proc stripansi1 {text} { #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW variable escape_terminals ;#dict variable ::punk::ansi::ta::standalone_code_map ;#map to empty string set text [convert_g0 $text] set text [string map $standalone_code_map $text] #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm #\x1b#3 double-height letters top half #\x1b#4 double-height letters bottom half #\x1b#5 single-width line #\x1b#6 double-width line #\x1b#8 dec test fill screen #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. #Theoretically line endings can occur within an ST payload (review e.g title?) #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) set inputlist [split $text ""] set outputlist [list] set in_escapesequence 0 #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) set i 0 foreach u $inputlist { set v [lindex $inputlist $i+1] set uv ${u}${v} if {$in_escapesequence eq "2b"} { #2nd byte - done. set in_escapesequence 0 } elseif {$in_escapesequence != 0} { set endseq [dict get $escape_terminals $in_escapesequence] if {$u in $endseq} { set in_escapesequence 0 } elseif {$uv in $endseq} { set in_escapesequence 2b ;#flag next byte as last in sequence } } else { #handle both 7-bit and 8-bit CSI and OSC if {[regexp {^(?:\033\[|\u009b)} $uv]} { set in_escapesequence CSI } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { set in_escapesequence OSC } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { set in_escapesequence DCS } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { #SOS,PM,APC - all terminated with ST set in_escapesequence MISC } else { lappend outputlist $u } } incr i } return [join $outputlist ""] } #review - what happens when no terminator? #todo - map other chars to unicode equivs # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set # esc) ?? proc convert_g0 {text} { #using not \033 inside to stop greediness - review how does it compare to ".*?" #variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #set re {\033\(0[^\033]*\033\(B} #set re {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} set re2 {\033\(0(.*)\033\(B} ;#capturing #puts --$g-- #box sample #lqk #x x #mqj #m = boxd_lur #set map [list l \u250f k \u2513] ;#heavy set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines #todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html set re_g0_open_or_close {\x1b\(0|\x1b\(B} set parts [::punk::ansi::ta::_perlish_split $re_g0_open_or_close $text] set out "" set g0_on 0 foreach {pt g} $parts { if {$g0_on} { #split for non graphics-set codes set othersplits [punk::ansi::ta::split_codes $pt] ;#we don't need single codes here foreach {innerpt innercodes} $othersplits { append out [string map $map $innerpt] append out $innercodes ;#Simplifying assumption - ST codes, titlesets etc don't require/use g0 content } } else { append out $pt ;#may include other codes - put it all through. } if {$g ne ""} { if {[punk::ansi::codetype::is_gx_open $g]} { set g0_on 1 } elseif {[punk::ansi::codetype::is_gx_close $g]} { set g0_on 0 } } } return $out } proc convert_g0_wrong {text} { #Attempting to split on a group is wrong - because there could be other ansi codes while inside a g0 section #That will either stop us matching - so no conversion - or risk converting parts of the ansi codes #using not \033 inside to stop greediness - review how does it compare to ".*?" #variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} set re {\033\(0[^\033]*\033\(B} set re2 {\033\(0(.*)\033\(B} ;#capturing #box sample #lqk #x x #mqj #m = boxd_lur #set map [list l \u250f k \u2513] ;#heavy set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines #todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html set parts [::punk::ansi::ta::_perlish_split $re $text] set out "" foreach {pt g} $parts { append out $pt if {$g ne ""} { #puts --$g-- regexp $re2 $g _match contents append out [string map $map $contents] } } return $out } proc g0 {text} { return \x1b(0$text\x1b(B } proc stripansi_gx {text} { #e.g "\033(0" - select VT100 graphics for character set G0 #e.g "\033(B" - reset #e.g "\033)0" - select VT100 graphics for character set G1 #e.g "\033)X" - where X is any char other than 0 to reset ?? #return [convert_g0 $text] return [string map [list "\x1b(0" "" \x1b(B" "" "\x1b)0" "" "\x1b)X" ""] $text] } #CSI m = SGR (Select Graphic Rendition) variable SGR_setting_map { bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 } variable SGR_colour_map { black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 } variable SGR_map set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] proc colourmap1 {{bgname White}} { package require textblock set bg [textblock::block 33 3 "[a+ $bgname] [a]"] set colormap "" for {set i 0} {$i <= 7} {incr i} { append colormap "_[a+ white bold 48\;5\;$i] $i [a]" } set map1 [overtype::left -transparent _ $bg "\n$colormap"] return $map1 } proc colourmap2 {{bgname White}} { package require textblock set bg [textblock::block 39 3 "[a+ $bgname] [a]"] set colormap "" for {set i 8} {$i <= 15} {incr i} { append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey } set map2 [overtype::left -transparent _ $bg "\n$colormap"] return $map2 } proc a? {args} { #*** !doctools #[call [fun a?] [opt {ansicode...}]] #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map if {![llength $args]} { set out "" append out $SGR_setting_map \n append out $SGR_colour_map \n try { package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try set bgname "White" set map1 [colourmap1 $bgname] set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] set map2 [colourmap2 $bgname] set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] append out [textblock::join $map1 " " $map2] \n #append out $map1[a] \n #append out $map2[a] \n } on error {result options} { puts stderr "Failed to draw colormap" puts stderr "$result" } finally { return $out } } else { set result [list] set rmap [lreverse $map] foreach i $args { if {[string is integer -strict $i]} { if {[dict exists $rmap $i]} { lappend result $i [dict get $rmap $i] } } else { if {[dict exists $map $i]} { lappend result $i [dict get $map $i] } } } return $result } } proc a+ {args} { #*** !doctools #[call [fun a+] [opt {ansicode...}]] #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation if {![llength $t]} { return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) } return "\x1b\[[join $t {;}]m" } proc a2 {args} { #*** !doctools #[call [fun a] [opt {ansicode...}]] #[para]Returns the ansi code to reset any current settings and apply those from the supplied list #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } proc a {args} { #*** !doctools #[call [fun a] [opt {ansicode...}]] #[para]Returns the ansi code to reset any current settings and apply those from the supplied list #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #variable SGR_setting_map { # bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 # underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 # reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 # overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 #} #variable SGR_colour_map { # black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 # Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 # BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 #} #don't disable ansi here. #we want this to be available to call even if ansi is off #variable SGR_map set t [list] foreach i $args { switch -- $i { bold {lappend t 1} dim {lappend t 2} blink {lappend t 5} fastblink {lappend t 6} noblink {lappend t 25} hide {lappend t 8} normal {lappend t 22} underline {lappend t 4} doubleunderline {lappend t 21} nounderline {lappend t 24} strike {lappend t 9} nostrike {lappend t 29} italic {lappend t 3} noitalic {lappend t 23} reverse {lappend t 7} noreverse {lappend t 27} defaultfb {lappend t 39} defaultbg {lappedn t 49} nohide {lappend t 28} overline {lappend t 53} nooverline {lappend t 55} frame {lappend t 51} framecircle {lappend t 52} noframe {lappend t 54} black {lappend t 30} red {lappend t 31} green {lappend t 32} yellow {lappend t 33} blue {lappend t 34} purple {lappend t 35} cyan {lappend t 36} white {lappend t 37} Black {lappend t 40} Red {lappend t 41} Green {lappend t 42} Yellow {lappend t 43} Blue {lappend t 44} Purple {lappend t 45} Cyan {lappend t 46} White {lappend t 47} BLACK {lappend t 100} RED {lappend t 101} GREEN {lappend t 101} YELLOW {lappend t 103} BLUE {lappend t 104} PURPLE {lappend t 105} CYAN {lappend t 106} WHITE {lappend t 107} default { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } proc ansiwrap {codes text} { return [a {*}$codes]$text[a] } proc get_code_name {code} { #*** !doctools #[call [fun get_code_name] [arg code]] #[para]for example #[para] get_code_name red will return 31 #[para] get_code_name 31 will return red variable SGR_map set res [list] foreach i [split $code ";"] { set ix [lsearch -exact $SGR_map $i] if {[string is digit -strict $code]} { if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} } else { #reverse lookup code from name if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} } } set res } proc reset {} { #*** !doctools #[call [fun reset]] #[para]reset console return "\x1bc" } proc reset_soft {} { #*** !doctools #[call [fun reset_soft]] return \x1b\[!p } proc reset_colour {} { #*** !doctools #[call [fun reset_colour]] #[para]reset colour only return "\x1b\[0m" } # -- --- --- --- --- proc clear {} { #*** !doctools #[call [fun clear]] return "\033\[2J" } proc clear_above {} { #*** !doctools #[call [fun clear_above]] return \033\[1J } proc clear_below {} { #*** !doctools #[call [fun clear_below]] return \033\[0J } proc clear_all {} { # - doesn't work?? return \033\[3J } #see also erase_ functions # -- --- --- --- --- proc cursor_on {} { #*** !doctools #[call [fun cursor_on]] return "\033\[?25h" } proc cursor_off {} { #*** !doctools #[call [fun cursor_off]] return "\033\[?25l" } # -- --- --- --- --- proc move {row col} { #*** !doctools #[call [fun move] [arg row] [arg col]] #[para]Return an ansi sequence to move to row,col #[para]aka cursor home return \033\[${row}\;${col}H } proc move_emit {row col data args} { #*** !doctools #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] #[para]Return an ansi string representing a move to row col with data appended #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout #[para]punk::console::move_emit_return will also return the cursor to the original position #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. set out "" if {$row eq "this"} { append out \033\[\;${col}G$data } else { append out \033\[${row}\;${col}H$data } foreach {row col data} $args { if {$row eq "this"} { append out \033\[\;${col}G$data } else { append out \033\[${row}\;${col}H$data } } return $out } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] return \033\[${n}C } proc move_back {{n 1}} { #*** !doctools #[call [fun move_back] [arg n]] return \033\[${n}D } proc move_up {{n 1}} { #*** !doctools #[call [fun move_up] [arg n]] return \033\[${n}A } proc move_down {{n 1}} { #*** !doctools #[call [fun move_down] [arg n]] return \033\[${n}B } proc move_column {col} { #*** !doctools #[call [fun move_column] [arg col]] return \x1b\[${col}G } proc move_row {row} { #*** !doctools #[call [fun move_row] [arg row]] #[para]VPA - Vertical Line Position Absolute return \x1b\[${row}d } # -- --- --- --- --- proc cursor_save {} { #*** !doctools #[call [fun cursor_save]] #[para] equivalent term::ansi::code::ctrl::sc return \x1b\[s } proc cursor_restore {} { #*** !doctools #[call [fun cursor_restore]] #[para] equivalent term::ansi::code::ctrl::rc return \x1b\[u } proc cursor_save_attributes {} { #*** !doctools #[call [fun cursor_save_attributes]] #[para] equivalent term::ansi::code::ctrl::sca return \x1b7 } proc cursor_restore_attributes {} { #*** !doctools #[call [fun cursor_restore_attributes]] #[para] equivalent term::ansi::code::ctrl::rca return \x1b8 } # -- --- --- --- --- proc erase_line {} { #*** !doctools #[call [fun erase_line]] return \033\[2K } proc erase_sol {} { #*** !doctools #[call [fun erase_sol]] #[para]Erase to start of line, leaving cursor position alone. return \033\[1K } proc erase_eol {} { #*** !doctools #[call [fun erase_eol]] return \033\[K } #see also clear_above clear_below # -- --- --- --- --- proc scroll_up {n} { #*** !doctools #[call [fun scroll_up] [arg n]] return \x1b\[${n}S } proc scroll_down {n} { #*** !doctools #[call [fun scroll_down] [arg n]] return \x1b\[${n}T } proc insert_spaces {count} { #*** !doctools #[call [fun insert_spaces] [arg count]] return \x1b\[${count}@ } proc delete_characters {count} { #*** !doctools #[call [fun delete_characters] [arg count]] return \x1b\[${count}P } proc erase_characters {count} { #*** !doctools #[call [fun erase_characters] [arg count]] return \x1b\[${count}X } proc insert_lines {count} { #*** !doctools #[call [fun insert_lines] [arg count]] return \x1b\[${count}L } proc delete_lines {count} { #*** !doctools #[call [fun delete_lines] [arg count]] return \x1b\[${count}M } proc cursor_pos {} { #*** !doctools #[call [fun cursor_pos]] #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin #[para]The output on screen will look something like ^[lb][lb]47;3R #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list return \033\[6n } proc cursor_pos_extended {} { #includes page e.g ^[[47;3;1R return \033\[?6n } #DECFRA - Fill rectangular area #REVIEW - vt100 accepts decimal values 132-126 and 160-255 ("in the current GL or GR in-use table") #some modern terminals accept and display characters outside this range - but this needs investigation. #in a modern unicode era - the restricted range doesn't make a lot of sense - but we need to see what terminal emulators actually do. #e.g what happens with double-width? #this wrapper accepts a char rather than a decimal value proc fill_rect {char t l b r} { set dec [scan $char %c] return \x1b\[$dec\;$t\;$l\;$b\;$r\$x } #DECFRA with decimal char value proc fill_rect_dec {decimal t l b r} { return \x1b\[$decimal\;$t\;$l\;$b\;$r\$x } proc checksum_rect {id page t l b r} { return "\x1b\[${id}\;${page}\;$t\;$l\;$b\;$r*y" } proc request_cursor_information {} { #*** !doctools #[call [fun request_cursor_information]] #[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report #[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin #[para]A stdin readloop will need to be in place to read this information return \x1b\[1\$w } proc request_tabstops {} { #*** !doctools #[call [fun request_tabstops]] #[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin return \x1b\[2\$w } #alternative to string terminator is \007 - proc titleset {windowtitle} { #*** !doctools #[call [fun titleset] [arg windowtitles]] #[para]Returns the code to set the title of the terminal window to windowtitle #[para]This may not work on terminals which have multiple panes/windows return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives } #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title #no cross-platform ansi-only mechanism ? proc test_decaln {} { #Screen Alignment Test #Reset margins, move cursor to the top left, and fill the screen with 'E' #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) return \x1b#8 } #length of text for printing characters only #- unicode and other non-printing chars and combining sequences should be handled by the ansifreestring_width call at the end. #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names #review - is there an existing library or better method? printing to a terminal and querying cursor position is relatively slow and terminals lie. #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. proc printing_length {line} { if {[string first \n $line] >= 0} { error "line_print_length must not contain newline characters" } #what if line has \v (vertical tab) ie more than one logical screen line? #review - detect ansi moves and warn/error? They would invalidate this algorithm #for a string with ansi moves - we would need to use the overtype::renderline function (which is a bit heavier) #arguably - \b and \r are cursor move operations too - so processing them here is not very symmetrical - review #the purpose of backspace (or line cr) in embedded text is unclear. Should it allow some sort of character combining/overstrike as it has sometimes done historically (nroff/less)? e.g a\b` as an alternative combiner or bolding if same char #This should presumably only be done if the over_strike (os) capability is enabled in the terminal. Either way - it presumably won't affect printing width? set line [punk::ansi::stripansi $line] #we can't use simple \b processing if we get ansi codes and aren't actually processing them (e.g moves) set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter #(* more correctly - moves cursor back) #Note some terminals process backspace before \v - which seems quite wrong #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces #normalize tabs to an appropriate* width #*todo - handle terminal/context where tabwidth != the default 8 spaces if {[string first \t $line] >= 0} { set line [textutil::tabify::untabify2 $line] } #NOTE - this is non-destructive backspace as it occurs in text blocks - and is likely different to the sequence coming from a terminal or editor which generally does a destructive backspace #e.g #This means for example that abc\b has a length of 3. Trailing or leading backslashes have no effect #set bs [format %c 0x08] #set line [string map [list "\r\b" "\r"] $line] ;#backsp following a \r will have no effect set line [string trim $line \b] ;#take off at start and tail only #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. #(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) set n 0 #set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. set chars [punk::char::grapheme_split $line] set cr_posns [lsearch -all $chars \r] set bs_posns [lsearch -all $chars \b] foreach p $cr_posns { lset chars $p } foreach p $bs_posns { lset chars $p } #mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner #build an output set idx 0 set outchars [list] set outsizes [list] # -- #tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code #this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above #we could reasonably do it with backspace - but cr is more difficult #note that \x08 \b etc won't work to create a compiled switch statement even with unbraced (separate argument) form of switch statement. #set bs "" #set cr ? # -- foreach c $chars { switch -- $c { { if {$idx > 0} { incr idx -1 } } { set idx 0 } default { #set nxt [llength $outchars] if {$idx < [llength $outchars]} { #overstrike? - should usually have no impact on width - width taken as last grapheme in that column #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. lset outchars $idx $c } else { lappend outchars $c } #punk::ansi::internal::printing_length_addchar $idx $c incr idx } } } return [punk::char::ansifreestring_width [join $outchars ""]] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi ---}] } namespace eval punk::ansi { # -- --- --- --- --- --- #XTGETTCAP # xterm responds with # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and # the value of the corresponding string that xterm would send, # or # DCS 0 + r ST for invalid requests. # The strings are encoded in hexadecimal (2 digits per # character). If more than one name is given, xterm replies # with each name/value pair in the same response. An invalid # name (one not found in xterm's tables) ends processing of the # list of names. proc xtgetcap {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\x1bP+q$payload\x1b\\" } proc xtgetcap2 {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\u0090+q$payload\u009c" } namespace eval codetype { #Functions that are primarily intended to operate on a single ansi code sequence - rather than a sequence, or codes embedded in another string #in some cases multiple sequences or leading trailing strings are ok - but the proc docs should note where the function is looking #review - separate namespace for functions that operate on multiple or embedded? proc is_sgr {code} { #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline #we will accept and pass through the less common colon separator (ITU Open Document Architecture) #Terminals should generally ignore it if they don't use it regexp {\033\[[0-9;:]*m$} $code } #review - has_cursor_move_in_line? Are we wanting to allow strings/sequences and detect that there are no moves that *aren't* within line? proc is_cursor_move_in_line {code {knownline ""}} { if {[regexp {\033\[[0-9]*(:?C|D|G)$} $code]} { return 1 } if {[string is integer -strict $knownline]} { #CSI n : m H where row n happens to be current line - review/test set re [string map [list %n% $knownline] {\x1b\[%n%:[0-9]*H$}] if {[regexp $re $code]} { return 1 } } return 0 } #pure SGR reset with no other functions proc is_sgr_reset {code} { #*** !doctools #[call [fun is_sgr_reset] [arg code]] #[para]Return a boolean indicating whether this string has a trailing pure SGR reset #[para]Note that if the reset is not the very last item in the string - it will not be detected. #[para]This is primarily intended for testing a single ansi code sequence, but code can be any string where the trailing SGR code is to be tested. #todo 8-bit csi regexp {\x1b\[0*m$} $code } #whether this code has 0 (or equivalently empty) parameter (but may set others) #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes #it generally only makes sense for the reset to be the first parameter - otherwise the code has ineffective portions #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. #We only look at the initial parameter within the trailing SGR code as this is the well-formed normal case. #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code proc has_sgr_leadingreset {code} { #*** !doctools #[call [fun has_sgr_leadingreset] [arg code]] #[para]The reset must be the very first item in code to be detected. Trailing strings/codes ignored. set params "" #we need non-greedy if {[regexp {^\033\[([^m]*)m} $code _match params]} { #must match trailing m to be the type of reset we're looking for set plist [split $params ";"] if {[string trim [lindex $plist 0] 0] eq ""} { #e.g \033\[m \033\[0\;...m \033\[0000...m return 1 } else { return 0 } } else { return 0 } } proc is_gx {code} { #g0 {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #g1 {(?:\x1b\)0)(?:(?!\x1b\)B).)*\x1b\)B} regexp {\x1b(?:\(0(?:(?:(?!\x1b\(B).)*\x1b\(B)|\)0(?:(?:(?!\x1b\)B).)*\x1b\)B))} $code } proc is_gx_open {code} { #todo g2,g3? #pin to start and end with ^ and $ ? #regexp {\x1b\(0|\x1b\)0} $code regexp {\x1b(?:\(0|\)0)} $code } proc is_gx_close {code} { #regexp {\x1b\(B|\x1b\)B} $code regexp {\x1b(?:\(B|\)B)} $code } #has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? } namespace eval sequence_type { proc is_Fe {code} { # C1 control codes if {[regexp {^\033\[[\u0040-\u005F]}]} { #7bit - typical case return 1 } #8bit #review - all C1 escapes ? 0x80-0x90F #This is possibly problematic as it is affected by encoding. #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." return 0 } proc is_Fs {code} { puts stderr "is_Fs unimplemented" } } # -- --- --- --- --- --- --- --- --- --- --- #todo - implement colour resets like the perl module: #https://metacpan.org/pod/Text::ANSI::Util #(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) } namespace eval punk::ansi::ta { #*** !doctools #[subsection {Namespace punk::ansi::ta}] #[para] text ansi functions #[para] based on but not identical to the Perl Text Ansi module: #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm #[list_begin definitions] namespace path ::punk::ansi #handle both 7-bit and 8-bit csi #review - does codepage affect this? e.g ebcdic has 8bit csi in different position #CSI #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m variable re_csi_open {(?:\x1b\[|\u009b)} #variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@\^_\{|\}\[\]~`]} variable re_csi_code {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]} #intermediate bytes range 0x20-0x2F (ascii space and !"#$%&'()*+,-./) #parameter bytes range 0x30-0x3F (ascii 0-9:;<=>?) #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). #colour and style variable re_sgr {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m #OSC - termnate with BEL (\a \007) or ST (string terminator \x1b\\) # 8-byte string terminator is \x9c (\u009c) #non-greedy by excluding ST terminators variable re_esc_osc1 {(?:\x1b\])(?:[^\007]*)\007} #variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} ;#somewhat wrong - we want to exclude the ST - not other esc sequences variable re_esc_osc2 {(?:\x1b\])(?:(?!\x1b\\).)*\x1b\\} variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} variable re_osc_open {(?:\x1b\]|\u009d).*} variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} variable re_altg0_open {(?:\x1b\(0)} variable re_altg0_close {(?:\x1b\(B)} # DCS "ESC P" or "0x90" is also terminated by ST set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} #ST terminators [list \007 \033\\ \u009c] #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) #non-greedy by exclusion of ST terminators in body #we need to match \x1b\\ not just \x1b There could be colour codes nested in a privacy msg/string #even if terminals generally don't support that - it's quite possible for an ansi code to get nested this way - and we'd prefer it not to break our splits #Just checking for \x1b will terminate the match too early #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) #variable re_ST {(?:\x1bX|\u0098|\x1b\^|\u009E|\x1b_|\u009F)(?:[^\x1b\007\u009c]*)(?:\x1b\\|\007|\u009c)} ;#downsides: early terminating with nests, mixes 7bit 8bit start/ends (does that exist in the wild?) #keep our 8bit/7bit start-end codes separate variable re_ST {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)} #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open #default for regexes is non-newline-sensitive matching - ie matches can span lines # -- --- --- --- variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" # -- --- --- --- #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regext TRIE generator that works with Tcl regexes #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. variable re_ansi_detect {(?:\x1b(?:\((?:0|B)|\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|007).)*(?:\x1b\\|\007))|c|7|8|M|E|D|H|=|>|(?:#(?:3|4|5|6|8))))|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)|(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]|(?:\u009d)(?:[^\u009c]*)?\u009c} # -- --- --- --- variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_ST_open}|${re_altg0_open}" #may be same as detect - kept in case detect needs to diverge #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" set re_ansi_split $re_ansi_detect #detect any ansi escapes #review - only detect 'complete' codes - or just use the opening escapes for performance? proc detect {text} { #*** !doctools #[call [fun detect] [arg text]] #[para]Return a boolean indicating whether Ansi codes were detected in text #[para] variable re_ansi_detect expr {[regexp $re_ansi_detect $text]} } proc detect2 {text} { variable re_ansi_detect2 expr {[regexp $re_ansi_detect2 $text]} } proc detect_open {text} { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } #not in perl ta proc detect_csi {text} { #*** !doctools #[call [fun detect_csi] [arg text]] #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation #[para]There is also a multi-byte escape sequence \u009b #[para]This is less commonly used but is also detected here #[para](This function is not in perl ta) variable re_csi_open expr {[regexp $re_csi_open $text]} } proc detect_sgr {text} { #*** !doctools #[call [fun detect_sgr] [arg text]] #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. #[para]This is the set of CSI sequences ending in 'm' #[para]This is most commonly an Ansi colour code - but also things such as underline and italics #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. #[para](This function is not in perl ta) variable re_sgr expr {[regexp $re_sgr $text]} } proc strip {text} { #*** !doctools #[call [fun strip] [arg text]] #[para]Return text stripped of Ansi codes #[para]This is a tailcall to punk::ansi::stripansi tailcall stripansi $text } proc length {text} { #*** !doctools #[call [fun length] [arg text]] #[para]Return the character length after stripping ansi codes - not the printing length string length [stripansi $text] } #todo - handle newlines #not in perl ta #proc printing_length {text} { # #} proc trunc {text width args} { } #not in perl ta #returns just the plaintext portions in a list proc split_at_codes {text} { variable re_ansi_split punk::ansi::internal::splitx $text ${re_ansi_split} } # -- --- --- --- --- --- #Split $text to a list containing alternating ANSI color codes and text. #ANSI color codes are always on the second element, fourth, and so on. #(ie plaintext on odd list-indices ansi on even indices) # Example: #ta_split_codes "" # => "" #ta_split_codes "a" # => "a" #ta_split_codes "a\e[31m" # => {"a" "\e[31m"} #ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} #ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} #ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} #ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} # proc split_codes {text} { variable re_ansi_split set re "(?:${re_ansi_split})+" return [_perlish_split $re $text] } #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) proc split_codes_single {text} { variable re_ansi_split return [_perlish_split $re_ansi_split $text] } #review - tcl greedy expressions may match multiple in one element proc _perlish_split {re text} { if {[string length $text] == 0} { return {} } set list [list] set start 0 #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW while {[regexp -start $start -indices -- $re $text match]} { lassign $match matchStart matchEnd #puts "->start $start ->match $matchStart $matchEnd" if {$matchEnd < $matchStart} { lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] incr start if {$start >= [string length $text]} { break } continue } lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] set start [expr {$matchEnd+1}] #? if {$start >= [string length $text]} { break } } return [lappend list [string range $text $start end]] } proc _perlish_split2 {re text} { if {[string length $text] == 0} { return {} } set list [list] set start 0 #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW while {[regexp -start $start -indices -- $re $text match]} { lassign $match matchStart matchEnd #puts "->start $start ->match $matchStart $matchEnd" if {$matchEnd < $matchStart} { lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] incr start } else { lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] set start [expr {$matchEnd+1}] } if {$start >= [string length $text]} { break } } return [lappend list [string range $text $start end]] } proc _ws_split {text} { regexp -all -inline {(?:\S+)|(?:\s+)} $text } # -- --- --- --- --- --- #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } # -- --- --- --- --- --- --- --- --- --- --- namespace eval punk::ansi::ansistring { #*** !doctools #[subsection {Namespace punk::ansi::ansistring}] #[para]punk::ansi::ansistring ensemble - ansi-aware string operations #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. #[list_begin definitions] namespace path [list ::punk::ansi ::punk::ansi::ta] namespace ensemble create namespace export length length1 trim trimleft trimright index VIEW VIEWCODES INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX #todo - expose _splits_ methods so caller can work efficiently with the splits themselves #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single #\UFFFD - replacement char or \U2426 #using ISO 2047 graphical representations of control characters - probably obsolete? #00 NUL Null ⎕ U+2395 NU #01 TC1, SOH Start of Heading ⌈ U+2308 SH #02 TC2, STX Start of Text ⊥ U+22A5 SX #03 TC3, ETX End of Text ⌋ U+230B EX #04 TC4, EOT End of Transmission ⌁ U+2301[9] ET #05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ #06 TC6, ACK Acknowledge ✓ U+2713 AK #07 BEL Bell ⍾ U+237E[9] BL #08 FE0, BS Backspace ⤺ —[b] BS #09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT #0A FE2, LF Line Feed ≡ U+2261 LF #0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT #0C FE4, FF Form Feed ↡ U+21A1 FF #0D FE5, CR Carriage Return ⪪ U+2AAA CR #0E SO Shift Out ⊗ U+2297 SO #0F SI Shift In ⊙ U+2299 SI #10 TC7, DLE Data Link Escape ⊟ U+229F DL #11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 #12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 #13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 #14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 #15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK #16 TC9, SYN Synchronization ⎍ U+238D SY #17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB #18 CAN Cancel ⧖ U+29D6 CN #19 EM End of Medium ⍿ U+237F[9] EM #1A SUB Substitute Character ␦ U+2426[12] SB #1B ESC Escape ⊖ U+2296 EC #1C IS4, FS File Separator ◰ U+25F0 FS #1D IS3, GS Group Separator ◱ U+25F1 GS #1E IS2, RS Record Separator ◲ U+25F2 RS #1F IS1 US Unit Separator ◳ U+25F3 US #20 SP Space △ U+25B3 SP #7F DEL Delete ▨ —[d] DT #C0 control code visual representations # Code Val Name 2X Description # 2400 00 NUL NU Symbol for Null # 2401 01 SOH SH Symbol for Start of Heading # 2402 02 STX SX Symbol for Start of Text # 2403 03 ETX EX Symbol for End of Text # 2404 04 EOT ET Symbol for End of Transmission # 2405 05 ENQ EQ Symbol for Enquiry # 2406 06 ACK AK Symbol for Acknowledge # 2407 07 BEL BL Symbol for Bell # 2409 09 BS BS Symbol for Backspace # 2409 09 HT HT Symbol for Horizontal Tab (1) # 240A 0A LF LF Symbol for Line Feed (1) # 240B 0B VT VT Symbol for Vertical Tab (1) # 240C 0C FF FF Symbol for Form Feed (2) # 240D 0D CR CR Symbol for Carriage Return (1) # 240E 0E SO SO Symbol for Shift Out # 240F 0F SI SI Symbol for Shift In # 2410 10 DLE DL Symbol for Data Link Escape # 2411 11 DC1 D1 Symbol for Device Control 1 (2) # 2412 12 DC2 D2 Symbol for Device Control 2 (2) # 2413 13 DC3 D3 Symbol for Device Control 3 (2) # 2414 14 DC4 D4 Symbol for Device Control 4 (2) # 2415 15 NAK NK Symbol for Negative Acknowledge # 2416 16 SYN SY Symbol for Synchronous Idle # 2417 17 ETB EB Symbol for End of Transmission Block # 2418 18 CAN CN Symbol for Cancel # 2419 19 EM EM Symbol for End of Medium # 241A 1A SUB SU Symbol for Substitute # 241B 1B ESC EC Symbol for Escape # 241C 1C FS FS Symbol for Field Separator (3) # 241D 1D GS GS Symbol for Group Separator (3) # 241E 1E RS RS Symbol for Record Separator (3) # 241F 1F US US Symbol for Unit Separator (3) # 2420 20 SP SP Symbol for Space (4) # 2421 7F DEL DT Symbol for Delete (4) #C1 control code visual representations #Code Val Name 2X Description # 80 80 80 (1) # 81 81 81 (1) # E022 82 BPH 82 Symbol for Break Permitted Here (2) # E023 83 NBH 83 Symbol for No Break Here (2) # E024 84 IND IN Symbol for Index (3) # E025 85 NEL NL Symbol for Next Line (4) # E026 86 SSA SS Symbol for Start Selected Area # E027 87 ESA ES Symbol for End Selected Area # E028 88 HTS HS Symbol for Character Tabulation Set # E029 89 HTJ HJ Symbol for Character Tabulation with Justification # E02A 8A VTS VS Symbol for Line Tabulation Set # E02B 8B PLD PD Symbol for Partial Line Forward # E02C 8C PLU PU Symbol for Partial Line Backward # E02D 8D RI RI Symbol for Reverse Line Feed # E02E 8E SS2 S2 Symbol for Single Shift 2 # E02F 8F SS3 S3 Symbol for Single Shift 3 # E030 90 DCS DC Symbol for Device Control String # E031 91 PU1 P1 Symbol for Private Use 1 # E032 92 PU2 P2 Symbol for Private Use 2 # E033 93 STS SE Symbol for Set Transmit State # E034 94 CCH CC Symbol for Cancel Character # E035 95 MW MW Symbol for Message Waiting # E036 96 SPA SP Symbol for Start Protected (Guarded) Area # E037 97 EPA EP Symbol for End Protected (Guarded) Area # E038 98 SOS 98 Symbol for Start of String (2) # 99 99 (1) # E03A 9A SCI 9A Symbol for Single Character Introducer (2) # E03B 9B CSI CS Symbol for Control Sequence Introducer (5) # E03C 9C ST ST Symbol for String Terminator # E03D 9D OSC OS Symbol for Operating System Command # E03E 9E PM PM Symbol for Privacy Message # E03F 9F APC AP Symbol for Application Program Command proc VIEW {args} { #*** !doctools #[call [fun VIEW] [arg string]] #[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets #[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408') #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. if {![llength $args]} { return "" } set string [lindex $args end] set defaults [dict create\ -esc 1\ -cr 1\ -lf 0\ -vt 0\ -ht 1\ -bs 1\ -sp 1\ ] set argopts [lrange $args 0 end-1] if {[llength $argopts] % 2 != 0} { error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [dict keys $defaults]" } set opts [dict merge $defaults $argopts] # -- --- --- --- --- set opt_esc [dict get $opts -esc] set opt_cr [dict get $opts -cr] set opt_lf [dict get $opts -lf] set opt_vt [dict get $opts -vt] set opt_ht [dict get $opts -ht] set opt_bs [dict get $opts -bs] set opt_sp [dict get $opts -sp] # -- --- --- --- --- #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) #Goal is not to map every control character? #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly #ETX -ctrl-c #EOT ctrl-d (EOF?) #SYN ctrl-v #SUB ctrl-z #CAN ctrl-x #FS ctrl-\ (SIGQUIT) set visuals_interesting [dict create\ NUL [list \x00 \u2400]\ ETX [list \x03 \u2403]\ EOT [list \x04 \u2404]\ BEL [list \x07 \u2407]\ SYN [list \x16 \u2416]\ CAN [list \x18 \u2418]\ SUB [list \x1a \u241a]\ FS [list \x1c \u241c]\ SOS [list \x98 \ue038]\ CSI [list \x9b \ue03b]\ ST [list \x9c \ue03c]\ PM [list \x9e \ue03e]\ APC [list \x9f \ue03f]\ ] #it turns out we need pretty much everything for debugging set visuals_c0 [dict create\ NUL [list \x00 \u2400]\ SOH [list \x01 \u2401]\ STX [list \x02 \u2402]\ ETX [list \x03 \u2403]\ EOT [list \x04 \u2404]\ ENQ [list \x05 \u2405]\ ACK [list \x06 \u2406]\ BEL [list \x07 \u2407]\ FF [list \x0c \u240c]\ SO [list \x0e \u240e]\ SF [list \x0f \u240f]\ DLE [list \x10 \u2410]\ DC1 [list \x11 \u2411]\ DC2 [list \x12 \u2412]\ DC3 [list \x13 \u2413]\ DC4 [list \x14 \u2414]\ NAK [list \x15 \u2415]\ SYN [list \x16 \u2416]\ ETB [list \x17 \u2417]\ CAN [list \x18 \u2418]\ EM [list \x19 \u2419]\ SUB [list \x1a \u241a]\ FS [list \x1c \u241c]\ GS [list \x1d \u241d]\ RS [list \x1e \u241e]\ US [list \x1f \u241f]\ DEL [list \x7f \u2421]\ ] set visuals_c1 [dict create\ BPH [list \x82 \ue022]\ NBH [list \x83 \ue023]\ IND [list \x84 \ue024]\ NEL [list \x85 \ue025]\ SSA [list \x86 \ue026]\ ESA [list \x87 \ue027]\ HTS [list \x88 \ue028]\ HTJ [list \x89 \ue029]\ VTS [list \x8a \ue02a]\ PLD [list \x8b \ue02a]\ PLU [list \x8c \ue02c]\ RI [list \x8d \ue02d]\ SS2 [list \x8e \ue02e]\ SS3 [list \x8f \ue02f]\ DCS [list \x90 \ue030]\ PU1 [list \x91 \ue031]\ PU2 [list \x92 \ue032]\ STS [list \x93 \ue033]\ CCH [list \x94 \ue034]\ MW [list \x95 \ue035]\ SPA [list \x96 \ue036]\ EPA [list \x97 \ue037]\ SOS [list \x98 \ue038]\ SCI [list \x9a \ue03a]\ CSI [list \x9b \ue03b]\ ST [list \x9c \ue03c]\ OSC [list \x9d \ue03d]\ PM [list \x9e \ue03e]\ APC [list \x9f \ue03f]\ ] set visuals_opt [dict create] if {$opt_esc} { dict set visuals_opt ESC [list \x1b \u241b] } if {$opt_cr} { dict set visuals_opt CR [list \x0d \u240d] } if {$opt_lf} { dict set visuals_opt LF [list \x0a \u240a] } if {$opt_vt} { dict set visuals_opt VT [list \x0b \u240b] } if {$opt_ht} { dict set visuals_opt HT [list \x09 \u2409] } if {$opt_bs} { dict set visuals_opt BS [list \x08 \u2408] } if {$opt_sp} { dict set visuals_opt SP [list \x20 \u2420] } set visuals [dict merge $visuals_opt $visuals_c0 $visuals_c1] set charmap [list] dict for {nm chars} $visuals { lappend charmap {*}$chars } return [string map $charmap $string] #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs #return [string map [list \033 \U2296 \007 \U237E] $string] } proc VIEWCODES {string} { if {![llength $string]} { return "" } set redb [a+ red bold] set greenb [a+ green bold] set GX [a+ black White bold] set unk [a+ yellow bold] set RST [a] #don't split into lines first - \n is valid within ST sections set output "" set splits [punk::ansi::ta::split_codes_single $string] foreach {pt code} $splits { append output "$pt" if {[punk::ansi::codetype::is_sgr_reset $code]} { append output ${greenb}RST$RST } elseif {[punk::ansi::codetype::is_gx_open $code]} { append output ${GX}GX+$RST } elseif {[punk::ansi::codetype::is_gx_close $code]} { append output ${GX}GX-$RST } elseif {[punk::ansi::codetype::is_sgr $code]} { append output ${greenb}[ansistring VIEW $code]$RST } else { append output ${unk}[ansistring VIEW $code]$RST } } return $output } proc length {string} { #*** !doctools #[call [fun length] [arg string]] #[para]Returns the length of the string without ansi codes #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. #[para]This is equivalent to calling string length on the result of stripansi $string #[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal. #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. #todo - combiners/diacritics? just map them away here? set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set string [regsub -all $re_diacritics $string ""] #we want length to return number of glyphs.. not screen width. Has to be consistent with index function string length [stripansi $string] } #included as a test/verification - slightly slower. #grapheme split version may end up being used once it supports unicode grapheme clusters proc length2 {string} { #we want length to return number of glyphs.. not screen width. Has to be consistent with index function return [llength [punk::char::grapheme_split [stripansi $string]]] } proc trimleft {string args} { set intext 0 set out "" #for split_codes only first or last pt can be empty string foreach {pt ansiblock} [split_codes $string] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { append out $ansiblock } else { append out [string trimleft $pt]$ansiblock set intext 1 } } else { append out $pt$ansiblock } } return $out } proc trimright {string} { if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] return [join $rtrimmed_list ""] } proc trim {string} { #make sure we do our ansi-scanning split only once - so use list-based trim operations #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length #we save a single function call by calling both here rather than _splits_trim join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" } proc index {string index} { #*** !doctools #[call [fun index] [arg string] [arg index]] #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) #[para]Returns the character (with applied ansi effect) at position index #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. #[para]todo: SGR codes within ST-terminated strings not yet ignored properly #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. #[para]As any operation using end-+ will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible #[para]Notes: #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run #todo - end-x +/-x+/-x etc set original_index $index set index [string map [list _ ""] $index] #short-circuit some trivial cases if {[string is integer -strict $index]} { if {$index < 0} {return ""} #this only short-circuits an index greater than length including ansi-chars #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length if {$index > [string length $string]} {return ""} } else { if {[string match end* $index]} { #for end- we will probably have to blow a few cycles stripping first and calculate the length if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { return "" } } else { set offset 0 } #by now, if op = + then offset = 0 so we only need to handle the minus case set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal if {$offset == 0} { set index [expr {$payload_len-1}] } else { set index [expr {($payload_len-1) - $offset}] } if {$index < 0} { #don't waste time splitting and looping the string return "" } } else { #we are trying to avoid evaluating unbraced expr of potentially insecure origin regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string if {[string is integer -strict $tail]} { #plain +- if {$op eq "-"} { #return nothing for negative indices as per Tcl's lindex etc return "" } set index $tail } else { if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } } } #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) set low -1 set high -1 set pt_index -2 set pt_found -1 set char "" set codes_in_effect "" #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) foreach {pt code} $ansisplits { incr pt_index 2 #we want an index per grapheme - whether it is doublewide or single if {$pt ne ""} { set graphemes [punk::char::grapheme_split $pt] set low [expr {$high + 1}] ;#last high #incr high [string length $pt] incr high [llength $graphemes] } if {$pt ne "" && ($index >= $low && $index <= $high)} { set pt_found $pt_index #set char [string index $pt $index-$low] set char [lindex $graphemes $index-$low] break } if {[punk::ansi::codetype::is_sgr_reset $code]} { #we can throw away previous codes_in_effect set codes_in_effect "" } else { #may have partial resets - but we don't want to track individual states of SGR features #A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. #Review - consider if any other types of code make sense to retain in the output in this context. if {[punk::ansi::codetype::is_sgr $code]} { append codes_in_effect $code } } } if {$pt_found >= 0} { return $codes_in_effect$char } else { return "" } } #helper to convert indices (possibly of form x+y end-x etc) to numeric values within the payload range i.e without ansi #return empty string for each index that is out of range #review - this is possibly too slow to be very useful as is. # consider converting to oo and maintaining state of ansisplits so we don't repeat relatively expensive operations for same string proc INDEXABSOLUTE {string args} { set payload_len -1 ;# -1 as token to indicate we haven't calculated it yet (only want to call it once at most) set testindices [list] foreach index $args { if {[string is integer -strict $index]} { if {$index < 0} { lappend testindices "" } elseif {$index > [string length $string]} { #this only short-circuits an index greater than length including ansi-chars #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length lappend testindices "" } else { lappend testindices $index } } else { if {[string match end* $index]} { #for end- we will probably have to blow a few cycles stripping first and calculate the length if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { lappend testindices "" continue } } else { set offset 0 } #by now, if op = + then offset = 0 so we only need to handle the minus case if {$payload_len == -1} { set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal } if {$offset == 0} { set index [expr {$payload_len-1}] } else { set index [expr {($payload_len-1) - $offset}] } if {$index < 0} { lappend testindices "" } else { lappend testindices $index } } else { #we are trying to avoid evaluating unbraced expr of potentially insecure origin regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string if {[string is integer -strict $tail]} { #plain +- if {$op eq "-"} { #return nothing for negative indices as per Tcl's lindex etc lappend indices "" continue } set index $tail lappend testindices $index } else { if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } lappend testindices $index } } } #assert - we made exactly one append to testindices if there was no error } #we now have numeric or empty string indices - but haven't fully checked they are within the underlying payload length if {[join $testindices ""] eq ""} { #don't calc ansistring length if no indices to check return $testindices } if {$payload_len == -1} { set payload_len [punk::ansi::ansistring::length $string] } set indices [list] foreach ti $testindices { if {$ti ne ""} { if {$ti < $payload_len} { lappend indices $ti } else { lappend indices "" } } else { lappend indices "" } } return $indices } #Todo - rows! #return pair of column extents occupied by the character index supplied. #single-width grapheme will return pair of integers of equal value #doulbe-width grapheme will return a pair of consecutive indices proc INDEXCOLUMNS {string idx} { #There is an index per grapheme - whether it is 1 or 2 columns wide set index [lindex [INDEXABSOLUTE $string $idx] 0] if {$index eq ""} { return "" } set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run set low -1 ;#low and high grapheme indexes set high -1 set lowc 0 ;#low and high column (1 based) set highc 0 set col1 "" set col2 "" set row 1 foreach {pt code} $ansisplits { if {$pt ne ""} { set ptlines [split $pt \n] set ptlinecount [llength $ptlines] set ptlineindex 0 foreach ptline $ptlines { set graphemes [punk::char::grapheme_split $ptline] if {$ptlineindex > 0} { #todo - account for previous \n as a grapheme .. what column? It should theoretically be in the rightmost column #zero width set low [expr {$high + 1}] set lowc [expr {$highc + 1}] set high $low set highc $lowc if {$index == $low} { set char \n set col1 $lowc set col2 $col1 break } incr row set lowc 0 set highc 0 } set low [expr {$high + 1}] ;#last high set lowc [expr {$highc + 1}] set high [expr {$low + [llength $graphemes] -1}] set highc [expr {$lowc + [punk::char::ansifreestring_width $ptline] -1}] #puts "---row:$row lowc:$lowc highc:$highc $ptline graphemes:$graphemes" if {$index >= $low && $index <= $high} { set char [lindex $graphemes $index-$low] set prefix [join [lrange $graphemes 0 [expr {$index-$low-1}]] ""] set prefixlen [punk::char::ansifreestring_width $prefix] set col1 [expr {$lowc + $prefixlen}] set gwidth [punk::char::ansifreestring_width $char] if {$gwidth < 1} { puts stderr "ansistring INDEXCOLUMNS warning - grapheme width zero at column $col1 ??" return "" ;#grapheme doesn't occupy a column and isn't a newline? - review } set col2 [expr {$col1 + ($gwidth -1)}] break } incr ptlineindex } } } if {$col1 ne "" & $col2 ne ""} { return [list $col1 $col2] } } #multiple rows - return a list? #return the grapheme index that occupies column col (could be first or second half of 2-wide grapheme) proc COLUMNINDEX {string col} { set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run set lowindex -1 ;#low and high grapheme indexes set highindex -1 set lowc 0 ;#low and high column (1 based) set highc 0 set col1 "" set col2 "" foreach {pt code} $ansisplits { if {$pt ne ""} { if {[string first \n $pt] < 0} { set graphemes [punk::char::grapheme_split $pt] set lowindex [expr {$highindex + 1}] ;#last high set lowc [expr {$highc + 1}] set highindex [expr {$lowindex + [llength $graphemes] -1}] set highc [expr {$lowc + [punk::char::ansifreestring_width $pt] -1}] if {$col >= $lowc && $col <= $highc} { if {$col == $lowc} { return $lowindex } elseif {$col == $highc} { return $highindex } set index [expr {$lowindex + 1}] set str "" foreach g $graphemes { append str $g set width [punk::char::ansifreestring_width $str] if {$lowc + $width >= $col} { return $index } incr index } error "ansistring COLUMNINDEX '$string' $col not found" ;#assert - shouldn't happen } } else { error "ansistring COLUMNINDEX multiline not implemented" } } } } proc _splits_trimleft {sclist} { set intext 0 set outlist [list] foreach {pt ansiblock} $sclist { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimleft $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } return $outlist } proc _splits_trimright {sclist} { set intext 0 set outlist [list] foreach {pt ansiblock} [lreverse $sclist] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimright $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } return [lreverse $outlist] } proc _splits_trim {sclist} { return [_splits_trimright [_splits_trimleft $sclist]] } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } namespace eval punk::ansi::internal { proc splitn {str {len 1}} { #from textutil::split::splitn if {$len <= 0} { return -code error "len must be > 0" } if {$len == 1} { return [split $str {}] } set result [list] set max [string length $str] set i 0 set j [expr {$len -1}] while {$i < $max} { lappend result [string range $str $i $j] incr i $len incr j $len } return $result } proc splitx {str {regexp {[\t \r\n]+}}} { #from textutil::split::splitx # Bugfix 476988 if {[string length $str] == 0} { return {} } if {[string length $regexp] == 0} { return [::split $str ""] } if {[regexp $regexp {}]} { return -code error \ "splitting on regexp \"$regexp\" would cause infinite loop" } set list {} set start 0 while {[regexp -start $start -indices -- $regexp $str match submatch]} { foreach {subStart subEnd} $submatch break foreach {matchStart matchEnd} $match break incr matchStart -1 incr matchEnd lappend list [string range $str $start $matchStart] if {$subStart >= $start} { lappend list [string range $str $subStart $subEnd] } set start $matchEnd } lappend list [string range $str $start end] return $list } proc printing_length_addchar {i c} { upvar outchars outc upvar outsizes outs set nxt [llength $outc] if {$i < $nxt} { lset outc $i $c } else { lappend outc $c } } #string to 2digit hex - e.g used by XTGETTCAP proc str2hex {input} { set 2hex "" foreach ch [split $input ""] { append 2hex [format %02X [scan $ch %c]] } return $2hex } proc hex2str {2digithexchars} { set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) if {$2digithexchars eq ""} { return "" } if {[string length $2digithexchars] % 2 != 0} { error "hex2str requires an even number of hex digits (2 per character)" } set 2str "" foreach pair [splitn $2digithexchars 2] { append 2str [format %c 0x$pair] } return $2str } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::ansi [namespace eval punk::ansi { variable version set version 0.1.1 }] return #*** !doctools #[manpage_end]