# -*- tcl -*- # Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from -buildversion.txt # # Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. # Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # (C) 2023 # # @@ Meta Begin # Application punk::ansi 999999.0a1.0 # Meta platform tcl # Meta license # @@ Meta End # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ # doctools header # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[manpage_begin punkshell_module_punk::ansi 0 999999.0a1.0] #[copyright "2023"] #[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] #[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] #[require punk::ansi] #[keywords module ansi terminal console string] #[description] #[para]Ansi based terminal control string functions #[para]See [package punk::ansi::console] for related functions for controlling a console # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section Overview] #[para] overview of punk::ansi #[para]punk::ansi functions return their values - no implicit emission to console/stdout #[subsection Concepts] #[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner #[para]There are many differences in terminal implementations - but most should support a core set of features #[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. #[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Requirements # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[subsection dependencies] #[para] packages used by punk::ansi #[list_begin itemized] package require Tcl 8.6- package require punk::char package require punk::assertion #*** !doctools #[item] [package {Tcl 8.6-}] #[item] [package {punk::char}] # #package require frobz # #*** !doctools # #[item] [package {frobz}] #*** !doctools #[list_end] # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ #*** !doctools #[section API] namespace eval punk::ansi::class { if {![llength [info commands class_ansi]]} { oo::class create class_ansi { variable o_ansistringobj variable o_render_dimensions ;#last dimensions at which we rendered variable o_rendered variable o_rendered_what constructor {ansitext {dimensions 80x25}} { if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } #a straight string compare may be faster.. but a checksum is much smaller in memory, so we'll use that by default. set o_rendered_what "" #There may also be advantages to renering to a class_ansistring class object set o_render_dimensions $dimensions set o_ansistringobj [ansistring NEW $ansitext] } method get {} { return [$o_ansistringobj get] } method render {{dimensions ""}} { if {$dimensions eq ""} { set dimensions $o_render_dimensions } if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } set cksum "not-done" if {$dimensions ne $o_render_dimensions || $o_rendered_what ne [set cksum [$o_ansistringobj checksum]]} { #some ansi layout/art relies on wrapping at the width-dimension to display properly #this includes cursor movements ie right arrow can move cursor to columns in lines below #overflow is a different concept - perhaps not particularly congruent with the idea of the textblock as a mini terminal emulator. #overflow effectively auto-expands the block(terminal?) width #overflow and wrap both being true won't make sense unless we implement a max_overflow concept set o_rendered [overtype::left -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] if {$cksum eq "not-done"} { #if dimensions changed - the checksum won't have been done set o_rendered_what [$o_ansistringobj checksum] } else { set o_rendered_what $cksum } set o_render_dimensions $dimensions } #todo - store rendered and allow partial rendering of new data lines? return $o_rendered } method rendertest {{dimensions ""}} { if {$dimensions eq ""} { set dimensions $o_render_dimensions } if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { error "class_ansi::render dimensions must be of the form x" } set o_dimensions $dimensions set rendered [overtype::left -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" [$o_ansistringobj get]] return $rendered } method render_to_input_line {args} { if {[llength $args] < 1} { puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" } set x [lindex $args end] set arglist [lrange $args 0 end-1] if {[llength $arglist] %2 != 0} { puts stderr "render_to_input_line usage: ?-dimensions WxH? ?-minus charcount? x" } set defaults [dict create\ -dimensions 80x24\ -minus 0\ ] dict for {k v} $arglist { switch -- $k { -dimensions - -minus { } default { puts stderr "render_to_input_line unexpected argument '$k' usage: ?-dimensions WxH? ?-minus charcount? x" } } } set opts [dict merge $defaults $arglist] set opt_dimensions [dict get $opts -dimensions] set opt_minus [dict get $opts -minus] lassign [split $opt_dimensions x] w h if {![string is integer -strict $w] || ![string is integer -strict $h] || $w < 1 || $h < 1} { puts stderr "render_to_input_line WxH width & height must be positive integer values usage: ?-dimensions WxH? ?-minus charcount? x" } if {![string is integer -strict $opt_minus]} { puts stderr "render_to_input_line -minus must be positive integer value representing number of chars to exclude from end. usage: ?-dimensions WxH? ?-minus charcount? x" } package require textblock set lfvis [ansistring VIEW -lf 1 \n] set maplf [list \n "[a+ green bold reverse]${lfvis}[a]\n"] ;#a mapping to highlight newlines set lines [split [$o_ansistringobj get] \n] set rlines [lrange $lines 0 $x] set chunk [::join $rlines \n] append chunk \n if {$opt_minus ne "0"} { set chunk [string range $chunk 0 end-$opt_minus] } set rendered [overtype::left -experimental {test_mode} -overflow 0 -wrap 1 -width $w -height $h -appendlines 1 "" $chunk] set marker "" for {set i 1} {$i <= $w} {incr i} { if {$i % 10 == 0} { ::append marker "|" } elseif {$i % 5 == 0} { ::append marker * } else { ::append marker "." } } ::append rendered \n $marker set xline [lindex $rlines $x]\n set xlinev [ansistring VIEWSTYLE $xline] set xlinev [string map $maplf $xlinev] set xlinedisplay [overtype::left -wrap 1 -width $w -height 1 "" $xlinev] ::append rendered \n $xlinedisplay set chunk [ansistring VIEWSTYLE $chunk] set chunk [string map $maplf $chunk] #keep chunkdisplay narrower - leave at 80 or it will get unwieldy for larger image widths set chunkdisplay [overtype::left -wrap 1 -width 80 -height 1 "" $chunk] set renderheight [llength [split $rendered \n]] set chunkdisplay_lines [split $chunkdisplay \n] set chunkdisplay_tail [lrange $chunkdisplay_lines end-$renderheight end] set chunkdisplay_block [join $chunkdisplay_tail \n] #the input chunk lines are often much longer than the output.. resulting in main content being way up the screen. It's often impractical to view more than the tail of the chunkdisplay. textblock::join $rendered $chunkdisplay_block } method checksum {} { return [$o_ansistringobj checksum] } method checksum_last_rendered_input {} { return $o_rendered_what } #todo - fix class_ansistring so the ansistring methods can be called directly method viewlines {} { return [ansistring VIEW [$o_ansistringobj get]] } method viewcodes {} { return [ansistring VIEWCODES [$o_ansistringobj get]] } method viewchars {} { return [punk::ansi::stripansiraw [$o_ansistringobj get]] } method viewstyle {} { return [ansistring VIEWSTYLE [$o_ansistringobj get]] } method append_noreturn {ansistring} { $o_ansistringobj append $ansistring #don't return the raw data - it may be big and probably won't play nicely with default terminal dimensions etc. return } #like Tcl append - returns the result #Tcl's append changes a variable state, this changes the object state method append {ansistring} { $o_ansistringobj append $ansistring } } } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ namespace eval punk::ansi { #*** !doctools #[subsection {Namespace punk::ansi}] #[para] Core API functions for punk::ansi #[list_begin definitions] #old-school ansi graphics - C0 control glyphs. variable cp437_map #for cp437 images we need to map these *after* splitting ansi, to single-width unicode chars #It would also probably be problematic to map \u000A to the glyph - as this is the newline - it included in the map anyway for completeness. The caller may have to manually carve that or other specific c0 controls out of the map to use it depending on the situation(?) #Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs #most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too #by mapping these we can display regardless. #nul char - no cp437 image but commonly used as space in ansi graphics. #(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW dict set cp437_map \u0000 " " ;#space dict set cp437_map \u0001 \u263A ;#smiley dict set cp437_map \u0002 \u263B ;#smiley-filled dict set cp437_map \u0003 \u2665 ;#heart dict set cp437_map \u0004 \u2666 ;#diamond dict set cp437_map \u0005 \u2663 ;#club dict set cp437_map \u0006 \u2660 ;#spade dict set cp437_map \u0007 \u2022 ;#dot dict set cp437_map \u0008 \u25D8 ;#square hollow dot dict set cp437_map \u0009 \u25CB ;#hollow dot dict set cp437_map \u000A \u25D9 ;#square and dot (\n) dict set cp437_map \u000B \u2642 ;#male dict set cp437_map \u000C \u2640 ;#female dict set cp437_map \u000D \u266A ;#note1 (\r) dict set cp437_map \u000E \u266B ;#note2 dict set cp437_map \u000F \u263C ;#sun dict set cp437_map \u0010 \u25BA ;#right arrow triangle dict set cp437_map \u0011 \u25CA ;#left arrow triangle dict set cp437_map \u0012 \u2195 ;#updown arrow dict set cp437_map \u0013 \u203C ;#double bang dict set cp437_map \u0014 \u00B6 ;#pilcrow (paragraph mark / blind P) dict set cp437_map \u0015 \u00A7 ;#Section Sign dict set cp437_map \u0016 \u25AC ;#Heavy horizontal? dict set cp437_map \u0017 \u21A8 ;#updown arrow 2 ? dict set cp437_map \u0018 \u2191 ;#up arrow dict set cp437_map \u0019 \u2193 ;#down arrow dict set cp437_map \u001A \u2192 ;#right arrow dict set cp437_map \u001B \u2190 ;#left arrow dict set cp437_map \u001C \u221F ;#bottom left corner dict set cp437_map \u001D \u2194 ;#left-right arrow dict set cp437_map \u001E \u25B2 ;#up arrow triangle dict set cp437_map \u001F \u25BC ;#down arrow triangle variable map_special_graphics #DEC Special Graphics set https://en.wikipedia.org/wiki/DEC_Special_Graphics #AKA IBM Code page 1090 dict set map_special_graphics _ \u00a0 ;#no-break space dict set map_special_graphics "`" \u25c6 ;#black diamond dict set map_special_graphics a \u2592 ;#shaded block (checkerboard stipple), medium shade - Block Elements dict set map_special_graphics b \u2409 ;#symbol for HT dict set map_special_graphics c \u240c ;#symbol for FF dict set map_special_graphics d \u240d ;#symbol for CR dict set map_special_graphics e \u240a ;#symbol for LF dict set map_special_graphics f \u00b0 ;#degree sign dict set map_special_graphics g \u00b1 ;#plus-minus sign dict set map_special_graphics h \u2424 ;#symbol for NL dict set map_special_graphics i \u240b ;#symbol for VT dict set map_special_graphics j \u2518 ;#brc, light up and left - box drawing dict set map_special_graphics k \u2510 ;#trc, light down and left - box drawing dict set map_special_graphics l \u250c ;#tlc, light down and right - box drawing dict set map_special_graphics m \u2514 ;#blc, light up and right - box drawing dict set map_special_graphics n \u253c ;#light vertical and horizontal - box drawing dict set map_special_graphics o \u23ba ;#horizontal scan line-1 dict set map_special_graphics p \u23bb ;#horizontal scan line-3 dict set map_special_graphics q \u2500 ;#light horizontal - box drawing dict set map_special_graphics r \u23bc ;#horizontal scan line-7 dict set map_special_graphics s \u23bd ;#horizontal scan line-9 dict set map_special_graphics t \u251c ;#light vertical and right - box drawing dict set map_special_graphics u \u2524 ;#light vertical and left - box drawing dict set map_special_graphics v \u2534 ;#light up and horizontal - box drawing dict set map_special_graphics w \u252c ;#light down and horizontal - box drawing dict set map_special_graphics x \u2502 ;#light vertical - box drawing dict set map_special_graphics y \u2264 ;#less than or equal dict set map_special_graphics z \u2265 ;#greater than or equal dict set map_special_graphics "\{" \u03c0 ;#greek small letter pi dict set map_special_graphics "|" \u2260 ;#not equal to dict set map_special_graphics "\}" \u00a3 ;#pound sign dict set map_special_graphics ~ \u00b7 ;#middle dot #see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control variable test "blah\033\[1;33mETC\033\[0;mOK" #Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. namespace export\ {a?} {a+} a \ ansistring\ convert*\ clear*\ cursor_*\ detect*\ get_*\ move*\ reset*\ strip*\ test_decaln\ titleset\ variable escape_terminals #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] #dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals dict set escape_terminals DCS [list \007 \033\\ \u009c] dict set escape_terminals MISC [list \007 \033\\ \u009c] #NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) #This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? #review - there doesn't seem to be an \x1b#7 # https://espterm.github.io/docs/VT100%20escape%20codes.html #self-contained 2 byte ansi escape sequences - review more? set ansi_2byte_codes_dict [dict create\ "reset_terminal" "\u001bc"\ "save_cursor_posn" "\u001b7"\ "restore_cursor_posn" "\u001b8"\ "cursor_up_one" "\u001bM"\ "NEL - Next Line" "\u001bE"\ "IND - Down one line" "\u001bD"\ "HTS - Set Tab Stop" "\u001bH"\ "DECPAM app keypad" "\x1b="\ "DECPNM norm keypad" "\x1b>"\ ] # -------------------------------------- #comparitive test (performance) string-append vs 2-object (with existing splits) append proc test_cat1 {ansi1 ansi2} { #make sure objects have splits set s1 [ansistring NEW $ansi1] namespace eval [info object namespace $s1] {my MakeSplit} set s2 [ansistring NEW $ansi2] namespace eval [info object namespace $s2] {my MakeSplit} #operation under test # -- #standard string append $s1 append $ansi2 # -- $s2 destroy #$s1 append \033\[31mX ;#redX return $s1 } proc test_cat2 {ansi1 ansi2} { #make sure objects have splits set s1 [ansistring NEW $ansi1] namespace eval [info object namespace $s1] {my MakeSplit} set s2 [ansistring NEW $ansi2] namespace eval [info object namespace $s2] {my MakeSplit} #operation under test # -- #ansistring object append $s1 appendobj $s2 # -- $s2 destroy #$s1 append \033\[31mX ;#redX return $s1 } # -------------------------------------- #review - We have file possibly encoded directly in another codepage such as 437 - or utf8,utf16 etc, but then still needing post conversion to e.g cp437? #In testing old ansi graphics files available on the web, some files need encoding {utf-8 cp437} some just cp437 proc readfile {fname {encoding cp437}} { #todo #1- look for BOM - read according to format given by BOM #2- assume utf-8 #3- if errors - assume cp437? if {[llength $encoding] == 1} { set ansidata [fcat -encoding $encoding $fname] set obj [punk::ansi::class::class_ansi new $ansidata] } elseif {[llength $encoding] == 2} { set ansidata [fcat -encoding [lindex $encoding 0] $fname] set ansidata [encoding convertfrom [lindex $encoding 1] $ansidata] set obj [punk::ansi::class::class_ansi new $ansidata] } else { error "encoding list '$encoding' not supported. Use 1 or 2 encodings (first for file read, second as encoding convertfrom)" } return $obj } proc ansicat {fname args} { set encnames [encoding names] set encoding "" set dimensions "" set test_mode 0 foreach a $args { if {$a eq "test_mode"} { set test_mode 1 } elseif {$a in $encnames} { set encoding $a } else { if {[regexp {[0-9]+(?:x|X)[0-9]+} $a]} { set dimensions $a } } } if {$encoding eq ""} { set encoding cp437 } if {$dimensions eq ""} { set dimensions 80x24 } set ansidata [fcat -encoding $encoding $fname] set obj [punk::ansi::class::class_ansi new $ansidata] if {$test_mode} { set result [$obj rendertest $dimensions] } else { set result [$obj render $dimensions] } $obj destroy return $result } #utf-8/ascii encoded cp437 proc ansicat2 {fname {encoding utf-8}} { set data [fcat -encoding $encoding $fname] set ansidata [encoding convertfrom cp437 $data] set obj [punk::ansi::class::class_ansi new $ansidata] set result [$obj render] $obj destroy return $result } proc is_utf8_char {char} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) } $char } proc get_utf8 {text} { regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) \A ( [\x00-\x7F] | # Single-byte chars (ASCII range) [\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) [\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) [\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) ) + } $text completeChars return $completeChars } proc example {} { #todo - review dependency on punk::repo ? package require textblock package require punk::repo package require punk::console set fnames [list belinda.ans bot.ans flower.ans fish.ans] set base [punk::repo::find_project] set ansibase [file join $base src/testansi] if {![file exists $ansibase]} { puts stderr "Missing testansi folder at $base/src/testansi" puts stderr "Ensure ansi test files exist: $fnames" #error "punk::ansi::example Cannot find example files" } set missingbase [a+ yellow][textblock::block 80 23 ?][a] set pics [list] foreach f $fnames { if {![file exists $ansibase/$f]} { set p [overtype::left $missingbase "[a+ red bold]\nMissing file\n$ansibase/$f[a]"] lappend pics [dict create filename $f pic $p status missing] } else { set img [join [lines_as_list -line trimline -block trimtail [ansicat $ansibase/$f]] \n] lappend pics [dict create filename $f pic $img status ok] } } set termsize [punk::console:::get_size] set margin 4 set freewidth [expr {[dict get $termsize columns]-$margin}] set per_row [expr {$freewidth / 80}] set rowlist [list] set row [list] set i 1 foreach picinfo $pics { set subtitle "" if {[dict get $picinfo status] ne "ok"} { set subtitle [dict get $picinfo status] } set title [dict get $picinfo filename] lappend row [textblock::frame -subtitle $subtitle -title $title [dict get $picinfo pic]] if {$i % $per_row == 0} { lappend rowlist $row set row [list] } elseif {$i == [llength $pics]} { lappend rowlist $row } incr i } set result "" foreach r $rowlist { append result [textblock::join {*}$r] \n } return $result } #control strings #https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf # #A control string is a string of bit combinations which may occur in the data stream as a logical entity for #control purposes. A control string consists of an opening delimiter, a command string or a character string, #and a terminating delimiter, the STRING TERMINATOR (ST). #A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. #A character string is a sequence of any bit combination, except those representing START OF STRING #(SOS) or STRING TERMINATOR (ST). #The interpretation of the command string or the character string is not defined by this Standard, but instead #requires prior agreement between the sender and the recipient of the data. #The opening delimiters defined in this Standard are #a) APPLICATION PROGRAM COMMAND (APC) #b) DEVICE CONTROL STRING (DCS) #c) OPERATING SYSTEM COMMAND (OSC) #d) PRIVACY MESSAGE (PM) #e) START OF STRING (SOS) # #debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. # "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ #The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. #todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. #review - can terminals handle SGR codes within a PM? #Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) proc controlstring_PM {text} { return "\x1b^${text}\033\\" } proc controlstring_PM8 {text} { return "\x9e${text}\x9c" } proc controlstring_SOS {text} { return "\x1bX${text}\033\\" } proc controlstring_SOS8 {text} { return "\x98${text}\x9c" } proc controlstring_APC {text} { return "\x1b_${text}\033\\" } proc controlstring_APC8 {text} { return "\x9f${text}\x9c" } #there is also the SGR hide code (8) which has intermittent terminal support #This doesn't change the output length - so support is tricky to detec. (terminal checksum report?) #candidate for zig/c implementation? proc stripansi {text} { #*** !doctools #[call [fun stripansi] [arg text] ] #[para]Return a string with ansi codes stripped out #[para]Alternate graphics chars are replaced with modern unicode equivalents (e.g boxdrawing glyphs) #using detect costs us a couple of uS - but saves time on plain text #we should probably leave this for caller - otherwise it ends up being called more than necessary #if {![::punk::ansi::ta::detect $text]} { # return $text #} set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters join [::punk::ansi::ta::split_at_codes $text] "" } proc stripansiraw {text} { #*** !doctools #[call [fun stripansi] [arg text] ] #[para]Return a string with ansi codes stripped out #[para]Alternate graphics modes will be stripped - exposing the raw characters as they appear without graphics mode. #[para]ie instead of a horizontal line you may see: qqqqqq join [::punk::ansi::ta::split_at_codes $text] "" } proc stripansi1 {text} { #todo - character set selection - SS2 SS3 - how are they terminated? REVIEW variable escape_terminals ;#dict variable ::punk::ansi::ta::standalone_code_map ;#map to empty string set text [convert_g0 $text] set text [string map $standalone_code_map $text] #e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm #\x1b#3 double-height letters top half #\x1b#4 double-height letters bottom half #\x1b#5 single-width line #\x1b#6 double-width line #\x1b#8 dec test fill screen #we process char by char - line-endings whether \r\n or \n should be processed as per any other character. #Theoretically line endings can occur within an ST payload (review e.g title?) #ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) set inputlist [split $text ""] set outputlist [list] set in_escapesequence 0 #assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) set i 0 foreach u $inputlist { set v [lindex $inputlist $i+1] set uv ${u}${v} if {$in_escapesequence eq "2b"} { #2nd byte - done. set in_escapesequence 0 } elseif {$in_escapesequence != 0} { set endseq [dict get $escape_terminals $in_escapesequence] if {$u in $endseq} { set in_escapesequence 0 } elseif {$uv in $endseq} { set in_escapesequence 2b ;#flag next byte as last in sequence } } else { #handle both 7-bit and 8-bit CSI and OSC if {[regexp {^(?:\033\[|\u009b)} $uv]} { set in_escapesequence CSI } elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { set in_escapesequence OSC } elseif {[regexp {^(?:\033P|\u0090)} $uv]} { set in_escapesequence DCS } elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { #SOS,PM,APC - all terminated with ST set in_escapesequence MISC } else { lappend outputlist $u } } incr i } return [join $outputlist ""] } #review - what happens when no terminator? #todo - map other character sets to unicode equivs? There seems to be little support for other than the DEC special graphics set.. ISO2022 character switching not widely supported - may be best considered deprecated(?) # convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set # esc) ?? proc convert_g0 {text} { variable map_special_graphics #using not \033 inside to stop greediness - review how does it compare to ".*?" #variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #set re {\033\(0[^\033]*\033\(B} #set re {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #set re2 {\033\(0(.*)\033\(B} ;#capturing #puts --$g-- #box sample #lqk #x x #mqj #m = boxd_lur set re_g0_open_or_close {\x1b\(0|\x1b\(B} set parts [::punk::ansi::ta::_perlish_split $re_g0_open_or_close $text] set out "" set g0_on 0 foreach {other g} $parts { if {$g0_on} { #split for non graphics-set codes set othersplits [punk::ansi::ta::split_codes $other] ;#we don't need single codes here foreach {inner_plaintext inner_codes} $othersplits { append out [string map $map_special_graphics $inner_plaintext] $inner_codes #Simplifying assumption: no mapping required on any inner_codes - ST codes, titlesets etc don't require/use g0 content } } else { append out $other ;#may be a mix of plaintext and other ansi codes - put it all through. } #trust our splitting regex has done the work to leave us with only \x1b\(0 or \x1b(B - test last char rather than use punk::ansi::codetype::is_gx_open/is_gx_close switch -- [string index $g end] { 0 { set g0_on 1 } B { set g0_on 0 } } } return $out } proc convert_g0_wrong {text} { #Attempting to split on a group is wrong - because there could be other ansi codes while inside a g0 section #That will either stop us matching - so no conversion - or risk converting parts of the ansi codes #using not \033 inside to stop greediness - review how does it compare to ".*?" #variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} set re {\033\(0[^\033]*\033\(B} set re2 {\033\(0(.*)\033\(B} ;#capturing #box sample #lqk #x x #mqj #m = boxd_lur #set map [list l \u250f k \u2513] ;#heavy set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines #todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html set parts [::punk::ansi::ta::_perlish_split $re $text] set out "" foreach {pt g} $parts { append out $pt if {$g ne ""} { #puts --$g-- regexp $re2 $g _match contents append out [string map $map $contents] } } return $out } #Wrap text in ansi codes to switch to DEC alternate graphics character set. proc g0 {text} { return \x1b(0$text\x1b(B } proc stripansi_gx {text} { #e.g "\033(0" - select VT100 graphics for character set G0 #e.g "\033(B" - reset #e.g "\033)0" - select VT100 graphics for character set G1 #e.g "\033)X" - where X is any char other than 0 to reset ?? #return [convert_g0 $text] return [string map [list "\x1b(0" "" \x1b(B" "" "\x1b)0" "" "\x1b)X" ""] $text] } #CSI m = SGR (Select Graphic Rendition) variable SGR_setting_map { bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 } variable SGR_colour_map { black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 } variable SGR_map set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] proc colourmap1 {{bgname White}} { package require textblock set bg [textblock::block 33 3 "[a+ $bgname] [a]"] set colormap "" for {set i 0} {$i <= 7} {incr i} { append colormap "_[a+ white bold 48\;5\;$i] $i [a]" } set map1 [overtype::left -transparent _ $bg "\n$colormap"] return $map1 } proc colourmap2 {{bgname White}} { package require textblock set bg [textblock::block 39 3 "[a+ $bgname] [a]"] set colormap "" for {set i 8} {$i <= 15} {incr i} { append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey } set map2 [overtype::left -transparent _ $bg "\n$colormap"] return $map2 } proc a? {args} { #*** !doctools #[call [fun a?] [opt {ansicode...}]] #[para]Return an ansi string representing a table of codes and a panel showing the colours variable SGR_setting_map variable SGR_colour_map if {![llength $args]} { set out "" append out $SGR_setting_map \n append out $SGR_colour_map \n try { package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try set bgname "White" set map1 [colourmap1 $bgname] set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] set map2 [colourmap2 $bgname] set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] append out [textblock::join $map1 " " $map2] \n #append out $map1[a] \n #append out $map2[a] \n } on error {result options} { puts stderr "Failed to draw colormap" puts stderr "$result" } finally { return $out } } else { set result [list] set rmap [lreverse $map] foreach i $args { if {[string is integer -strict $i]} { if {[dict exists $rmap $i]} { lappend result $i [dict get $rmap $i] } } else { if {[dict exists $map $i]} { lappend result $i [dict get $map $i] } } } return $result } } proc a+ {args} { #*** !doctools #[call [fun a+] [opt {ansicode...}]] #[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation if {![llength $t]} { return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) } return "\x1b\[[join $t {;}]m" } proc a2 {args} { #*** !doctools #[call [fun a] [opt {ansicode...}]] #[para]Returns the ansi code to reset any current settings and apply those from the supplied list #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #don't disable ansi here. #we want this to be available to call even if ansi is off variable SGR_map set t [list] foreach i $args { if {[dict exists $SGR_map $i]} { lappend t [dict get $SGR_map $i] } else { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } proc a {args} { #*** !doctools #[call [fun a] [opt {ansicode...}]] #[para]Returns the ansi code to reset any current settings and apply those from the supplied list #[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text #[para] e.g to set foreground red and bold #[para]punk::ansi::a red bold #[para]to set background red #[para]punk::ansi::a Red #[para]see [cmd punk::ansi::a?] to display a list of codes #variable SGR_setting_map { # bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 # underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 # reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 # overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 #} #variable SGR_colour_map { # black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 # Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 # BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 #} #don't disable ansi here. #we want this to be available to call even if ansi is off #variable SGR_map set t [list] foreach i $args { switch -- $i { bold {lappend t 1} dim {lappend t 2} blink {lappend t 5} fastblink {lappend t 6} noblink {lappend t 25} hide {lappend t 8} normal {lappend t 22} underline {lappend t 4} doubleunderline {lappend t 21} nounderline {lappend t 24} strike {lappend t 9} nostrike {lappend t 29} italic {lappend t 3} noitalic {lappend t 23} reverse {lappend t 7} noreverse {lappend t 27} defaultfb {lappend t 39} defaultbg {lappedn t 49} nohide {lappend t 28} overline {lappend t 53} nooverline {lappend t 55} frame {lappend t 51} framecircle {lappend t 52} noframe {lappend t 54} black {lappend t 30} red {lappend t 31} green {lappend t 32} yellow {lappend t 33} blue {lappend t 34} purple {lappend t 35} cyan {lappend t 36} white {lappend t 37} Black {lappend t 40} Red {lappend t 41} Green {lappend t 42} Yellow {lappend t 43} Blue {lappend t 44} Purple {lappend t 45} Cyan {lappend t 46} White {lappend t 47} BLACK {lappend t 100} RED {lappend t 101} GREEN {lappend t 101} YELLOW {lappend t 103} BLUE {lappend t 104} PURPLE {lappend t 105} CYAN {lappend t 106} WHITE {lappend t 107} default { if {[string is integer -strict $i]} { lappend t $i } elseif {[string first ";" $i] >=0} { #literal with params lappend t $i } else { #accept examples for foreground # 256f-# or 256fg-# or 256f# # rgbf--- or rgbfg--- or rgbf-- switch -nocase -glob -- $i { "256f*" { set cc [string trim [string range $i 4 end] -gG] lappend t "38;5;$cc" } "256b*" { set cc [string trim [string range $i 4 end] -gG] lappend t "48;5;$cc" } "rgbf*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "38;2;$r;$g;$b" } "rgbb*" { set rgb [string trim [string range $i 4 end] -gG] lassign [split $rgb -] r g b lappend t "48;2;$r;$g;$b" } } } } } } # \033 - octal. equivalently \x1b in hex which is more common in documentation # empty list [a=] should do reset - same for [a= nonexistant] # explicit reset at beginning of parameter list for a= (as opposed to a+) set t [linsert $t 0 0] return "\x1b\[[join $t {;}]m" } proc ansiwrap {codes text} { return [a {*}$codes]$text[a] } proc get_code_name {code} { #*** !doctools #[call [fun get_code_name] [arg code]] #[para]for example #[para] get_code_name red will return 31 #[para] get_code_name 31 will return red variable SGR_map set res [list] foreach i [split $code ";"] { set ix [lsearch -exact $SGR_map $i] if {[string is digit -strict $code]} { if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} } else { #reverse lookup code from name if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} } } set res } proc reset {} { #*** !doctools #[call [fun reset]] #[para]reset console return "\x1bc" } proc reset_soft {} { #*** !doctools #[call [fun reset_soft]] return \x1b\[!p } proc reset_colour {} { #*** !doctools #[call [fun reset_colour]] #[para]reset colour only return "\x1b\[0m" } # -- --- --- --- --- proc clear {} { #*** !doctools #[call [fun clear]] return "\033\[2J" } proc clear_above {} { #*** !doctools #[call [fun clear_above]] return \033\[1J } proc clear_below {} { #*** !doctools #[call [fun clear_below]] return \033\[0J } proc clear_all {} { # - doesn't work?? return \033\[3J } #see also erase_ functions # -- --- --- --- --- proc cursor_on {} { #*** !doctools #[call [fun cursor_on]] return "\033\[?25h" } proc cursor_off {} { #*** !doctools #[call [fun cursor_off]] return "\033\[?25l" } # -- --- --- --- --- proc move {row col} { #*** !doctools #[call [fun move] [arg row] [arg col]] #[para]Return an ansi sequence to move to row,col #[para]aka cursor home return \033\[${row}\;${col}H } proc move_emit {row col data args} { #*** !doctools #[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] #[para]Return an ansi string representing a move to row col with data appended #[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points #[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout #[para]punk::console::move_emit_return will also return the cursor to the original position #[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. #[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. #[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin #[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. #[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: #[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] #[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. set out "" if {$row eq "this"} { append out \033\[\;${col}G$data } else { append out \033\[${row}\;${col}H$data } foreach {row col data} $args { if {$row eq "this"} { append out \033\[\;${col}G$data } else { append out \033\[${row}\;${col}H$data } } return $out } proc move_forward {{n 1}} { #*** !doctools #[call [fun move_forward] [arg n]] return \033\[${n}C } proc move_back {{n 1}} { #*** !doctools #[call [fun move_back] [arg n]] return \033\[${n}D } proc move_up {{n 1}} { #*** !doctools #[call [fun move_up] [arg n]] return \033\[${n}A } proc move_down {{n 1}} { #*** !doctools #[call [fun move_down] [arg n]] return \033\[${n}B } proc move_column {col} { #*** !doctools #[call [fun move_column] [arg col]] return \x1b\[${col}G } proc move_row {row} { #*** !doctools #[call [fun move_row] [arg row]] #[para]VPA - Vertical Line Position Absolute return \x1b\[${row}d } # -- --- --- --- --- proc cursor_save {} { #*** !doctools #[call [fun cursor_save]] #[para] equivalent term::ansi::code::ctrl::sc #[para] This is the ANSI/SCO cursor save as opposed to the DECSC version #[para] On many terminals either will work - but cursor_save_dec is shorter and perhaps more widely supported return \x1b\[s } proc cursor_restore {} { #*** !doctools #[call [fun cursor_restore]] #[para] equivalent term::ansi::code::ctrl::rc #[para] ANSI/SCO - see also cursor_restore_dec for the DECRC version return \x1b\[u } proc cursor_save_dec {} { #*** !doctools #[call [fun cursor_save_dec]] #[para] equivalent term::ansi::code::ctrl::sca #[para] DECSC return \x1b7 } proc cursor_restore_dec {} { #*** !doctools #[call [fun cursor_restore_attributes]] #[para] equivalent term::ansi::code::ctrl::rca #[para] DECRC return \x1b8 } # -- --- --- --- --- #DECAWM - automatic line wrapping proc enable_line_wrap {} { #*** !doctools #[call [fun enable_line_wrap]] #[para] enable automatic line wrapping when characters entered beyond rightmost column #[para] This will also allow forward movements to move to subsequent lines #[para] This is DECAWM - and is the same sequence output by 'tput smam' return \x1b\[?7h } proc disable_line_wrap {} { #*** !doctools #[call [fun disable_line_wrap]] #[para] disable automatic line wrapping #[para] reset DECAWM - same sequence output by 'tput rmam' #tput rmam return \x1b\[?7l } proc query_mode_line_wrap {} { #*** !doctools #[call [fun query_mode_line_wrap]] #[para] DECRQM to query line-wrap state #[para] The punk::ansi::query_mode_ functions just emit the ansi query sequence. return \x1b\[?7\$p } #DECRPM responses e.g: # \x1b\[?7\;1\$y # \x1b\[?7\;2\$y #where 1 = set, 2 = unset. (0 = mode not recognised, 3 = permanently set, 4 = permanently unset) #Alt screen buffer proc enable_alt_screen {} { #tput smcup outputs "\x1b\[?1049h\x1b\[22\;0\;0t" second esc sequence - DECSLPP? setting page height one less than main screen? #\x1b\[?1049h ;#xterm return \x1b\[?47h } proc disable_alt_screen {} { #tput rmcup outputs \x1b\[?1049l\x1b\[23\;0\;0t] #\x1b\[?1049l return \x1b\[?47l } # -- --- --- proc erase_line {} { #*** !doctools #[call [fun erase_line]] return \033\[2K } proc erase_sol {} { #*** !doctools #[call [fun erase_sol]] #[para]Erase to start of line, leaving cursor position alone. return \033\[1K } proc erase_eol {} { #*** !doctools #[call [fun erase_eol]] return \033\[K } #see also clear_above clear_below # -- --- --- --- --- proc scroll_up {n} { #*** !doctools #[call [fun scroll_up] [arg n]] return \x1b\[${n}S } proc scroll_down {n} { #*** !doctools #[call [fun scroll_down] [arg n]] return \x1b\[${n}T } proc insert_spaces {count} { #*** !doctools #[call [fun insert_spaces] [arg count]] return \x1b\[${count}@ } proc delete_characters {count} { #*** !doctools #[call [fun delete_characters] [arg count]] return \x1b\[${count}P } proc erase_characters {count} { #*** !doctools #[call [fun erase_characters] [arg count]] return \x1b\[${count}X } proc insert_lines {count} { #*** !doctools #[call [fun insert_lines] [arg count]] return \x1b\[${count}L } proc delete_lines {count} { #*** !doctools #[call [fun delete_lines] [arg count]] return \x1b\[${count}M } proc cursor_pos {} { #*** !doctools #[call [fun cursor_pos]] #[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin #[para]The output on screen will look something like ^[lb][lb]47;3R #[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. #[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. #[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list return \033\[6n } proc cursor_pos_extended {} { #includes page e.g ^[[47;3;1R return \033\[?6n } #DECFRA - Fill rectangular area #REVIEW - vt100 accepts decimal values 132-126 and 160-255 ("in the current GL or GR in-use table") #some modern terminals accept and display characters outside this range - but this needs investigation. #in a modern unicode era - the restricted range doesn't make a lot of sense - but we need to see what terminal emulators actually do. #e.g what happens with double-width? #this wrapper accepts a char rather than a decimal value proc fill_rect {char t l b r} { set dec [scan $char %c] return \x1b\[$dec\;$t\;$l\;$b\;$r\$x } #DECFRA with decimal char value proc fill_rect_dec {decimal t l b r} { return \x1b\[$decimal\;$t\;$l\;$b\;$r\$x } proc checksum_rect {id page t l b r} { return "\x1b\[${id}\;${page}\;$t\;$l\;$b\;$r*y" } proc request_cursor_information {} { #*** !doctools #[call [fun request_cursor_information]] #[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report #[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin #[para]A stdin readloop will need to be in place to read this information return \x1b\[1\$w } proc request_tabstops {} { #*** !doctools #[call [fun request_tabstops]] #[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report #[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin return \x1b\[2\$w } proc set_tabstop {} { return \x1bH } proc clear_tabstop {} { return \x1b\[g } proc clear_all_tabstops {} { return \x1b\[3g } #alternative to string terminator is \007 - proc titleset {windowtitle} { #*** !doctools #[call [fun titleset] [arg windowtitles]] #[para]Returns the code to set the title of the terminal window to windowtitle #[para]This may not work on terminals which have multiple panes/windows return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives } #titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title #no cross-platform ansi-only mechanism ? proc test_decaln {} { #Screen Alignment Test #Reset margins, move cursor to the top left, and fill the screen with 'E' #(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) return \x1b#8 } #length of text for printing characters only #- unicode and other non-printing chars and combining sequences should be handled by the ansifreestring_width call at the end. #certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names #review - is there an existing library or better method? printing to a terminal and querying cursor position is relatively slow and terminals lie. #Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first #If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. proc printing_length {line} { #string last faster than string first for long strings anyway if {[string last \n $line] >= 0} { error "line_print_length must not contain newline characters" } #what if line has \v (vertical tab) ie more than one logical screen line? #review - detect ansi moves and warn/error? They would invalidate this algorithm #for a string with ansi moves - we would need to use the overtype::renderline function (which is a bit heavier) #arguably - \b and \r are cursor move operations too - so processing them here is not very symmetrical - review #the purpose of backspace (or line cr) in embedded text is unclear. Should it allow some sort of character combining/overstrike as it has sometimes done historically (nroff/less)? e.g a\b` as an alternative combiner or bolding if same char #This should presumably only be done if the over_strike (os) capability is enabled in the terminal. Either way - it presumably won't affect printing width? set line [punk::ansi::stripansi $line] #we can't use simple \b processing if we get ansi codes and aren't actually processing them (e.g moves) set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi #backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter #(* more correctly - moves cursor back) #Note some terminals process backspace before \v - which seems quite wrong #backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already #leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line # - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. #curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS #Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) #for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces #normalize tabs to an appropriate* width #*todo - handle terminal/context where tabwidth != the default 8 spaces if {[string last \t $line] >= 0} { if {[info exists punk::console::tabwidth]} { set tw $::punk::console::tabwidth } else { set tw 8 } set line [textutil::tabify::untabify2 $line $tw] } #NOTE - this is non-destructive backspace as it occurs in text blocks - and is likely different to the sequence coming from a terminal or editor which generally does a destructive backspace #e.g #This means for example that abc\b has a length of 3. Trailing or leading backslashes have no effect #set bs [format %c 0x08] #set line [string map [list "\r\b" "\r"] $line] ;#backsp following a \r will have no effect set line [string trim $line \b] ;#take off at start and tail only #counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. #(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) set n 0 #set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. set chars [punk::char::grapheme_split $line] set cr_posns [lsearch -all $chars \r] set bs_posns [lsearch -all $chars \b] foreach p $cr_posns { lset chars $p } foreach p $bs_posns { lset chars $p } #mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner #build an output set idx 0 set outchars [list] set outsizes [list] # -- #tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code #this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above #we could reasonably do it with backspace - but cr is more difficult #note that \x08 \b etc won't work to create a compiled switch statement even with unbraced (separate argument) form of switch statement. #set bs "" #set cr ? # -- foreach c $chars { switch -- $c { { if {$idx > 0} { incr idx -1 } } { set idx 0 } default { #set nxt [llength $outchars] if {$idx < [llength $outchars]} { #overstrike? - should usually have no impact on width - width taken as last grapheme in that column #e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done #Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. lset outchars $idx $c } else { lappend outchars $c } #punk::ansi::internal::printing_length_addchar $idx $c incr idx } } } return [punk::char::ansifreestring_width [join $outchars ""]] } # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- #with thanks to Helmut Giese and other Authors of tcllib textutil #this version is adjusted to handle ANSI SGR strings #TODO! ANSI aware version proc untabifyLine { line num } { variable Spaces set currPos 0 while { 1 } { set currPos [string first \t $line $currPos] if { $currPos == -1 } { # no more tabs break } # how far is the next tab position ? set dist [expr {$num - ($currPos % $num)}] # replace '\t' at $currPos with $dist spaces set line [string replace $line $currPos $currPos $Spaces($dist)] # set up for next round (not absolutely necessary but maybe a trifle # more efficient) incr currPos $dist } return $line # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- --- } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi ---}] } namespace eval punk::ansi { # -- --- --- --- --- --- #XTGETTCAP # xterm responds with # DCS 1 + r Pt ST for valid requests, adding to Pt an = , and # the value of the corresponding string that xterm would send, # or # DCS 0 + r ST for invalid requests. # The strings are encoded in hexadecimal (2 digits per # character). If more than one name is given, xterm replies # with each name/value pair in the same response. An invalid # name (one not found in xterm's tables) ends processing of the # list of names. proc xtgetcap {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\x1bP+q$payload\x1b\\" } proc xtgetcap2 {keylist} { #ESC P = 0x90 = DCS = Device Control String set hexkeys [list] foreach k $keylist { lappend hexkeys [util::str2hex $k] } set payload [join $hexkeys ";"] return "\u0090+q$payload\u009c" } namespace eval codetype { #Functions that are primarily intended to operate on a single ansi code sequence - rather than a sequence, or codes embedded in another string #in some cases multiple sequences or leading trailing strings are ok - but the proc docs should note where the function is looking #review - separate namespace for functions that operate on multiple or embedded? proc is_sgr {code} { #SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline #we will accept and pass through the less common colon separator (ITU Open Document Architecture) #Terminals should generally ignore it if they don't use it regexp {\033\[[0-9;:]*m$} $code } #review - has_cursor_move_in_line? Are we wanting to allow strings/sequences and detect that there are no moves that *aren't* within line? proc is_cursor_move_in_line {code {knownline ""}} { if {[regexp {\033\[[0-9]*(:?C|D|G)$} $code]} { return 1 } if {[string is integer -strict $knownline]} { #CSI n : m H where row n happens to be current line - review/test set re [string map [list %n% $knownline] {\x1b\[%n%:[0-9]*H$}] if {[regexp $re $code]} { return 1 } } return 0 } #pure SGR reset with no other functions proc is_sgr_reset {code} { #*** !doctools #[call [fun is_sgr_reset] [arg code]] #[para]Return a boolean indicating whether this string has a trailing pure SGR reset #[para]Note that if the reset is not the very last item in the string - it will not be detected. #[para]This is primarily intended for testing a single ansi code sequence, but code can be any string where the trailing SGR code is to be tested. #todo 8-bit csi regexp {\x1b\[0*m$} $code } #whether this code has 0 (or equivalently empty) parameter (but may set others) #if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes #it generally only makes sense for the reset to be the first parameter - otherwise the code has ineffective portions #However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. #We only look at the initial parameter within the trailing SGR code as this is the well-formed normal case. #Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code proc has_sgr_leadingreset {code} { #*** !doctools #[call [fun has_sgr_leadingreset] [arg code]] #[para]The reset must be the very first item in code to be detected. Trailing strings/codes ignored. set params "" #we need non-greedy if {[regexp {^\033\[([^m]*)m} $code _match params]} { #must match trailing m to be the type of reset we're looking for set plist [split $params ";"] if {[string trim [lindex $plist 0] 0] eq ""} { #e.g \033\[m \033\[0\;...m \033\[0000...m return 1 } else { return 0 } } else { return 0 } } proc is_gx {code} { #g0 {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} #g1 {(?:\x1b\)0)(?:(?!\x1b\)B).)*\x1b\)B} regexp {\x1b(?:\(0(?:(?:(?!\x1b\(B).)*\x1b\(B)|\)0(?:(?:(?!\x1b\)B).)*\x1b\)B))} $code } proc is_gx_open {code} { #todo g2,g3? #pin to start and end with ^ and $ ? #regexp {\x1b\(0|\x1b\)0} $code regexp {\x1b(?:\(0|\)0)} $code } proc is_gx_close {code} { #regexp {\x1b\(B|\x1b\)B} $code regexp {\x1b(?:\(B|\)B)} $code } #input assumed to be single codes - simple test for 2nd char left bracket and trailing m is done anyway - codes not matching are ignored and passed through #This is not order-preserving if non-sgr codes are present as they are tacked on to the end even if they initially were before all SGR codes variable codestate_empty set codestate_empty [dict create] dict set codestate_empty rst "" ;#0 (or empty) dict set codestate_empty intensity "" ;#1 bold, 2 dim, 22 normal dict set codestate_empty italic "" ;#3 on 23 off dict set codestate_empty underline "" ;#4 on 24 off #nonstandard 4:3,4:4,4:5 dict set codestate_empty curlyunderline "" dict set codestate_empty dottedunderline "" dict set codestate_empty dashedunderline "" dict set codestate_empty blink "" ;#5 or 6 for slow/fast, 25 for off dict set codestate_empty reverse "" ;#7 on 27 off dict set codestate_empty hide "" ;#8 on 28 off dict set codestate_empty strike "" ;#9 on 29 off dict set codestate_empty font "" ;#10, 11-19 10 being primary dict set codestate_empty gothic "" ;#20 dict set codestate_empty doubleunderline "" ;#21 dict set codestate_empty proportional "" ;#26 - see note below dict set codestate_empty frame_or_circle "" ;#51,52 on - 54 off (54 off) (not generally used - mintty has repurposed for emoji variation selector) #ideogram rarely supported - this implementation untested - each attribute kept separate as they presumably can be applied simultaneously dict set codestate_empty ideogram_underline "" dict set codestate_empty ideogram_doubleunderline "" dict set codestate_empty ideogram_overline "" dict set codestate_empty ideogram_doubleoverline "" dict set codestate_empty ideogram_clear "" dict set codestate_empty overline "" ;#53 on 55 off - probably not supported - pass through. dict set codestate_empty underlinecolour "" ;#58 - same arguments as 256color and rgb (nonstandard - in Kitty ,VTE,mintty and iTerm2) # -- mintty? dict set codestate_empty superscript "" ;#73 dict set codestate_empty subscript "" ;#74 dict set codestate_empty nosupersub "" ;#75 # -- dict set codestate_empty fg "" ;#30-37 + 90-97 dict set codestate_empty bg "" ;#40-47 + 100-107 #as a common case optimisation - it will not merge a single element list, even if that code contains redundant elements proc sgr_merge_list {args} { if {[llength $args] == 0} { return "" } elseif {[llength $args] == 1} { return [lindex $args 0] } variable codestate_empty set othercodes [list] set codestate $codestate_empty set codestate_initial $codestate_empty ;#keep a copy for resets. set did_reset 0 #we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result? #There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes? #we will output 7bit merge of the SGRs even if some or all were 8bit CSi #As at 2024 - 7bit are widely supported 8bit seem to be often ignored by pseudoterminals #auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism. #review - consider a higher-level option for always emitting 8bit or always 7bit #either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream. #We still output any non SGR codes in the list as they came in - preserving their CSI foreach c $args { #normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes #.. but preserve original c set cnorm [string map [list \x9b {8[} ] $c] switch -- [string index $cnorm 1][string index $cnorm end] { {[m} { set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m #some systems use colon for 256 colors or RGB or nonstandard subparameters #- it is therefore probably not ok to map to semicolon within SGR codes and treat the same. # - will break mintty? set params [string map [list : {;}] $params] set plist [split $params {;}] if {![llength $plist]} { #if there was nothing - it must be a reset - we need it in the list lappend plist "" } #we shouldn't get an empty or zero param beyond index 0 - but it's possible #some codes have additional parameters - e.g rgb colours so we need to jump forward in the parameter list sometimes. for {set i 0} {$i < [llength $plist]} {incr i} { set p [lindex $plist $i] set paramsplit [split $p :] #for some cases we passthrough $p instead of just the number - in case another implementation uses the colon subparameters #e.g see https://github.com/mintty/mintty/wiki/Tips#text-attributes-and-rendering #this may have originated with kitty? #windows terminal seems to be implementing it too #however, they can be completely repurposed - so we probably need to specifically support them.. REVIEW. #review - what about \x1b\[000m #we need to accept/ignore leading zeros - we can't just pass to expr - as some tcl versions still see leading zero as octal set codeint [string trimleft [lindex $paramsplit 0] 0] switch -- $codeint { "" - 0 { set codestate $codestate_initial set did_reset 1 } 1 { #bold if {[llength $paramsplit] == 1} { dict set codestate intensity $p } else { if {[lindex $paramsplit 1] eq "2"} { dict set codestate shadowed "1:2" ;#turn off also with 22 } } } 2 { #dim dict set codestate intensity 2 } 3 { dict set codestate italic 3 } 4 { if {[llength $paramsplit] == 1} { dict set codestate underline 4 } else { switch -- [lindex $paramsplit 1] { 0 { #no underline dict set codestate underline 24 dict set codestate curlyunderline "" dict set codestate dottedunderline "" dict set codestate dashedunderline "" } 1 { dict set codestate underline 4 ;#straight underline } 2 { dict set codestate doubleunderline 21 } 3 { dict set codestate curlyunderline "4:3" } 4 { dict set codestate dottedunderline "4:4" } 5 { dict set codestate dashedunderline "4:5" } } } } 5 - 6 { dict set codestate blink $p } 7 { dict set codestate reverse 7 } 8 { dict set codestate hide 8 } 9 { dict set codestate strike 9 } 10 - 11 - 12 - 13 - 14 - 15 - 16 - 17 - 18 - 19 { dict set codestate font $p } 20 { dict set codestate gothic 20 } 21 { #ECMA-48 double underline - some terminals use as not-bold. For now we won't support that. dict set doubleunderline 21 } 22 { #normal intensity dict set codestate intensity 22 dict set codestate shadowed "" } 23 { #? wikipedia mentions blackletter - review dict set codestate italic 23 } 24 { dict set codestate underline 24 ;#off dict set codestate curlyunderline "" dict set codestate dottedunderline "" dict set codestate dashedunderline "" } 25 { dict set codestate blink 25 ;#off } 26 { #not known to be used in terminals.. could it be used with elastic tabstops? - review dict set codestate proportional 26 } 27 { dict set codestate reverse 27 ;#off } 28 { dict set codestate hide 28 ;#reveal } 29 { dict set codestate strik 29;#off } 30 - 31 - 32 - 33 - 34 - 35 - 36 - 37 { dict set codestate fg $p ;#foreground colour } 38 { #256 color or rgb #check if subparams supplied as colon separated if {[string first : $p] < 0} { switch -- [lindex $plist $i+1] { 5 { #256 - 1 more param dict set codestate fg "38\;5\;[lindex $plist $i+2]" incr i 2 } 2 { #rgb dict set codestate fg "38\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" incr i 4 } } } else { #apparently subparameters can be left empty - and there are other subparams like transparency and color-space #we should only need to pass it all through for the terminal to understand #review dict set codestate fg $p } } 39 { dict set codestate fg 39 ;#default foreground } 40 - 41 - 42 - 43 - 44 - 45 - 46 - 47 { dict set codestate bg $p ;#background colour } 48 { #256 color or rgb if {[string first : $p] < 0} { switch -- [lindex $plist $i+1] { 5 { #256 - 1 more param dict set codestate bg "48\;5\;[lindex $plist $i+2]" incr i 2 } 2 { #rgb dict set codestate bg "48\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" incr i 4 } } } else { dict set codestate bg $p } } 49 { dict set codestate bg 49 ;#default background } 50 { dict set codestate proportional 50 ;#off - see 26 } 51 - 52 { dict set codestate frame_or_circle 51 } 53 { dict set codestate overline 53 ;#not supported in terminals? pass through anyway } 54 { dict set codestate frame_or_circle 54 ;#off } 55 { dict set codestate overline 55; #off } 58 { #nonstandard #256 color or rgb if {[string first : $p] < 0} { switch -- [lindex $plist $i+1] { 5 { #256 - 1 more param dict set codestate underlinecolour "58\;5\;[lindex $plist $i+2]" incr i 2 } 2 { #rgb dict set codestate underlinecolour "58\;2\;[lindex $plist $i+2]\;[lindex $plist $i+3]\;[lindex $plist $i+4]" incr i 4 } } } else { dict set codestate underlinecolour $p } } 59 { #nonstandard - default underlinecolour dict set codestate underlinecolour 59 } 60 { dict set codestate ideogram_underline 60 dict set codestate ideogram_clear "" } 61 { dict set codestate ideogram_doubleunderline 61 dict set codestate ideogram_clear "" } 62 { dict set codestate ideogram_overline 62 dict set codestate ideogram_clear "" } 63 { dict set codestate ideogram_doubleoverline 63 dict set codestate ideogram_clear "" } 64 { dict set codestate ideogram_stress 64 dict set codestate ideogram_clear "" } 65 { dict set codestate ideogram_clear 65 #review - we still need to pass through the ideogram_clear in case something understands it dict set codestate ideogram_underline "" dict set codestate ideogram_doubleunderline "" dict set codestate ideogram_overline "" dict set codestate ideogram_doubleoverline "" } 73 { #mintty only? #can be combined with subscript dict set codestate superscript 73 dict set codestate nosupersub "" } 74 { dict set codestate subscript 74 dict set codestate nosupersub "" } 75 { dict set codestate nosupersub 75 dict set codestate superscript "" dict set codestate subcript "" } 90 - 91 - 92 - 93 - 94 - 95 - 96 - 97 { dict set codestate fg $p } 100 - 101 - 102 - 103 - 104 - 105 - 106 - 107 { dict set codestate bg $p } } } } default { lappend othercodes $c } } } set codemerge "" dict for {k v} $codestate { switch -- $v { "" { } default { append codemerge "${v}\;" } } } set codemerge [string trimright $codemerge {;}] if {$did_reset} { set codemerge "0\;$codemerge" } return "\x1b\[${codemerge}m[join $othercodes ""]" } #has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? } namespace eval sequence_type { proc is_Fe {code} { # C1 control codes if {[regexp {^\033\[[\u0040-\u005F]}]} { #7bit - typical case return 1 } #8bit #review - all C1 escapes ? 0x80-0x90F #This is possibly problematic as it is affected by encoding. #According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit #"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." return 0 } proc is_Fs {code} { puts stderr "is_Fs unimplemented" } } # -- --- --- --- --- --- --- --- --- --- --- #todo - implement colour resets like the perl module: #https://metacpan.org/pod/Text::ANSI::Util #(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) } namespace eval punk::ansi::ta { #*** !doctools #[subsection {Namespace punk::ansi::ta}] #[para] text ansi functions #[para] based on but not identical to the Perl Text Ansi module: #[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm #[list_begin definitions] namespace path ::punk::ansi #handle both 7-bit and 8-bit csi #review - does codepage affect this? e.g ebcdic has 8bit csi in different position #CSI #variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m variable re_csi_open {(?:\x1b\[|\u009b)} #variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@\^_\{|\}\[\]~`]} variable re_csi_code {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]} #intermediate bytes range 0x20-0x2F (ascii space and !"#$%&'()*+,-./) #parameter bytes range 0x30-0x3F (ascii 0-9:;<=>?) #single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). #colour and style variable re_sgr {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m #OSC - termnate with BEL (\a \007) or ST (string terminator \x1b\\) # 8-byte string terminator is \x9c (\u009c) #non-greedy by excluding ST terminators variable re_esc_osc1 {(?:\x1b\])(?:[^\007]*)\007} #variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} ;#somewhat wrong - we want to exclude the ST - not other esc sequences variable re_esc_osc2 {(?:\x1b\])(?:(?!\x1b\\).)*\x1b\\} variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} variable re_osc_open {(?:\x1b\]|\u009d).*} variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} #if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} variable re_altg0_open {(?:\x1b\(0)} variable re_altg0_close {(?:\x1b\(B)} # DCS "ESC P" or "0x90" is also terminated by ST set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} #ST terminators [list \007 \033\\ \u009c] #regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) #non-greedy by exclusion of ST terminators in body #we need to match \x1b\\ not just \x1b There could be colour codes nested in a privacy msg/string #even if terminals generally don't support that - it's quite possible for an ansi code to get nested this way - and we'd prefer it not to break our splits #Just checking for \x1b will terminate the match too early #we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) #variable re_ST {(?:\x1bX|\u0098|\x1b\^|\u009E|\x1b_|\u009F)(?:[^\x1b\007\u009c]*)(?:\x1b\\|\007|\u009c)} ;#downsides: early terminating with nests, mixes 7bit 8bit start/ends (does that exist in the wild?) #keep our 8bit/7bit start-end codes separate variable re_ST {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)} #consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open #default for regexes is non-newline-sensitive matching - ie matches can span lines # -- --- --- --- variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" # -- --- --- --- #handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regext TRIE generator that works with Tcl regexes #This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. variable re_ansi_detect {(?:\x1b(?:\((?:0|B)|\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|007).)*(?:\x1b\\|\007))|c|7|8|M|E|D|H|=|>|(?:#(?:3|4|5|6|8))))|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)|(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]|(?:\u009d)(?:[^\u009c]*)?\u009c} # -- --- --- --- variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_ST_open}|${re_altg0_open}" #may be same as detect - kept in case detect needs to diverge #variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" set re_ansi_split $re_ansi_detect #detect any ansi escapes #review - only detect 'complete' codes - or just use the opening escapes for performance? proc detect {text} { #*** !doctools #[call [fun detect] [arg text]] #[para]Return a boolean indicating whether Ansi codes were detected in text #[para] variable re_ansi_detect expr {[regexp $re_ansi_detect $text]} } proc detect2 {text} { variable re_ansi_detect2 expr {[regexp $re_ansi_detect2 $text]} } proc detect_open {text} { variable re_ansi_detect_open expr {[regexp $re_ansi_detect_open $text]} } #not in perl ta proc detect_csi {text} { #*** !doctools #[call [fun detect_csi] [arg text]] #[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text #[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] #[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation #[para]There is also a multi-byte escape sequence \u009b #[para]This is less commonly used but is also detected here #[para](This function is not in perl ta) variable re_csi_open expr {[regexp $re_csi_open $text]} } proc detect_sgr {text} { #*** !doctools #[call [fun detect_sgr] [arg text]] #[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. #[para]This is the set of CSI sequences ending in 'm' #[para]This is most commonly an Ansi colour code - but also things such as underline and italics #[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. #[para](This function is not in perl ta) variable re_sgr expr {[regexp $re_sgr $text]} } proc strip {text} { #*** !doctools #[call [fun strip] [arg text]] #[para]Return text stripped of Ansi codes #[para]This is a tailcall to punk::ansi::stripansi tailcall stripansi $text } proc length {text} { #*** !doctools #[call [fun length] [arg text]] #[para]Return the character length after stripping ansi codes - not the printing length string length [stripansi $text] } #todo - handle newlines #not in perl ta #proc printing_length {text} { # #} proc trunc {text width args} { } #not in perl ta #returns just the plaintext portions in a list proc split_at_codes {text} { variable re_ansi_split punk::ansi::internal::splitx $text ${re_ansi_split} } # -- --- --- --- --- --- #Split $text to a list containing alternating ANSI color codes and text. #ANSI color codes are always on the second element, fourth, and so on. #(ie plaintext on odd list-indices ansi on even indices) # Example: #ta_split_codes "" # => "" #ta_split_codes "a" # => "a" #ta_split_codes "a\e[31m" # => {"a" "\e[31m"} #ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} #ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} #ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} #ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} # proc split_codes {text} { variable re_ansi_split set re "(?:${re_ansi_split})+" return [_perlish_split $re $text] } #like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) proc split_codes_single {text} { variable re_ansi_split return [_perlish_split $re_ansi_split $text] } #review - tcl greedy expressions may match multiple in one element proc _perlish_split {re text} { if {[string length $text] == 0} { return {} } set list [list] set start 0 #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW while {[regexp -start $start -indices -- $re $text match]} { lassign $match matchStart matchEnd #puts "->start $start ->match $matchStart $matchEnd" if {$matchEnd < $matchStart} { lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] incr start if {$start >= [string length $text]} { break } continue } lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] set start [expr {$matchEnd+1}] #? if {$start >= [string length $text]} { break } } return [lappend list [string range $text $start end]] } proc _perlish_split2 {re text} { if {[string length $text] == 0} { return {} } set list [list] set start 0 #We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW while {[regexp -start $start -indices -- $re $text match]} { lassign $match matchStart matchEnd #puts "->start $start ->match $matchStart $matchEnd" if {$matchEnd < $matchStart} { lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] incr start } else { lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] set start [expr {$matchEnd+1}] } if {$start >= [string length $text]} { break } } return [lappend list [string range $text $start end]] } proc _ws_split {text} { regexp -all -inline {(?:\S+)|(?:\s+)} $text } # -- --- --- --- --- --- #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } # -- --- --- --- --- --- --- --- --- --- --- namespace eval punk::ansi::class { #assertions specifically for punk::ansi::class namespace namespace import ::punk::assertion::assert punk::assertion::active 1 namespace eval renderer { oo::class create base_renderer { variable o_width o_wrap o_overflow o_appendlines o_looplimit variable o_cursor_column o_cursor_row #variable o_render_index ;#index of input (from_ansistring) grapheme/ansi-code that *has* been rendered variable o_rendereditems variable o_from_ansistring o_to_ansistring variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed constructor {args} { #-- make assert available -- # By pointing it to the assert imported into ::punk::ansi::class # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports) set nspath [namespace path] if {"::punk::ansi::class" ni $nspath} { lappend nspath ::punk::ansi::class } namespace path $nspath #-- -- if {[llength $args] < 2} { error {usage: ?-width ? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring} } lassign [lrange $args end-1 end] from_ansistring to_ansistring set defaults [dict create\ -width \uFFEF\ -wrap 1\ -overflow 0\ -appendlines 1\ -looplimit 15000\ -experimental {}\ -cursor_column 1\ -cursor_row 1\ ] puts "[info object class [self]] renderer [self] constructor from ansistring $from_ansistring to ansistring $to_ansistring" set argsflags [lrange $args 0 end-2] dict for {k v} $argsflags { switch -- $k { -width - -wrap - -overflow - -appendlines - -looplimit - -experimental {} default { set known_opts [dict keys $defaults] #don't use [self class] - or we'll get the superclass error "[info object class [self]] unknown option '$k'. Known options: $known_opts" } } } set opts [dict merge $defaults $argsflags] set o_width [dict get $opts -width] set o_wrap [dict get $opts -wrap] set o_overflow [dict get $opts -overflow] set o_appendlines [dict get $opts -appendlines] set o_looplimit [dict get $opts -looplimit] set o_cursor_column [dict get $opts -cursor_column] set o_cursor_row [dict get $opts -cursor_row] set o_from_ansistring $from_ansistring set o_ns_from [info object namespace $o_from_ansistring] set o_to_ansistring $to_ansistring set o_ns_to [info object namespace $o_to_ansistring] #set o_render_index -1 ;#zero based. -1 indicates nothing yet rendered. set o_rendereditems [list] ;#graphemes + controls + individual ansi codes from input $o_from_ansistring } #temporary test method method eval_in {script} { eval $script } method cursor_column {{col ""}} { if {$col eq ""} { return $o_cursor_column } if {$col < 1} { error "Minimum cursor_column is 1" } set o_cursor_column $col } method cursor_row {{row ""}} { if {$row eq ""} { return $o_cursor_row } if {$row < 1} { error "Minimum cursor_row is 1" } set o_cursor_row $row } #consider scroll area #we need to render to something with a concept of viewport, offscreen above,below,left,right? method rendernext {} { upvar ${o_ns_from}::o_ansisplits from_ansisplits upvar ${o_ns_from}::o_elements from_elements upvar ${o_ns_from}::o_splitindex from_splitindex #if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend' if {![llength $from_ansisplits]} { namespace eval $o_ns_from {my MakeSplit} } set eidx [llength $o_rendereditems] #compare what we've rendered so far to our source to confirm they're still in sync if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} { puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]" puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]" error "rendernext error - rendering state is out of sync. rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring" } if {$eidx == [llength $from_elements]} { #nothing new available return [dict create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0] } set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] #we need to render in pt code chunks - not each grapheme element individually #translate from element index to ansisplits index set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to set elementinfo [lindex $from_elements $eidx] lassign $elementinfo type_rendered item #we don't expect type to change should be all graphemes (type 'g') or a single code (type 'sgr','other' etc) #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ? #if so - we should report a list of the grapheme types that were rendered in a pt block #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation. #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element(?) set newtext "" set rendercount 0 if {$type_rendered eq "g"} { set e_splitindex $process_splitindex while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} { append newtext $item lappend o_rendereditems $elementinfo incr rendercount incr eidx set e_splitindex [lindex $from_splitindex $eidx] set elementinfo [lindex $from_elements $eidx] lassign $elementinfo _type item } } else { #while not g ? render however many ansi sequences are in a row? set newtext $item lappend o_rendereditems $elementinfo incr rendercount } set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}] set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}] assert {$rendercount == $count_rendered} #todo - renderline equivalent? $o_to_ansistring append $newtext return [dict create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered] } } #name all with prefix class_ for rendertype detection oo::class create class_cp437 { superclass base_renderer } oo::class create class_editbuf { superclass base_renderer } } #As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string. #oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics oo::class create class_ansistring { variable o_cksum_command o_string o_count #this is the main state we keep of the split apart string #we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split #State regarding output renderstring (if any) variable o_renderout ;#another class_ansistring instance variable o_renderer ;# punk::ansi::class::renderer::class_ instance variable o_renderwidth variable o_rendertype # -- per element lookups -- # llengths should all be the same # we maintain 4 lookups per entry rather than a single nested list # it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing. variable o_elements ;#elements contains entry for each grapheme/control + each ansi code variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes. variable o_gx0states ;#0|1 for alternate graphics gx0 variable o_splitindex ;#entry for each element indicating the index of the split it belongs to. # -- -- constructor {string} { set o_string $string #-- make assert available -- # By pointing it to the assert imported into ::punk::ansi::class # (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports) set nspath [namespace path] if {"::punk::ansi::class" ni $nspath} { lappend nspath ::punk::ansi::class } namespace path $nspath #-- -- #we choose not to generate an internal split-state for the initial string - which may potentially be large. #there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it. #The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string) set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc. set o_ptlist [list] #o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the contructor, or no methods have yet triggered the initial string to have it's internal state built. set o_elements [list] set o_sgrstacks [list] set o_gx0states [list] set o_splitindex [list] set o_cksum_command [list sha1::sha1 -hex] #empty if no render methods used # -- set o_renderer "" set o_renderout "" ;#class_ansistring # -- set o_renderwidth 80 set o_rendertype cp437 } #temporary test method method eval_in {script} { eval $script } method checksum {} { if {[catch { package require sha1 } errM]} { error "sha1 package unavailable" } return [{*}$o_cksum_command $o_string] } #todo - allow setting checksum algorithm and/or command method show_state {{verbose 0}} { #show some state info - without updating anything #only use 'my' methods that don't update the state e.g has_ansi set result "" if {![llength $o_ansisplits]} { append result "No internal splits. " append result \n "has ansi : [my has_ansi]" append result \n "Tcl string length raw string: [string length $o_string]" } else { append result \n "has ansi : [my has_ansi]" append result \n "ansisplit list len: [llength $o_ansisplits]" append result \n "plaintext list len: [llength $o_ptlist]" append result \n "cached count : $o_count" append result \n "Tcl string length raw string : [string length $o_string]" append result \n "Tcl string length plaintext parts: [string length [join $o_ptlist ""]]" if {[llength $o_ansisplits] %2 == 0} { append result \n -------------------------------------------------- append result \n Warning - ansisplits appears to be invalid length append result \n Use show_state 1 to view append result \n -------------------------------------------------- } } if {$o_renderer ne ""} { append result \n " renderer obj: $o_renderer" append result \n " renderer class: [info object class $o_renderer]" } if {$o_renderout ne ""} { append result \n " render target ansistring: $o_renderout" append result \n " render target has ansi : [$o_renderout has_ansi]" append result \n " render target count : [$o_renderout count]" } if {$verbose} { append result \n "ansisplits listing" #we will use a foreach with a single var rather than foreach {pt code} - so that if something goes wrong it's clearer. #(using foreach {pt code} on an odd element list will give a spurious empty code at the end) set i 0 foreach item $o_ansisplits { if {$i % 2 == 0} { set type "pt " } else { set type code } append result \n "$type: [ansistring VIEW $item]" incr i } append result \n "Last element of ansisplits should be of type pt" } return $result } #private method method MakeSplit {} { #The split with each code as it's own element is more generally useful. set o_ansisplits [punk::ansi::ta::split_codes_single $o_string]; set o_ptlist [list] set codestack [list] set gx0_state 0 ;#default off set current_split_index 0 ;#incremented for each pt block, incremented for each code if {$o_count eq ""} { set o_count 0 } foreach {pt code} $o_ansisplits { lappend o_ptlist $pt foreach grapheme [punk::char::grapheme_split $pt] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state lappend o_splitindex $current_split_index incr o_count } #after handling the pt block - incr the current_split_index incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry #we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end) if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state lappend o_splitindex $current_split_index #maintenance warning - dup in append! if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list "\x1b\[m"] lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { lappend o_elements [list other $code] } } #after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index incr current_split_index } } #assertion every grapheme and every individual code has been added to o_elements #every element has an entry in o_sgrstacks #every element has an entry in o_gx0states assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} } method convert_altg {} { #do we need a method to retrieve without converting in the object? puts "unimplemented" } method strippedlength {} { if {![llength $o_ansisplits]} {my MakeSplit} } #returns the ansiless string - doesn't affect the stored state other than initialising it's internal state if it wasn't already method stripped {} { if {![llength $o_ansisplits]} {my MakeSplit} return [join $o_ptlist ""] } #does not affect object state method DoCount {plaintext} { #- combiners/diacritics just map them away here - but for consistency we need to combine unicode grapheme clusters too. #todo - joiners 200d? zwnbsp set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} #we want length to return number of glyphs + normal controls such as newline.. not screen width. Has to be consistent with index function return [string length [regsub -all $re_diacritics $plaintext ""]] } #This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!! method count {} { if {$o_count eq ""} { #only initial string present if {$o_string eq ""} { set o_count 0 return 0 } my MakeSplit #set o_count [my DoCount [join $o_ptlist ""]] } return $o_count } #this is the equivalent of Tcl string length on the ansistripped string method length {} { if {![llength $o_ansisplits]} { if {[punk::ansi::ta::detect $o_string]} { my MakeSplit } else { return [string length $o_string] } } elseif {[llength $o_ansisplits] == 1} { #single split always means no ansi return string length $o_string } return [string length [join $o_ptlist ""]] } method length_raw {} { return [string length $o_string] } #channels for stream in/out.. these are vaguely analogous to the input/output between a shell and a PTY Slave - but this is not intended to be a full pseudoterminal #renderstream_to_render (private?) # write end held by outer ansistring? read end by inner render ansistring ? #renderstream_from_render (public?) method rendertypes {} { set classes [info commands ::punk::ansi::class::renderer::class_*] #strip off class_ set ctypes [lmap v $classes {string range [namespace tail $v] 6 end}] } method rendertype {{rtype ""}} { if {$rtype eq ""} { return $o_rendertype } set rtypes [my rendertypes] if {$rtype ni $rtypes} { error "unknown rendertype '$rtype' - known types: $rtypes (punk::ansi::class::renderer::class_*)" } if {$o_renderout eq ""} { #tell ansistring that it's a renderbuf for another ansistring? point it to the renderer or the parent ansistring? set o_renderout [punk::ansi::class::class_ansistring new ""] } if {$o_renderer ne ""} { set oinfo [info object class $o_renderer] set tail [namespace tail $oinfo] set currenttype [string range $tail 6 end] if {$rtype ne $currenttype} { puts "switch rendertype from $currenttype to $rtype - destroying renderer and creating a new one" $o_renderer destroy ;#what happens to data in target ansistring obj? when does it make sense to keep output and keep rendering vs clearing? set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] } else { return $currenttype } } else { puts "creating first renderer" set o_renderer [punk::ansi::class::renderer::class_$rtype new [self] $o_renderout] } } #--- progressive rendering buffer - another ansistring object method renderwidth {{rw ""}} { #report or set the renderwidth - may invalidate existing render progress? restart? if {$rw eq ""} { return $o_renderwidth } if {$rw eq $o_renderwidth} { return $o_renderwidth } #re-render if needed? set o_renderwidth $rw } method render_state {} { #? report state of render.. we will primarily be using plaintext/ansisequence as the chunk/operation boundary #but - as we can append char sequences directly to the tail split - it's not enough to track which split element we have rendered - but we also need how many graphemes or code sequences we are into the last split. #A single number representing the count of graphemes and individual ANSI codes (from the input ansistring) rendered might work } method renderbuf {} { #get the underlying renderobj - if any return $o_renderout ;#also class_ansistring } method render {} { #full render - return buffer ansistring } method rendernext {} { #render next available pt/code chunk only - not to end of available input if {$o_renderer eq ""} { my rendertype $o_rendertype ;#review - proper way to initialise rendering } $o_renderer rendernext } method render_cursorstate {{row_x_col ""}} { #report /set? cursor posn if {$o_renderer eq ""} { error "No renderer. Call render methods first" } return [dict create row [$o_renderer cursor_row] column [$o_renderer cursor_column]] } #--- method get {} { return $o_string } method has_ansi {} { if {![llength $o_ansisplits]} { #initial string - for large strings,it's faster to run detect than update the internal split-state. return [punk::ansi::ta::detect $o_string] } else { #string will continue to have a single o_ansisplits element if only non-ansi appended return [expr {[llength $o_ansisplits] != 1}] } } #todo - has_ansi_movement ? #If an arbirary ANSI string has movement/cursor restore - then the number of apparent rows in the input will potentially bear no relation to the number of lines of output. #i.e a 'rendered' ANSI string should contain just ANSI SGR character attributes and linefeeds for rows #Knowing which is which can be helpful as far as use of any methods which use the concepts of terminal row/column #analagous to Tcl string append #MAINTENANCE: we need to be very careful to account for unsplit initial state - which exists to make certain operations that don't require an ansi split more efficient method append {args} { set catstr [join $args ""] if {$catstr eq ""} { return $o_string } if {![punk::ansi::ta::detect $catstr]} { #ansi-free additions #if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state if {![llength $o_ansisplits]} { #initialise o_count because we need to add to it. #The count method will do this by calling Makesplit only if it needs to. (which will create ansisplits for anything except empty string) my count } append o_string $catstr;# only append after updating using my count above if {![llength $o_ptlist]} { #If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits #even though we can use lset to add to a list - we can't for empty lappend o_ptlist $catstr #assertion - if o_ptlist is empty so is o_ansisplits lappend o_ansisplits $catstr } else { lset o_ptlist end [string cat [lindex $o_ptlist end] $catstr] lset o_ansisplits end [string cat [lindex $o_ansisplits end] $catstr] } set last_codestack [lindex $o_sgrstacks end] set last_gx0state [lindex $o_gx0states end] set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex foreach grapheme [punk::char::grapheme_split $catstr] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $last_codestack lappend o_gx0states $last_gx0state lappend o_splitindex $current_split_index incr o_count } #incr o_count [my DoCount $catstr] ;#from before we were doing grapheme split.. review } else { if {![llength $o_ansisplits]} { #if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append append o_string $catstr ;#append before split and count on whole lot my MakeSplit ;#update o_count #set combined_plaintext [join $o_ptlist ""] #set o_count [my DoCount $combined_plaintext] assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } else { #update each element of internal state incrementally without reprocessing what is already there. append o_string $catstr set newsplits [punk::ansi::ta::split_codes_single $catstr] set ptnew "" set codestack [lindex $o_sgrstacks end] set gx0_state [lindex $o_gx0states end] set current_split_index [lindex $o_splitindex end] #first pt must be merged with last element of o_ptlist set new_pt_list [list] foreach {pt code} $newsplits { lappend new_pt_list $pt append ptnew $pt foreach grapheme [punk::char::grapheme_split $pt] { lappend o_elements [list g $grapheme] lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state lappend o_splitindex $current_split_index incr o_count } incr current_split_index ;#increment 1 of 2 within each loop if {$code ne ""} { lappend o_sgrstacks $codestack lappend o_gx0states $gx0_state lappend o_splitindex $current_split_index #maintenance - dup in MakeSplit! if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list "\x1b\[m"] lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] lappend o_elements [list sgr $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first - remove straight dupes set dup_posns [lsearch -all -exact $codestack $code] ;#must be -exact because of square-bracket glob chars set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code lappend o_elements [list sgr $code] } else { if {[punk::ansi::codetype::is_gx_open $code]} { set gx0_state 1 lappend o_elements [list gx0 1] ;#don't store code - will complicate debugging if we spit it out and jump character sets } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx0_state 0 lappend o_elements [list gx0 0] ;#don't store code - will complicate debugging if we spit it out and jump character sets } else { lappend o_elements [list other $code] } } incr current_split_index ;#increment 2 of 2 } } lset o_ptlist end [string cat [lindex $o_ptlist end] [lindex $new_pt_list 0]] lappend o_ptlist {*}[lrange $new_pt_list 1 end] lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $newsplits 0]] lappend o_ansisplits {*}[lrange $newsplits 1 end] #if {$o_count eq ""} { # #we have splits - but didn't count graphemes? # set o_count [my DoCount [join $o_ptlist ""]] ;#o_ptlist already has ptnew parts #} else { # incr o_count [my DoCount $ptnew] #} } } assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]} return $o_string } #we are currently assuming that the component strings have complete graphemes ie no split clusters - and therefore we don't attempt to check for and combine at the string catenation points. #This is 'often'? likely to be true - We don't have grapheme cluster support yet anyway. review. method appendobj {args} { if {![llength $o_ansisplits]} { my MakeSplit } foreach a $args { set ns [info object namespace $a] upvar ${ns}::o_ansisplits new_ansisplits upvar ${ns}::o_count new_count if {![llength $new_ansisplits] || $new_count eq ""} { namespace eval $ns {my MakeSplit} } upvar ${ns}::o_ptlist new_ptlist upvar ${ns}::o_string new_string upvar ${ns}::o_elements new_elements upvar ${ns}::o_sgrstacks new_sgrstacks upvar ${ns}::o_gx0states new_gx0states upvar ${ns}::o_splitindex new_splitindex lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $new_ansisplits 0]] lappend o_ansisplits {*}[lrange $new_ansisplits 1 end] lset o_ptlist end [string cat [lindex $o_ptlist end] [lindex $new_ptlist 0]] lappend o_ptlist {*}[lrange $new_ptlist 1 end] append o_string $new_string lappend o_elements {*}$new_elements #prepend the previous sgr stack to all stacks in the new list. #This allows us to use only list operations to keep the sgr data valid - but we don't yet make it canonical/flat by examining each for resets etc. #ie just call sgr_merge_list once now. set laststack [lindex $o_sgrstacks end] set mergedtail [punk::ansi::codetype::sgr_merge_list "" {*}$laststack] foreach n $new_sgrstacks { lappend o_sgrstacks [list $mergedtail {*}$n] } lappend o_gx0states {*}$new_gx0states #first and last of ansisplits splits merge set lastidx [lindex $o_splitindex end] set firstnewidx [lindex $new_splitindex 0] set diffidx [expr {$lastidx - $firstnewidx}] ;#may be negative foreach v $new_splitindex { lappend o_splitindex [expr {$v + $diffidx}] } incr o_count $new_count } return $o_count } #method append_and_render - append and render up to end of appended data at same time method view {args} { if {$o_string eq ""} { return "" } #ansistring VIEW relies only on the raw ansi input as it is essentially just a string map. #We don't need to force an ansisplit if we happen to have an unsplit initial string ansistring VIEW $o_string } method viewcodes {args} { if {$o_string eq ""} { return "" } if {![llength $o_ansisplits]} {my MakeSplit} set redb [a+ red bold] ;#osc/apm ? anything with potential security risks or that is unusual set greenb [a+ green bold] ;#SGR set cyanb [a+ cyan bold] ;#col,row movement set blueb [a+ blue bold] ;# set blueb_r [a+ blue bold reverse] set whiteb [a+ white bold] ;#SGR reset (or highlight first part if leading reset) set GX [a+ black White bold] ;#alt graphics set unk [a+ yellow bold] ;#unknown/unhandled set RST [a] set re_col_move {\x1b\[([0-9]*)(C|D|G)$} set re_row_move {\x1b\[([0-9]*)(A|B)$} set re_both_move {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)H$} set re_vt_sequence {\x1b\[([0-9]*)(?:;){0,1}([0-9]*)~$} set re_cursor_save {\x1b\[s$} set re_cursor_restore {\x1b\[u$} set re_cursor_save_dec {\x1b7$} set re_cursor_restore_dec {\x1b8$} set arrow_left \u2190 set arrow_right \u2192 set arrow_up \u2191 set arrow_down \u2193 set arrow_lr \u2194 set arrow_du \u2195 #2024 - there is no 4-arrow symbol or variations (common cursor and window icon) in unicode - despite requests and argument from the community that this has been in use for decades. #They are probably too busy with stupid emoji additions to add this or c1 visualization glyphs. #don't split into lines first - \n is valid within ST sections set output "" #set splits [punk::ansi::ta::split_codes_single $string] foreach {pt code} $o_ansisplits { append output [ansistring VIEW {*}$args $pt] #map DEC cursor_save/restore to CSI version set code [string map [list \x1b7 \x1b\[s \x1b8 \x1b\[u ] $code] set c1 [string index $code 0] set c1c2 [string range $code 0 1] #set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} set leadernorm [string range [string map [list\ \x1b\[ 7CSI\ \x9b 8CSI\ \x1b\] 7OSC\ \x1b\( 7GFX\ \x9d 8OSC\ \x1b 7ESC\ ] $c1c2] 0 3] ;#leadernorm is 1st 2 chars mapped to 4char normalised indicator - or is original 2 chars #we leave the tail of the code unmapped for now switch -- $leadernorm { 7CSI - 7OSC { set codenorm [string cat $leadernorm [string range $code 2 end]] } 7ESC { set codenorm [string cat $leadernorm [string range $code 1 end]] } 8CSI - 8OSC { set codenorm [string cat $leadernorm [string range $code 1 end]] } default { #we haven't made a mapping for this set codenorm $code } } switch -- $leadernorm { {7CSI} - {8CSI} { set param [string range $codenorm 4 end-1] #puts stdout "--> CSI [string index $leadernorm 0] bit param:$param" switch -- [string index $codenorm end] { m { if {[punk::ansi::codetype::is_sgr_reset $code]} { set displaycode [ansistring VIEW $code] append output ${whiteb}$displaycode$RST } else { set displaycode [ansistring VIEW $code] if {[punk::ansi::codetype::has_sgr_leadingreset $code]} { #highlight the esc & leftbracket in white as indication there is a leading reset set cposn [string first ";" $displaycode] append output ${whiteb}[string range $displaycode 0 $cposn]$RST${greenb}[string range $displaycode $cposn+1 end]$RST } else { append output ${greenb}$displaycode$RST } } } A - B { #row move set displaycode [ansistring VIEW $code] set displaycode [string map [list A "A$arrow_up" B "B$arrow_down"] $displaycode] append output ${cyanb}$displaycode$RST } C - D - G { #set num [string range $codenorm 4 end-1] set displaycode [ansistring VIEW $code] set displaycode [string map [list C "C$arrow_right" D "D$arrow_left" G "G$arrow_lr"] $displaycode] append output ${cyanb}$displaycode$RST } H - f { set params [string range $codenorm 4 end-1] lassign [split $params {;}] row col #lassign $matchinfo _match row col set displaycode [ansistring VIEW $code] if {$col eq ""} { #row only move set map [list H "H${arrow_lr}" f "f${arrow_lr}] } else { #row and col move set map [list H "H${arrow_lr}${arrow_du}" f "${arrow_lr}${arrow_du}"] } set displaycode [string map $map $displaycode] append output ${cyanb}$displaycode$RST } s { append output ${blueb}[ansistring VIEW $code]$RST } u { append output ${blueb_r}[ansistring VIEW $code]$RST } default { append output ${unk}[ansistring VIEW -lf 1 $code]$RST } } } 7GFX { switch -- [string index $codenorm 4] { "0" { append output ${GX}GX+$RST } "B" { append output ${GX}GX-$RST } } } 7ESC { append output ${unk}[ansistring VIEW -lf 1 $code]$RST } default { #if the code is a PM (or other encapsulation type code e.g terminated by ST) we want to see linefeeds as visual representation character append output ${unk}[ansistring VIEW -lf 1 $code]$RST } } } return $output } method viewstyle {args} { if {$o_string eq ""} { return "" } if {![llength $o_ansisplits]} {my MakeSplit} #set splits [punk::ansi::ta::split_codes_single $string] set output "" set codestack [list] set gx_stack [list] ;#not actually a stack set cursor_saved "" foreach {pt code} $o_ansisplits { if {[llength $args]} { set pt [ansistring VIEW {*}$args $pt] } append output [punk::ansi::codetype::sgr_merge_list {*}$codestack]$pt if {$code ne ""} { append output [a][ansistring VIEW -lf 1 $code] if {[punk::ansi::codetype::is_sgr_reset $code]} { set codestack [list] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } elseif {[punk::ansi::codetype::is_sgr $code]} { #basic simplification first.. straight dups set dup_posns [lsearch -all -exact $codestack $code] ;#-exact because of square-bracket glob chars #lremove not present in pre 8.7! set codestack [lremove $codestack {*}$dup_posns] lappend codestack $code } elseif {[regexp {\x1b7|\x1b\[s} $code]} { #cursor_save set cursor_saved [punk::ansi::codetype::sgr_merge_list {*}$codestack] } elseif {[regexp {\x1b8|\x1b\[u} $code]} { #cursor_restore set codestack [list $cursor_saved] } else { #leave SGR stack as is if {[punk::ansi::codetype::is_gx_open $code]} { set gx_stack [list gx0_on] ;#we'd better use a placeholder - or debugging will probably get into a big mess } elseif {[punk::ansi::codetype::is_gx_close $code]} { set gx_stack [list] } } } } return $output } } } namespace eval punk::ansi::ansistring { #*** !doctools #[subsection {Namespace punk::ansi::ansistring}] #[para]punk::ansi::ansistring ensemble - ansi-aware string operations #[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings #[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. #[list_begin definitions] namespace path [list ::punk::ansi ::punk::ansi::ta] namespace ensemble create namespace export length trim trimleft trimright INDEX COUNT VIEW VIEWCODES VIEWSTYLE INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX NEW #todo - expose _splits_ methods so caller can work efficiently with the splits themselves #we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single #\UFFFD - replacement char or \U2426 #using ISO 2047 graphical representations of control characters - probably obsolete? #00 NUL Null ⎕ U+2395 NU #01 TC1, SOH Start of Heading ⌈ U+2308 SH #02 TC2, STX Start of Text ⊥ U+22A5 SX #03 TC3, ETX End of Text ⌋ U+230B EX #04 TC4, EOT End of Transmission ⌁ U+2301[9] ET #05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ #06 TC6, ACK Acknowledge ✓ U+2713 AK #07 BEL Bell ⍾ U+237E[9] BL #08 FE0, BS Backspace ⤺ —[b] BS #09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT #0A FE2, LF Line Feed ≡ U+2261 LF #0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT #0C FE4, FF Form Feed ↡ U+21A1 FF #0D FE5, CR Carriage Return ⪪ U+2AAA CR #0E SO Shift Out ⊗ U+2297 SO #0F SI Shift In ⊙ U+2299 SI #10 TC7, DLE Data Link Escape ⊟ U+229F DL #11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 #12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 #13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 #14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 #15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK #16 TC9, SYN Synchronization ⎍ U+238D SY #17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB #18 CAN Cancel ⧖ U+29D6 CN #19 EM End of Medium ⍿ U+237F[9] EM #1A SUB Substitute Character ␦ U+2426[12] SB #1B ESC Escape ⊖ U+2296 EC #1C IS4, FS File Separator ◰ U+25F0 FS #1D IS3, GS Group Separator ◱ U+25F1 GS #1E IS2, RS Record Separator ◲ U+25F2 RS #1F IS1 US Unit Separator ◳ U+25F3 US #20 SP Space △ U+25B3 SP #7F DEL Delete ▨ —[d] DT #C0 control code visual representations # Code Val Name 2X Description # 2400 00 NUL NU Symbol for Null # 2401 01 SOH SH Symbol for Start of Heading # 2402 02 STX SX Symbol for Start of Text # 2403 03 ETX EX Symbol for End of Text # 2404 04 EOT ET Symbol for End of Transmission # 2405 05 ENQ EQ Symbol for Enquiry # 2406 06 ACK AK Symbol for Acknowledge # 2407 07 BEL BL Symbol for Bell # 2409 09 BS BS Symbol for Backspace # 2409 09 HT HT Symbol for Horizontal Tab (1) # 240A 0A LF LF Symbol for Line Feed (1) # 240B 0B VT VT Symbol for Vertical Tab (1) # 240C 0C FF FF Symbol for Form Feed (2) # 240D 0D CR CR Symbol for Carriage Return (1) # 240E 0E SO SO Symbol for Shift Out # 240F 0F SI SI Symbol for Shift In # 2410 10 DLE DL Symbol for Data Link Escape # 2411 11 DC1 D1 Symbol for Device Control 1 (2) # 2412 12 DC2 D2 Symbol for Device Control 2 (2) # 2413 13 DC3 D3 Symbol for Device Control 3 (2) # 2414 14 DC4 D4 Symbol for Device Control 4 (2) # 2415 15 NAK NK Symbol for Negative Acknowledge # 2416 16 SYN SY Symbol for Synchronous Idle # 2417 17 ETB EB Symbol for End of Transmission Block # 2418 18 CAN CN Symbol for Cancel # 2419 19 EM EM Symbol for End of Medium # 241A 1A SUB SU Symbol for Substitute # 241B 1B ESC EC Symbol for Escape # 241C 1C FS FS Symbol for Field Separator (3) # 241D 1D GS GS Symbol for Group Separator (3) # 241E 1E RS RS Symbol for Record Separator (3) # 241F 1F US US Symbol for Unit Separator (3) # 2420 20 SP SP Symbol for Space (4) # 2421 7F DEL DT Symbol for Delete (4) #C1 control code visual representations #Code Val Name 2X Description # 80 80 80 (1) # 81 81 81 (1) # E022 82 BPH 82 Symbol for Break Permitted Here (2) # E023 83 NBH 83 Symbol for No Break Here (2) # E024 84 IND IN Symbol for Index (3) # E025 85 NEL NL Symbol for Next Line (4) # E026 86 SSA SS Symbol for Start Selected Area # E027 87 ESA ES Symbol for End Selected Area # E028 88 HTS HS Symbol for Character Tabulation Set # E029 89 HTJ HJ Symbol for Character Tabulation with Justification # E02A 8A VTS VS Symbol for Line Tabulation Set # E02B 8B PLD PD Symbol for Partial Line Forward # E02C 8C PLU PU Symbol for Partial Line Backward # E02D 8D RI RI Symbol for Reverse Line Feed # E02E 8E SS2 S2 Symbol for Single Shift 2 # E02F 8F SS3 S3 Symbol for Single Shift 3 # E030 90 DCS DC Symbol for Device Control String # E031 91 PU1 P1 Symbol for Private Use 1 # E032 92 PU2 P2 Symbol for Private Use 2 # E033 93 STS SE Symbol for Set Transmit State # E034 94 CCH CC Symbol for Cancel Character # E035 95 MW MW Symbol for Message Waiting # E036 96 SPA SP Symbol for Start Protected (Guarded) Area # E037 97 EPA EP Symbol for End Protected (Guarded) Area # E038 98 SOS 98 Symbol for Start of String (2) # 99 99 (1) # E03A 9A SCI 9A Symbol for Single Character Introducer (2) # E03B 9B CSI CS Symbol for Control Sequence Introducer (5) # E03C 9C ST ST Symbol for String Terminator # E03D 9D OSC OS Symbol for Operating System Command # E03E 9E PM PM Symbol for Privacy Message # E03F 9F APC AP Symbol for Application Program Command variable debug_visuals #modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) #Goal is not to map every control character? #Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly #ETX -ctrl-c #EOT ctrl-d (EOF?) #SYN ctrl-v #SUB ctrl-z #CAN ctrl-x #FS ctrl-\ (SIGQUIT) set visuals_interesting [dict create\ NUL [list \x00 \u2400]\ ETX [list \x03 \u2403]\ EOT [list \x04 \u2404]\ BEL [list \x07 \u2407]\ SYN [list \x16 \u2416]\ CAN [list \x18 \u2418]\ SUB [list \x1a \u241a]\ FS [list \x1c \u241c]\ SOS [list \x98 \ue038]\ CSI [list \x9b \ue03b]\ ST [list \x9c \ue03c]\ PM [list \x9e \ue03e]\ APC [list \x9f \ue03f]\ ] #it turns out we need pretty much everything for debugging set visuals_c0 [dict create\ NUL [list \x00 \u2400]\ SOH [list \x01 \u2401]\ STX [list \x02 \u2402]\ ETX [list \x03 \u2403]\ EOT [list \x04 \u2404]\ ENQ [list \x05 \u2405]\ ACK [list \x06 \u2406]\ BEL [list \x07 \u2407]\ FF [list \x0c \u240c]\ SO [list \x0e \u240e]\ SF [list \x0f \u240f]\ DLE [list \x10 \u2410]\ DC1 [list \x11 \u2411]\ DC2 [list \x12 \u2412]\ DC3 [list \x13 \u2413]\ DC4 [list \x14 \u2414]\ NAK [list \x15 \u2415]\ SYN [list \x16 \u2416]\ ETB [list \x17 \u2417]\ CAN [list \x18 \u2418]\ EM [list \x19 \u2419]\ SUB [list \x1a \u241a]\ FS [list \x1c \u241c]\ GS [list \x1d \u241d]\ RS [list \x1e \u241e]\ US [list \x1f \u241f]\ DEL [list \x7f \u2421]\ ] #alternate symbols for space # \u2422 Blank Symbol (b with forwardslash overly) # \u2423 Open Box (square bracket facing up like a tray/box) # \u2424 Symbol for Newline (small "NL") # \u2425 Symbol for Delete Form Two (some sort of fat forward-slash thing) # \u2426 Symbol for Substitute Form Two (backwards question mark) #these are in the PUA (private use area) unicode block - seem to be rarely supported #the unicode consortium has apparently neglected to provide separate visual representation codepoints for not only the c1 controls (some of which ARE still used e.g in sixels) but various other non-printing chars such as BOM #The debugging/analysis usecase is an important one - surely moreso that some of the emoji stuff coming out of there. #we'll hack in some stuff as needed - may override some of the visuals_c1 which is usually just empty/substitute glyphs #Being repurposed - these could potentially be confused with actual characters depending on the debugging context #To minimize potential confusion - we'll use a longer replacement sequence - which is not ideal from the perspective of terminal column layout debugging #A single unique glyph would be better - although the bracketing for 8-bit codes is a useful visual indicator #(review - BOM should use different brackets to c1?) #todo - regularly check if unicode has improved in this area - though with requests for c1 visuals dating back to at least 2011 - it's doubtful. #for 8-bit controls - we will standardize on a fixed width of 4 bracketing with: #\u2987 and \u2988 from Miscellaneous Mathematical Symbols-B (D or fractional-moon shaped brackets) #\u2987 - Z Notation Left Image Bracket #\u2988 - Z Notation Right Image Bracket #selection of these is also based on them being seemingly reasonably widely available in fonts.. review #my apologies if you're debugging z-notation strings! #If only column's-worth of symbol/char needed between the brackets - pad with a space before the closing bracket #8-bit brackets set ob8 \u2987; set cb8 \u2988 ;#z-notation image brackets #miscellaneous debug code brackets set obm \u27e6 ;set cbm \u27e7 ;#square double brackets from Miscellaneous Mathematical Symbols-A #this private range so rarely supported in fonts - and visuals are unknown, so we will make up some 2-letter codes for now #set visuals_c1 [dict create\ # BPH [list \x82 "${ob8}\ue022 $cb8"]\ # NBH [list \x83 "${ob8}\ue023 $cb8"]\ # IND [list \x84 "${ob8}\ue024 $cb8"]\ # NEL [list \x85 "${ob8}\ue025 $cb8"]\ # SSA [list \x86 "${ob8}\ue026 $cb8"]\ # ESA [list \x87 "${ob8}\ue027 $cb8"]\ # HTS [list \x88 "${ob8}\ue028 $cb8"]\ # HTJ [list \x89 "${ob8}\ue029 $cb8"]\ # VTS [list \x8a "${ob8}\ue02a $cb8"]\ # PLD [list \x8b "${ob8}\ue02a $cb8"]\ # PLU [list \x8c "${ob8}\ue02c $cb8"]\ # RI [list \x8d "${ob8}\ue02d $cb8"]\ # SS2 [list \x8e "${ob8}\ue02e $cb8"]\ # SS3 [list \x8f "${ob8}\ue02f $cb8"]\ # DCS [list \x90 "${ob8}\ue030 $cb8"]\ # PU1 [list \x91 "${ob8}\ue031 $cb8"]\ # PU2 [list \x92 "${ob8}\ue032 $cb8"]\ # STS [list \x93 "${ob8}\ue033 $cb8"]\ # CCH [list \x94 "${ob8}\ue034 $cb8"]\ # MW [list \x95 "${ob8}\ue035 $cb8"]\ # SPA [list \x96 "${ob8}\ue036 $cb8"]\ # EPA [list \x97 "${ob8}\ue037 $cb8"]\ # SOS [list \x98 "${ob8}\ue038 $cb8"]\ # SCI [list \x9a "${ob8}\ue03a $cb8"]\ # CSI [list \x9b "${ob8}\ue03b $cb8"]\ # ST [list \x9c "${ob8}\ue03c $cb8"]\ # OSC [list \x9d "${ob8}\ue03d $cb8"]\ # PM [list \x9e "${ob8}\ue03e $cb8"]\ # APC [list \x9f "${ob8}\ue03f $cb8"]\ #] #these 2 letter codes only need to disambiguate within the c1 set - they're not great. #these sit within the Latin-1 Supplement block set visuals_c1 [dict create\ PAD [list \x80 "${ob8}PD$cb8"]\ HOP [list \x81 "${ob8}HP$cb8"]\ BPH [list \x82 "${ob8}BP$cb8"]\ NBH [list \x83 "${ob8}NB$cb8"]\ IND [list \x84 "${ob8}IN$cb8"]\ NEL [list \x85 "${ob8}NE$cb8"]\ SSA [list \x86 "${ob8}SS$cb8"]\ ESA [list \x87 "${ob8}ES$cb8"]\ HTS [list \x88 "${ob8}HS$cb8"]\ HTJ [list \x89 "${ob8}HT$cb8"]\ VTS [list \x8a "${ob8}VT$cb8"]\ PLD [list \x8b "${ob8}PD$cb8"]\ PLU [list \x8c "${ob8}PU$cb8"]\ RI [list \x8d "${ob8}RI$cb8"]\ SS2 [list \x8e "${ob8}S2$cb8"]\ SS3 [list \x8f "${ob8}S3$cb8"]\ DCS [list \x90 "${ob8}DC$cb8"]\ PU1 [list \x91 "${ob8}P1$cb8"]\ PU2 [list \x92 "${ob8}P2$cb8"]\ STS [list \x93 "${ob8}SX$cb8"]\ CCH [list \x94 "${ob8}CC$cb8"]\ MW [list \x95 "${ob8}MW$cb8"]\ SPA [list \x96 "${ob8}SP$cb8"]\ EPA [list \x97 "${ob8}EP$cb8"]\ SOS [list \x98 "${ob8}SO$cb8"]\ SCI [list \x9a "${ob8}SC$cb8"]\ CSI [list \x9b "${ob8}CS$cb8"]\ ST [list \x9c "${ob8}ST$cb8"]\ OSC [list \x9d "${ob8}OS$cb8"]\ PM [list \x9e "${ob8}PM$cb8"]\ APC [list \x9f "${ob8}AP$cb8"]\ ] set hack [dict create] dict set hack BOM1 [list \uFEFF "${obm}\U1f4a3$cbm"] ;#byte order mark/ ZWNBSP (ZWNBSP usage generally deprecated) - a picture of a bomb(2wide glyph) #review - other boms? Encoding dependent? dict set hack DCS [list \x90 "${ob8}\u2328 $cb8"] ;#keyboard from Miscellaneous Technical - 1 wide + pad. dict set hack SOS [list \x98 "${ob8}\u2380 $cb8"] ;#Insertion Symbol from Miscellaneous Technical - 1 wide + pad dict set hack ST [list \x9c "${ob8}\u2383 $cb8"] ;#Emphasis Symbol from Miscellaneous Technical - 1 wide + pad (graphically related to \u2380) dict set hack CSI [list \x9b "${ob8}\u2386 $cb8"] ;#Enter Symbol from Miscellaneous Technical - 1 wide + pad dict set hack OSC [list \x9d "${ob8}\u2b55$cb8"] ;#bright red ring from Miscellaneous Symbols and Arrows - 2 wide (OSC could be used for clipboard or other potentially security sensitive functions) dict set hack PM [list \x9e "${ob8}PM$cb8"] dict set hack APC [list \x9f "${ob8}\U1f534$cb8"] ;#bright red ball from Miscellaneoust Symbols and Pictographs - 2 wide (APC also noted as a potential security risk) set debug_visuals [dict merge $visuals_c0 $visuals_c1 $hack] #for repeated interaction with the same ANSI string - a mechanism to store state is more efficient proc NEW {string} { punk::ansi::class::class_ansistring new $string } proc VIEW {args} { #*** !doctools #[call [fun VIEW] [arg string]] #[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets #[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408') #[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions #[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. variable debug_visuals if {![llength $args]} { return "" } set string [lindex $args end] set defaults [dict create\ -esc 1\ -cr 1\ -lf 0\ -vt 0\ -ht 1\ -bs 1\ -sp 1\ ] set argopts [lrange $args 0 end-1] if {[llength $argopts] % 2 != 0} { error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [dict keys $defaults]" } set opts [dict merge $defaults $argopts] # -- --- --- --- --- set opt_esc [dict get $opts -esc] set opt_cr [dict get $opts -cr] set opt_lf [dict get $opts -lf] set opt_vt [dict get $opts -vt] set opt_ht [dict get $opts -ht] set opt_bs [dict get $opts -bs] set opt_sp [dict get $opts -sp] # -- --- --- --- --- set visuals_opt [dict create] if {$opt_esc} { dict set visuals_opt ESC [list \x1b \u241b] } if {$opt_cr} { dict set visuals_opt CR [list \x0d \u240d] } if {$opt_lf} { dict set visuals_opt LF [list \x0a \u240a] } if {$opt_vt} { dict set visuals_opt VT [list \x0b \u240b] } if {$opt_ht} { dict set visuals_opt HT [list \x09 \u2409] } if {$opt_bs} { dict set visuals_opt BS [list \x08 \u2408] } if {$opt_sp} { dict set visuals_opt SP [list \x20 \u2420] } set visuals [dict merge $visuals_opt $debug_visuals] set charmap [list] dict for {nm chars} $visuals { lappend charmap {*}$chars } return [string map $charmap $string] #test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs #return [string map [list \033 \U2296 \007 \U237E] $string] } #The implementation of viewcodes,viewstyle is more efficiently done in an object for the case where repeated calls of various methods can re-use the internal splits. #for oneshots here - there is only minor overhead to use and destroy the object here. proc VIEWCODES {args} { set string [lindex $args end] if {$string eq ""} { return "" } set arglist [lrange $args 0 end-1] set ansistr [ansistring NEW $string] set result [$ansistr viewcodes {*}$arglist] $ansistr destroy return $result } #an attempt to show the codes and colour/style of the *input* #ie we aren't looking at the row/column positioning - but we do want to keep track of cursor attribute saves and restores proc VIEWSTYLE {args} { set string [lindex $args end] if {$string eq ""} { return "" } set arglist [lrange $args 0 end-1] set ansistr [ansistring NEW $string] set result [$ansistr viewstyle {*}$arglist] $ansistr destroy return $result } #todo - change to COUNT to emphasize the difference between this and doing a Tcl string length on the ansistriped string! #review. Tabs/elastic tabstops. Do we want to count a tab as one element? Probably so if we are doing so for \n etc and not counting 2W unicode. #Consider leaving tab manipualation for a width function which determines columns occupied for all such things. proc COUNT {string} { #*** !doctools #[call [fun COUNT] [arg string]] #[para]Returns the count of visible graphemes and non-ansi control characters #[para]Incomplete! grapheme clustering support not yet implemented - only diacritics are currently clustered to count as one grapheme. #[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. #[para]This is not quite equivalent to calling string length on the result of stripansi $string due to diacritics and/or grapheme combinations #[para]Note that this returns the number of characters in the payload (after applying combiners) #It is not always the same as the width of the string as rendered on a terminal due to 2wide Unicode characters and the usual invisible control characters such as \r and \n #[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. #stripping diacritics only makes sense if we are counting them as combiners and also treating unicode grapheme combinations as single entities. #as Our ansistring INDEX function returns the character with diacritics, and will ultimately return grapheme clusters as a single element - we strip theme here as not counted. #todo - combiners/diacritics? just map them away here? set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} set string [regsub -all $re_diacritics $string ""] #we want length to return number of glyphs.. not screen width. Has to be consistent with index function string length [stripansi $string] } #included as a test/verification - slightly slower. #grapheme split version may end up being used once it supports unicode grapheme clusters proc count2 {string} { #we want count to return number of glyphs.. not screen width. Has to be consistent with index function return [llength [punk::char::grapheme_split [stripansi $string]]] } proc length {string} { string length [stripansi $string] } proc _splits_trimleft {sclist} { set intext 0 set outlist [list] foreach {pt ansiblock} $sclist { if {$ansiblock ne ""} { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimleft $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } else { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" } else { lappend outlist [string trimleft $pt] set intext 1 } } else { lappend outlist $pt } } } return $outlist } proc _splits_trimright {sclist} { set intext 0 set outlist [list] #we need to account for empty ansiblock var caused by dual-var iteration over odd length list foreach {pt ansiblock} [lreverse $sclist] { if {$ansiblock ne ""} { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" $ansiblock } else { lappend outlist [string trimright $pt] $ansiblock set intext 1 } } else { lappend outlist $pt $ansiblock } } else { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { lappend outlist "" } else { lappend outlist [string trimright $pt] set intext 1 } } else { lappend outlist $pt } } } return [lreverse $outlist] } proc _splits_trim {sclist} { return [_splits_trimright [_splits_trimleft $sclist]] } #Note that trim/trimleft/trimright will trim spaces at the extremities that are styled with background colour, underline etc #that may be unexpected, but it's probably the only thing that makes sense. Plain string trim can chop off whitespace that is extraneous to the ansi entirely. proc trimleft {string args} { set intext 0 set out "" #for split_codes only first or last pt can be empty string - but we can also get an empty ansiblock by using foreach with 2 vars on an odd-length list foreach {pt ansiblock} [split_codes $string] { if {!$intext} { if {$pt eq "" || [regexp {^\s+$} $pt]} { append out $ansiblock } else { append out [string trimleft $pt]$ansiblock set intext 1 } } else { append out $pt$ansiblock } } return $out } proc trimright {string} { if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing set rtrimmed_list [_splits_trimright [split_codes $string]] return [join $rtrimmed_list ""] } proc trim {string} { #make sure we do our ansi-scanning split only once - so use list-based trim operations #order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length #we save a single function call by calling both here rather than _splits_trim join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" } #Capitalised because it's the clustered grapheme/controlchar index - not the tcl string index proc INDEX {string index} { #*** !doctools #[call [fun index] [arg string] [arg index]] #[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) #[para]Returns the character (with applied ansi effect) at position index #[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. #[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) #[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. #[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. #[para]As any operation using end-+ will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. #[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. #[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code #[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. #[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. #[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible #[para]Notes: #[para]This function has to split the whole string into plaintext & ansi codes even for a very low index #[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. #[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run #todo - end-x +/-x+/-x etc set original_index $index set index [string map [list _ ""] $index] #short-circuit some trivial cases if {[string is integer -strict $index]} { if {$index < 0} {return ""} #this only short-circuits an index greater than length including ansi-chars #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length if {$index > [string length $string]} {return ""} } else { if {[string match end* $index]} { #for end- we will probably have to blow a few cycles stripping first and calculate the length if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { return "" } } else { set offset 0 } #by now, if op = + then offset = 0 so we only need to handle the minus case set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal if {$offset == 0} { set index [expr {$payload_len-1}] } else { set index [expr {($payload_len-1) - $offset}] } if {$index < 0} { #don't waste time splitting and looping the string return "" } } else { #we are trying to avoid evaluating unbraced expr of potentially insecure origin regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string if {[string is integer -strict $tail]} { #plain +- if {$op eq "-"} { #return nothing for negative indices as per Tcl's lindex etc return "" } set index $tail } else { if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } } } #any pt could be empty if using split_codes_single (or just first and last pt if split_codes) set low -1 set high -1 set pt_index -2 set pt_found -1 set char "" #set grapheme_codestacks [list] ;#stack of codes per grapheme - will be flattened/coalesced set codestack [list] #we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go #(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) foreach {pt code} $ansisplits { incr pt_index 2 #we want an index per grapheme - whether it is doublewide or single if {$pt ne ""} { set graphemes [punk::char::grapheme_split $pt] set low [expr {$high + 1}] ;#last high #incr high [string length $pt] incr high [llength $graphemes] } if {$pt ne "" && ($index >= $low && $index <= $high)} { set pt_found $pt_index #set char [string index $pt $index-$low] set char [lindex $graphemes $index-$low] break } if {[punk::ansi::codetype::is_sgr_reset $code]} { #we can throw away previous codestack set codestack [list] } elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} { set codestack [list $code] } else { #may have partial resets #sgr_merge_list will handle at end #we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. #Review - consider if any other types of code make sense to retain in the output in this context. if {[punk::ansi::codetype::is_sgr $code]} { lappend codestack $code } } } if {$pt_found >= 0} { return [punk::ansi::codetype::sgr_merge_list {*}$codestack]$char } else { return "" } } #helper to convert indices (possibly of form x+y end-x etc) to numeric values within the payload range i.e without ansi #return empty string for each index that is out of range #review - this is possibly too slow to be very useful as is. # consider converting to oo and maintaining state of ansisplits so we don't repeat relatively expensive operations for same string #see also punk::list_index_resolve / punk::list_index_get for ways to handle tcl list/string indices without parsing them. proc INDEXABSOLUTE {string args} { set payload_len -1 ;# -1 as token to indicate we haven't calculated it yet (only want to call it once at most) set testindices [list] foreach index $args { if {[string is integer -strict $index]} { if {$index < 0} { lappend testindices "" } elseif {$index > [string length $string]} { #this only short-circuits an index greater than length including ansi-chars #we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length lappend testindices "" } else { lappend testindices $index } } else { if {[string match end* $index]} { #for end- we will probably have to blow a few cycles stripping first and calculate the length if {$index ne "end"} { set op [string index $index 3] set offset [string range $index 4 end] if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} if {$op eq "+" && $offset != 0} { lappend testindices "" continue } } else { set offset 0 } #by now, if op = + then offset = 0 so we only need to handle the minus case if {$payload_len == -1} { set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal } if {$offset == 0} { set index [expr {$payload_len-1}] } else { set index [expr {($payload_len-1) - $offset}] } if {$index < 0} { lappend testindices "" } else { lappend testindices $index } } else { #we are trying to avoid evaluating unbraced expr of potentially insecure origin regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string if {[string is integer -strict $tail]} { #plain +- if {$op eq "-"} { #return nothing for negative indices as per Tcl's lindex etc lappend indices "" continue } set index $tail lappend testindices $index } else { if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { if {[string is integer -strict $a] && [string is integer -strict $b]} { if {$op eq "-"} { set index [expr {$a - $b}] } else { set index [expr {$a + $b}] } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } } else { error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" } lappend testindices $index } } } #assertion - we made exactly one append to testindices if there was no error } #we now have numeric or empty string indices - but haven't fully checked they are within the underlying payload length if {[join $testindices ""] eq ""} { #don't calc ansistring length if no indices to check return $testindices } if {$payload_len == -1} { set payload_len [punk::ansi::ansistring::length $string] } set indices [list] foreach ti $testindices { if {$ti ne ""} { if {$ti < $payload_len} { lappend indices $ti } else { lappend indices "" } } else { lappend indices "" } } return $indices } #Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc. #The column/row concept works for an ansistring that has been 'rendered' to some defined area. #row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows. #return pair of column extents occupied by the character index supplied. #single-width grapheme will return pair of integers of equal value #doulbe-width grapheme will return a pair of consecutive indices proc INDEXCOLUMNS {string idx} { #There is an index per grapheme - whether it is 1 or 2 columns wide set index [lindex [INDEXABSOLUTE $string $idx] 0] if {$index eq ""} { return "" } set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run set low -1 ;#low and high grapheme indexes set high -1 set lowc 0 ;#low and high column (1 based) set highc 0 set col1 "" set col2 "" set row 1 foreach {pt code} $ansisplits { if {$pt ne ""} { set ptlines [split $pt \n] set ptlinecount [llength $ptlines] set ptlineindex 0 foreach ptline $ptlines { set graphemes [punk::char::grapheme_split $ptline] if {$ptlineindex > 0} { #todo - account for previous \n as a grapheme .. what column? It should theoretically be in the rightmost column #zero width set low [expr {$high + 1}] set lowc [expr {$highc + 1}] set high $low set highc $lowc if {$index == $low} { set char \n set col1 $lowc set col2 $col1 break } incr row set lowc 0 set highc 0 } set low [expr {$high + 1}] ;#last high set lowc [expr {$highc + 1}] set high [expr {$low + [llength $graphemes] -1}] set highc [expr {$lowc + [punk::char::ansifreestring_width $ptline] -1}] #puts "---row:$row lowc:$lowc highc:$highc $ptline graphemes:$graphemes" if {$index >= $low && $index <= $high} { set char [lindex $graphemes $index-$low] set prefix [join [lrange $graphemes 0 [expr {$index-$low-1}]] ""] set prefixlen [punk::char::ansifreestring_width $prefix] set col1 [expr {$lowc + $prefixlen}] set gwidth [punk::char::ansifreestring_width $char] if {$gwidth < 1} { puts stderr "ansistring INDEXCOLUMNS warning - grapheme width zero at column $col1 ??" return "" ;#grapheme doesn't occupy a column and isn't a newline? - review } set col2 [expr {$col1 + ($gwidth -1)}] break } incr ptlineindex } } } if {$col1 ne "" & $col2 ne ""} { return [list $col1 $col2] } } #multiple rows - return a list? #return the grapheme index that occupies column col (could be first or second half of 2-wide grapheme) proc COLUMNINDEX {string col} { set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run set lowindex -1 ;#low and high grapheme indexes set highindex -1 set lowc 0 ;#low and high column (1 based) set highc 0 set col1 "" set col2 "" foreach {pt code} $ansisplits { if {$pt ne ""} { if {[string last \n $pt] < 0} { set graphemes [punk::char::grapheme_split $pt] set lowindex [expr {$highindex + 1}] ;#last high set lowc [expr {$highc + 1}] set highindex [expr {$lowindex + [llength $graphemes] -1}] set highc [expr {$lowc + [punk::char::ansifreestring_width $pt] -1}] if {$col >= $lowc && $col <= $highc} { if {$col == $lowc} { return $lowindex } elseif {$col == $highc} { return $highindex } set index [expr {$lowindex -1}] set str "" foreach g $graphemes { incr index append str $g set width [punk::char::ansifreestring_width $str] if {$lowc-1 + $width >= $col} { return $index } } error "ansistring COLUMNINDEX '$string' $col not found" ;#assertion - shouldn't happen } } else { error "ansistring COLUMNINDEX multiline not implemented" } } } } #*** !doctools #[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] } namespace eval punk::ansi::internal { proc splitn {str {len 1}} { #from textutil::split::splitn if {$len <= 0} { return -code error "len must be > 0" } if {$len == 1} { return [split $str {}] } set result [list] set max [string length $str] set i 0 set j [expr {$len -1}] while {$i < $max} { lappend result [string range $str $i $j] incr i $len incr j $len } return $result } proc splitx {str {regexp {[\t \r\n]+}}} { #from textutil::split::splitx # Bugfix 476988 if {[string length $str] == 0} { return {} } if {[string length $regexp] == 0} { return [::split $str ""] } if {[regexp $regexp {}]} { return -code error \ "splitting on regexp \"$regexp\" would cause infinite loop" } set list {} set start 0 while {[regexp -start $start -indices -- $regexp $str match submatch]} { foreach {subStart subEnd} $submatch break foreach {matchStart matchEnd} $match break incr matchStart -1 incr matchEnd lappend list [string range $str $start $matchStart] if {$subStart >= $start} { lappend list [string range $str $subStart $subEnd] } set start $matchEnd } lappend list [string range $str $start end] return $list } proc printing_length_addchar {i c} { upvar outchars outc upvar outsizes outs set nxt [llength $outc] if {$i < $nxt} { lset outc $i $c } else { lappend outc $c } } #string to 2digit hex - e.g used by XTGETTCAP proc str2hex {input} { set 2hex "" foreach ch [split $input ""] { append 2hex [format %02X [scan $ch %c]] } return $2hex } proc hex2str {2digithexchars} { set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) if {$2digithexchars eq ""} { return "" } if {[string length $2digithexchars] % 2 != 0} { error "hex2str requires an even number of hex digits (2 per character)" } set 2str "" foreach pair [splitn $2digithexchars 2] { append 2str [format %c 0x$pair] } return $2str } } # ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ ## Ready package provide punk::ansi [namespace eval punk::ansi { variable version set version 999999.0a1.0 }] return #*** !doctools #[manpage_end]