#[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}]
}
# -- --- --- --- --- --- --- --- --- --- ---
namespace eval punk::ansi::class {
#As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string.
oo::class create class_ansistring {
variable o_cksum_command
variable o_string
variable o_count
#this is the main state we keep of the split apart string
#we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number elements always beginning and ending with plaintext
variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes
variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split
constructor {string} {
set o_string $string
#we choose not to generate an internal split-state for the initial string - which may potentially be large.
#there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it.
#The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary
set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string)
set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc.
set o_ptlist [list]
#o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the constructor, or no methods have yet triggered the initial string to have its internal state built.
#This is the count of visible graphemes + non-ansi control chars. Not equal to column width or to the Tcl string length of the ansistripped string!!!
method count {} {
    # Returns the cached element count, computing and caching it on first use.
    # o_count is the empty string until a count has been established.
    if {$o_count ne ""} {
        return $o_count
    }
    # Nothing has been counted yet - only the initial string is present.
    if {$o_string eq ""} {
        set o_count 0
        return 0
    }
    # Build the split-state, then count over the concatenated plaintext parts.
    # NOTE(review): MakeSplit and DoCount are defined outside this view; MakeSplit is presumed to populate o_ptlist.
    my MakeSplit
    set plaintext [join $o_ptlist ""]
    set o_count [my DoCount $plaintext]
    return $o_count
}
#this is the equivalent of Tcl string length on the ansistripped string
method length {} {
    # Returns the Tcl string length of the string with ansi codes excluded.
    # Avoids building the internal split-state when the string holds no ansi codes.
    if {![llength $o_ansisplits]} {
        # No split-state yet: only build it if ansi is actually present.
        if {[punk::ansi::ta::detect $o_string]} {
            my MakeSplit
        } else {
            return [string length $o_string]
        }
    } elseif {[llength $o_ansisplits] == 1} {
        #single split always means no ansi
        # The call must be bracketed: an unbracketed 'return string length $o_string'
        # treats 'string length' as an option/value pair and returns the string itself.
        return [string length $o_string]
    }
    # Ansi present: measure only the plaintext parts.
    return [string length [join $o_ptlist ""]]
}
method get {} {
    # Accessor: yields the complete stored string exactly as held in o_string.
    # A single-argument 'set' evaluates to the variable's value, which becomes the method result.
    set o_string
}
method has_ansi {} {
    # Returns a boolean indicating whether the string contains ansi codes.
    set splitcount [llength $o_ansisplits]
    if {$splitcount == 0} {
        # Split-state not built yet - for large strings, running detect on the raw string
        # is faster than generating the internal split-state.
        return [punk::ansi::ta::detect $o_string]
    }
    # A lone element in o_ansisplits means only plaintext has been stored/appended.
    return [expr {$splitcount != 1}]
}
method append {args} {
set catstr [join $args ""]
if {$catstr eq ""} {
return $o_string
}
if {![punk::ansi::ta::detect $catstr]} {
#ansi-free additions
#if no initial internal-split - generate it without first appending our additions - as we can more efficiently append them to the internal state
if {![llength $o_ansisplits]} {
#initialise o_count because we need to add to it.
#The count method will do this by calling Makesplit only if it needs to. (which will create ansisplits for anything except empty string)
my count
}
append o_string $catstr;# only append after updating using my count above
if {![llength $o_ptlist]} {
#If the object was initialised with empty string - we can still have empty lists for o_ptlist and o_ansisplits
#even though we can use lset to add to a list - we can't for empty
lappend o_ptlist $catstr
#assert - if o_ptlist is empty so is o_ansisplits
lappend o_ansisplits $catstr
} else {
lset o_ptlist end [string cat [lindex $o_ptlist end] $catstr]
lset o_ansisplits end [string cat [lindex $o_ansisplits end] $catstr]
}
incr o_count [my DoCount $catstr]
} else {
if {![llength $o_ansisplits]} {
#if we have an initial string - but no internal split-state because this is our first append and no methods have caused its generation - we can run more efficiently by combining it with the first append
append o_string $catstr ;#append before split and count on whole lot
my MakeSplit
set combined_plaintext [join $o_ptlist ""]
set o_count [my DoCount $combined_plaintext]
return $o_string
} else {
#update each element of internal state incrementally without reprocessing what is already there.
append o_string $catstr
set newsplits [punk::ansi::ta::split_codes_single $catstr]
#todo - change to COUNT to emphasize the difference between this and doing a Tcl string length on the ansistripped string!
#review. Tabs/elastic tabstops. Do we want to count a tab as one element? Probably so if we are doing so for \n etc and not counting 2W unicode.
#Consider leaving tab manipulation for a width function which determines columns occupied for all such things.
proc COUNT {string} {
#*** !doctools
#[call [fun length] [arg string]]
#[para]Returns the length of the string without ansi codes
#[call [fun COUNT] [arg string]]
#[para]Returns the count of visible graphemes and non-ansi control characters
#[para]Incomplete! grapheme clustering support not yet implemented - only diacritics are currently clustered to count as one grapheme.
#[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence.
#[para]This is equivalent to calling string length on the result of stripansi $string
#[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal.
#[para]This is not quite equivalent to calling string length on the result of stripansi $string due to diacritics and/or grapheme combinations
#[para]Note that this returns the number of characters in the payload (after applying combiners)
#It is not always the same as the width of the string as rendered on a terminal due to 2wide Unicode characters and the usual invisible control characters such as \r and \n
#[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware.
#stripping diacritics only makes sense if we are counting them as combiners and also treating unicode grapheme combinations as single entities.
#As our ansistring index function returns the character with diacritics, and will ultimately return grapheme clusters as a single element - we strip them here as not counted.
#todo - combiners/diacritics? just map them away here?
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set string [regsub -all $re_diacritics $string ""]
#test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var
#These have similar algorithms/requirements - and should be refactored to be argument-wrappers over a function called something like overtype::renderblock
#overtype::renderblock could render the input to a defined (possibly overflowing in x or y) rectangle overlapping the underlay.
#(i.e. not even necessarily having its top left within the underlay)
# Ensure the overtype::priv namespace exists. The body is intentionally empty here;
# nothing is defined in it by this statement.
namespace eval overtype::priv {
}
#could return larger than colwidth
proc _get_row_append_column {row} {
    # Returns the 1-based column at which appended data would start on the given row.
    #
    # Reads variables from the caller's frame via upvar:
    #   outputlines  - list of lines; row N corresponds to list index N-1
    #   opt_overflow - boolean; when true the result is not clamped
    #   colwidth     - clamp limit; without overflow the result is at most colwidth+1
    #
    # Returns 1 when row is <= 1 or lies beyond the end of outputlines.
    # NOTE(review): row 1 always yields column 1 even when outputlines has content on that row - confirm this is intended.
    upvar outputlines outputlines
    if {$row <= 1 || $row > [llength $outputlines]} {
        return 1
    }
    upvar opt_overflow opt_overflow
    upvar colwidth colwidth
    set idx [expr {$row - 1}]
    set existinglen [punk::ansi::printing_length [lindex $outputlines $idx]]
    set endpos [expr {$existinglen + 1}]
    if {$opt_overflow} {
        return $endpos
    }
    if {$endpos > $colwidth} {
        # Must go through expr: an unbracketed 'return $colwidth + 1' is parsed as an
        # option/value pair followed by the result "1", so it always returned 1.
        return [expr {$colwidth + 1}]
    }
    return $endpos
}
#string range should generally be avoided for both undertext and overtext which contain ansi escapes and other cursor affecting chars such as \b and \r
#render onto an already-rendered (ansi already processed) 'underlay' string, a possibly ansi-laden 'overlay' string.
#The underlay and overlay can be multiline blocks of text of varying line lengths.
error "overtype::left unknown option '$k'. Known options: $known_opts"
@ -262,6 +287,24 @@ proc overtype::left {args} {
set opt_exposed2 [dict get $opts -exposed2] ;#widechar_exposed_right - todo
# -- --- --- --- --- ---
# ----------------------------
# -experimental dev flag to set flags etc
# ----------------------------
set data_mode 0
set test_mode 0
set opt_experimental [dict get $opts -experimental]
foreach o $opt_experimental {
switch -- $o {
test_mode {
set test_mode 1
}
data_mode {
set data_mode 1
}
}
}
# ----------------------------
#modes
set insert_mode 0 ;#can be toggled by insert key or ansi IRM sequence ESC [ 4 h|l
set autowrap_mode $opt_wrap
@ -288,6 +331,15 @@ proc overtype::left {args} {
}
set overlines [split $overblock \n]
if {$test_mode} {
set lflines [list]
foreach ln $overlines {
append ln \n
lappend lflines $ln
}
lset lflines end [string range [lindex $lflines end] 0 end-1]
set overlines $lflines[unset lflines]
}
#overblock height/width isn't useful in the presence of an ansi input overlay with movements. The number of lines may bear little relationship to the output height
#underlines are not necessarily processed in order - depending on cursor-moves applied from overtext
set row 1
set prevrow 1
set col 1
set outputlines $underlines
set underlay_resets [list]
set overidx 0
#underlines are not necessarily processed in order - depending on cursor-moves applied from overtext
set prevrow 1
set row 1
if {$data_mode} {
set col [_get_row_append_column $row]
} else {
set col 1
}
while {$overidx < [llength $overlines]} {
flush stdout
@ -324,7 +381,7 @@ proc overtype::left {args} {
}
#review insert_mode. As an 'overtype' function whose main function is not interactive keystrokes - insert is secondary -
#but even if we didn't want it as an option to the function call - to process ansi adequately we need to support IRM (insertion-replacement mode) ESC [ 4 h|l