#Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs
#most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too
#by mapping these we can display regardless.
#nul char - no cp437 image. (which is good - because we use nul as a filler to mean empty column in overtype rendering)
#nul char - no cp437 image but commonly used as space in ansi graphics.
#(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW
set codestate_initial $codestate_empty ;#keep a copy for resets.
set did_reset 0
#we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result?
#There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes?
#we will output a 7bit merge of the SGRs even if some or all were 8bit CSI
#As of 2024 - 7bit are widely supported; 8bit seem to be often ignored by pseudoterminals
#auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism.
#review - consider a higher-level option for always emitting 8bit or always 7bit
#either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream.
#We still output any non SGR codes in the list as they came in - preserving their CSI
foreach c $args {
switch -- [string index $c 1][string index $c end] {
#normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes
#.. but preserve original c
set cnorm [string map [list \x9b {8[} ] $c]
switch -- [string index $cnorm 1][string index $cnorm end] {
{[m} {
set params [string range $c 2 end-1] ;#strip leading esc lb and trailing m
set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m
#some systems use colon for 256 colors or RGB or nonstandard subparameters
#- it is therefore probably not ok to map to semicolon within SGR codes and treat the same.
variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed
constructor {args} {
#-- make assert available --
# By pointing it to the assert imported into ::punk::ansi::class
# (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports)
if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend'
set elements_unrendered [expr {[llength $elements] - [llength $o_rendereditems]}]
#if {![llength $from_ansisplits]} {$o_from_ansistring eval_in {my MakeSplit}} ;#!!todo - a better way to keep this method semi hidden but call from a 'friend'
if {![llength $from_ansisplits]} {
namespace eval $o_ns_from {my MakeSplit}
}
set eidx [llength $o_rendereditems]
#compare what we've rendered so far to our source to confirm they're still in sync
if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} {
set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}]
#we need to render in pt code chunks - not each grapheme element individually
#translate from element index to ansisplits index?
#translate from element index to ansisplits index
set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to
set elementinfo [lindex $from_elements $eidx]
lassign $elementinfo type_rendered item
#we don't expect type to change - it should be all graphemes (type 'g') or a single code (type 'sgr','other' etc)
#review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ?
#if so - we should report a list of the grapheme types that were rendered in a pt block
#as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway
#e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation.
#we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element
#As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string.
#oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics
oo::class create class_ansistring {
variable o_cksum_command
variable o_string
variable o_count
variable o_cksum_command o_string o_count
#this is the main state we keep of the split apart string
#we use the punk::ansi::ta::split_codes_single function which produces a list with zero, or an odd number of elements - always beginning and ending with plaintext
variable o_elements o_sgrstacks ;#elements contains entry for each grapheme/control + each ansi code, stacks has list of ansi sgr codes
variable o_gx0states
# -- per element lookups --
# llengths should all be the same
# we maintain 4 lookups per entry rather than a single nested list
# it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing.
variable o_elements ;#elements contains entry for each grapheme/control + each ansi code
variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes.
variable o_gx0states ;#0|1 for alternate graphics gx0
variable o_splitindex ;#entry for each element indicating the index of the split it belongs to.
# -- --
constructor {string} {
set o_string $string
#-- make assert available --
# By pointing it to the assert imported into ::punk::ansi::class
# (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports)
set nspath [namespace path]
if {"::punk::ansi::class" ni $nspath} {
lappend nspath ::punk::ansi::class
}
namespace path $nspath
#-- --
#we choose not to generate an internal split-state for the initial string - which may potentially be large.
#there are a few methods such as get, has_ansi, show_state, checksum that can run efficiently on the initial string without generating it.
#The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary
set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string)
set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc.
set o_ptlist [list]
#o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the constructor, or no methods have yet triggered the initial string to have its internal state built.
#after handling the pt block - incr the current_split_index
incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry
#we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end)
#Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc.
#The column/row concept works for an ansistring that has been 'rendered' to some defined area.
#row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many.
#row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows.
#return pair of column extents occupied by the character index supplied.
#single-width grapheme will return pair of integers of equal value