Browse Source

ansi art cp437 nul char fix + ansistring work

master
Julian Noble 8 months ago
parent
commit
a1e9865b80
  1. 161
      src/modules/punk/ansi-999999.0a1.0.tm
  2. BIN
      src/testansi/67_Calendar_2020_06_June.ans

161
src/modules/punk/ansi-999999.0a1.0.tm

@ -218,7 +218,9 @@ namespace eval punk::ansi {
#Layout for cp437 won't be right if you don't at least set width of control-chars to 1 - but also some images specifically use these glyphs
#most fonts don't seem to supply graphics for these control characters even when cp437 is in use - the c1 control glyphs appear to be more widely available - but we could add them here too
#by mapping these we can display regardless.
#nul char - no cp437 image. (which is good - because we use nul as a filler to mean empty column in overtype rendering)
#nul char - no cp437 image but commonly used as space in ansi graphics.
#(This is a potential conflict because we use nul as a filler to mean empty column in overtype rendering) REVIEW
#Display substitutes for the first cp437 control-character positions (0x00 - 0x03).
#Each cp437 code point needs its own key - 0x02 is the filled smiley, 0x03 is the heart.
dict set cp437_map \u0000 " " ;#space
dict set cp437_map \u0001 \u263A ;#smiley
dict set cp437_map \u0002 \u263B ;#smiley-filled
dict set cp437_map \u0003 \u2665 ;#heart
@ -1597,10 +1599,23 @@ namespace eval punk::ansi {
set codestate_initial $codestate_empty ;#keep a copy for resets.
set did_reset 0
#we should also handle 8bit CSI here? mixed \x1b\[ and \x9b ? Which should be used in the merged result?
#There are arguments to move to 8bit CSI for keyboard protocols (to solve keypress timing issues?) - but does this extend to SGR codes?
#we will output a 7bit merge of the SGRs even if some or all were 8bit CSI
#As at 2024 - 7bit are widely supported, whereas 8bit seem to be often ignored by pseudoterminals
#auto-detecting and emitting 8bit only if any are present in our input doesn't seem like a good idea - as sgr_merge_list is only seeing a subset of the data - so any auto-decision at this level will just introduce indeterminism.
#review - consider a higher-level option for always emitting 8bit or always 7bit
#either way - if we get mixed CSI input - it probably makes more sense to merge their parameters than maintain the distinction and pass the mess downstream.
#We still output any non SGR codes in the list as they came in - preserving their CSI
foreach c $args {
switch -- [string index $c 1][string index $c end] {
#normalize 8bit to a token of the same length so our string operations on the code are the same and we can maintain a switch statement with literals rather than escapes
#.. but preserve original c
set cnorm [string map [list \x9b {8[} ] $c]
switch -- [string index $cnorm 1][string index $cnorm end] {
{[m} {
set params [string range $c 2 end-1] ;#strip leading esc lb and trailing m
set params [string range $cnorm 2 end-1] ;#strip leading esc lb and trailing m
#some systems use colon for 256 colors or RGB or nonstandard subparameters
#- it is therefore probably not ok to map to semicolon within SGR codes and treat the same.
@ -2185,6 +2200,15 @@ namespace eval punk::ansi::class {
variable o_from_ansistring o_to_ansistring
variable o_ns_from o_ns_to ;#some dirty encapsulation violation as a 'friend' of ansistring objects - direct record of namespaces as they are frequently accessed
constructor {args} {
#-- make assert available --
# By pointing it to the assert imported into ::punk::ansi::class
# (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking chained imports)
set nspath [namespace path]
if {"::punk::ansi::class" ni $nspath} {
lappend nspath ::punk::ansi::class
}
namespace path $nspath
#-- --
if {[llength $args] < 2} {
error {usage: ?-width <int>? ?-wrap [1|0]? ?-overflow [1|0]? from_ansistring to_ansistring}
}
@ -2251,14 +2275,72 @@ namespace eval punk::ansi::class {
}
method rendernext {} {
    #Render the next unrendered split of the source ansistring and append it to the target ansistring.
    # A run of grapheme elements belonging to the same split is rendered as one chunk; an ANSI code is rendered as a single element.
    # Returns a dict with keys: type rendercount start_count_unrendered end_count_unrendered
    #  - type is the element type of the first element rendered (empty string when nothing was available)
    #  - rendercount is the number of source elements consumed by this call
    #  - start/end_count_unrendered are the numbers of source elements still unrendered before and after this call
    # Raises an error if the items rendered so far no longer match the leading elements of the source.
    upvar ${o_ns_from}::o_ansisplits from_ansisplits
    upvar ${o_ns_from}::o_elements   from_elements
    upvar ${o_ns_from}::o_splitindex from_splitindex

    #!!todo - a better way to keep MakeSplit semi hidden but callable from a 'friend' (e.g via an eval_in method on the source object)
    if {![llength $from_ansisplits]} {
        namespace eval $o_ns_from {my MakeSplit}
    }

    set eidx [llength $o_rendereditems]

    #compare what we've rendered so far to our source to confirm they're still in sync
    if {[lrange $o_rendereditems 0 $eidx-1] ne [lrange $from_elements 0 $eidx-1]} {
        puts stdout "rendereditems 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $o_rendereditems 0 $eidx-1]]"
        puts stdout "from_elements 0->[expr {$eidx-1}]: [ansistring VIEW [lrange $from_elements 0 $eidx-1]]"
        error "rendernext error - rendering state is out of sync. rendereditems list not-equal to corresponding part of ansistring $o_from_ansistring"
    }

    if {$eidx == [llength $from_elements]} {
        #nothing new available
        return [dict create type "" rendercount 0 start_count_unrendered 0 end_count_unrendered 0]
    }

    set start_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}]

    #we need to render in pt code chunks - not each grapheme element individually
    #translate from element index to ansisplits index
    set process_splitindex [lindex $from_splitindex $eidx] ;#which from_ansisplits index the first unrendered element belongs to
    set elementinfo [lindex $from_elements $eidx]
    lassign $elementinfo type_rendered item

    #we don't expect the type to change within a split - it should be all graphemes (type 'g') or a single code (type 'sgr','other' etc)
    #review - we may want to store more info for graphemes e.g g0 g1 g2 for zero-wide 1-wide 2-wide ?
    #if so - we should report a list of the grapheme types that were rendered in a pt block
    #as a counterpoint however - we don't currently retrieve grapheme width during split (performance impact at wrong time?) - and width may depend on the rendering method anyway
    #e.g c0 controls are normally zero printing width - but are (often) 1-wide glyphs in a cp437 rendering operation.

    #we want to render all the elements in this splitindex - for pt this may be multiple, for code it will be a single element
    set newtext ""
    set rendercount 0
    if {$type_rendered eq "g"} {
        set e_splitindex $process_splitindex
        #consume graphemes until we step into the next split or run out of source elements
        while {$e_splitindex == $process_splitindex && $eidx < [llength $from_elements]} {
            append newtext $item
            lappend o_rendereditems $elementinfo
            incr rendercount
            incr eidx
            #look ahead - past the end of the lists these are empty strings, which ends the loop
            set e_splitindex [lindex $from_splitindex $eidx]
            set elementinfo [lindex $from_elements $eidx]
            lassign $elementinfo _type item
        }
    } else {
        set newtext $item
        lappend o_rendereditems $elementinfo
        incr rendercount
    }

    set end_elements_unrendered [expr {[llength $from_elements] - [llength $o_rendereditems]}]
    set count_rendered [expr {$start_elements_unrendered - $end_elements_unrendered}]
    assert {$rendercount == $count_rendered}

    #todo - renderline equivalent?
    $o_to_ansistring append $newtext

    return [dict create type $type_rendered rendercount $rendercount start_count_unrendered $start_elements_unrendered end_count_unrendered $end_elements_unrendered]
}
}
@ -2274,42 +2356,58 @@ namespace eval punk::ansi::class {
#As this is intended for column-based terminals - it has a different notion of string length, string index etc than for a plain string.
#oo names beginning with uppercase are private - so we can't use capitalisation as a hint to distinguish those which differ from Tcl semantics
oo::class create class_ansistring {
variable o_cksum_command
variable o_string
variable o_count
variable o_cksum_command o_string o_count
#this is the main state we keep of the split apart string
#we use the punk::ansi::ta::split_codes_single function which produces a list with zero elements, or an odd number of elements always beginning and ending with plaintext
variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes
variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split
variable o_ptlist ;#plaintext as list of elements from ansisplits - will include empty elements from between adjacent ansi-codes
variable o_ansisplits ;#store our plaintext/ansi-code splits so we don't keep re-running the regexp to split
#State regarding output renderstring (if any)
variable o_renderout ;#another class_ansistring instance
variable o_renderer ;# punk::ansi::class::renderer::class_<rendertype> instance
variable o_renderout ;#another class_ansistring instance
variable o_renderer ;# punk::ansi::class::renderer::class_<rendertype> instance
variable o_renderwidth
variable o_rendertype
variable o_elements o_sgrstacks ;#elements contains entry for each grapheme/control + each ansi code, stacks has list of ansi sgr codes
variable o_gx0states
# -- per element lookups --
# llengths should all be the same
# we maintain 4 lookups per entry rather than a single nested list
# it is estimated that separate lists will be more efficient for certain operations - but that is open to review/testing.
variable o_elements ;#elements contains entry for each grapheme/control + each ansi code
variable o_sgrstacks ;#list of ansi sgr codes that will be merged later. Entries deliberately repeat if no change from previous entry. Later scans look for difference between n and n-1 when deciding where to apply codes.
variable o_gx0states ;#0|1 for alternate graphics gx0
variable o_splitindex ;#entry for each element indicating the index of the split it belongs to.
# -- --
constructor {string} {
set o_string $string
#-- make assert available --
# By pointing it to the assert imported into ::punk::ansi::class
# (we could alternatively import assert *directly* from ::punk::assertion::assert - but we can't chain imports as setting active flag renames the command, breaking imports)
set nspath [namespace path]
if {"::punk::ansi::class" ni $nspath} {
lappend nspath ::punk::ansi::class
}
namespace path $nspath
#-- --
#we choose not to generate an internal split-state for the initial string - which may potentially be large.
#there are a few methods such as get, has_ansi, show_state,checksum that can run efficiently on the initial string without generating it.
#The length method can use ansi::ta::detect to work quickly without updating it if it can, and other methods also update it as necessary
set o_count "" ;#o_count first updated when string appended or a method causes MakeSplit to run (or by count method if constructor argument was empty string)
set o_ansisplits [list] ;#we get empty pt(plaintext) between each ansi code. Codes include cursor movements, resets,alt graphics modes, terminal mode settings etc.
set o_ptlist [list]
#o_ansisplits and o_ptlist should only remain empty if an empty string was passed to the constructor, or no methods have yet triggered the initial string to have its internal state built.
set o_elements [list]
set o_sgrstacks [list]
set o_gx0states [list]
set o_splitindex [list]
set o_cksum_command [list sha1::sha1 -hex]
@ -2394,16 +2492,22 @@ namespace eval punk::ansi::class {
set o_ptlist [list]
set codestack [list]
set gx0_state 0 ;#default off
set current_split_index 0 ;#incremented for each pt block, incremented for each code
foreach {pt code} $o_ansisplits {
lappend o_ptlist $pt
foreach grapheme [punk::char::grapheme_split $pt] {
lappend o_elements [list g $grapheme]
lappend o_sgrstacks $codestack
lappend o_gx0states $gx0_state
lappend o_splitindex $current_split_index
}
#after handling the pt block - incr the current_split_index
incr current_split_index ;#increment for each pt block - whether empty string or not. Indices corresponding to empty PT blocks will therefore not be present in o_splitindex as there were no elements in that ansisplit entry
#we will only get an empty code at the very end of ansisplits (ansisplits is length 0 or odd length - always with pt at start and pt at end)
if {$code ne ""} {
lappend o_sgrstacks $codestack
lappend o_gx0states $gx0_state
lappend o_splitindex $current_split_index
#maintenance warning - dup in append!
if {[punk::ansi::codetype::is_sgr_reset $code]} {
@ -2429,12 +2533,14 @@ namespace eval punk::ansi::class {
lappend o_elements [list other $code]
}
}
#after each code (ignoring bogus empty final due to foreach with 2 vars on odd-length list) increment the current_split_index
incr current_split_index
}
#assertion every grapheme and every individual code has been added to o_elements
#every element has an entry in o_sgrstacks
#every element has an entry in o_gx0states
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]}
}
#assertion every grapheme and every individual code has been added to o_elements
#every element has an entry in o_sgrstacks
#every element has an entry in o_gx0states
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]}
}
method convert_altg {} {
#do we need a method to retrieve without converting in the object?
@ -2617,10 +2723,12 @@ namespace eval punk::ansi::class {
}
set last_codestack [lindex $o_sgrstacks end]
set last_gx0state [lindex $o_gx0states end]
set current_split_index [expr {[llength $o_ansisplits]-1}] ;#we are attaching to existing trailing pt - use its splitindex
foreach grapheme [punk::char::grapheme_split $catstr] {
lappend o_elements [list g $grapheme]
lappend o_sgrstacks $last_codestack
lappend o_gx0states $last_gx0state
lappend o_splitindex $current_split_index
}
incr o_count [my DoCount $catstr]
} else {
@ -2630,7 +2738,7 @@ namespace eval punk::ansi::class {
my MakeSplit
set combined_plaintext [join $o_ptlist ""]
set o_count [my DoCount $combined_plaintext]
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]}
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]}
return $o_string
} else {
#update each element of internal state incrementally without reprocessing what is already there.
@ -2639,6 +2747,7 @@ namespace eval punk::ansi::class {
set ptnew ""
set codestack [lindex $o_sgrstacks end]
set gx0_state [lindex $o_gx0states end]
#The first pt of newsplits is merged into the existing trailing pt of o_ansisplits,
#so split numbering continues from that trailing split's index instead of restarting at 0.
#(max guards against a negative index should o_ansisplits be empty here)
set current_split_index [expr {max(0,[llength $o_ansisplits]-1)}]
foreach {pt code} $newsplits {
lappend o_ptlist $pt
append ptnew $pt
@ -2646,10 +2755,13 @@ namespace eval punk::ansi::class {
lappend o_elements [list g $grapheme]
lappend o_sgrstacks $codestack
lappend o_gx0states $gx0_state
lappend o_splitindex $current_split_index
}
incr current_split_index ;#increment 1 of 2 within each loop
if {$code ne ""} {
lappend o_sgrstacks $codestack
lappend o_gx0states $gx0_state
lappend o_splitindex $current_split_index
#maintenance - dup in MakeSplit!
if {[punk::ansi::codetype::is_sgr_reset $code]} {
set codestack [list]
@ -2674,6 +2786,7 @@ namespace eval punk::ansi::class {
lappend o_elements [list other $code]
}
}
incr current_split_index ;#increment 2 of 2
}
}
lset o_ansisplits end [string cat [lindex $o_ansisplits end] [lindex $newsplits 0]]
@ -2681,7 +2794,7 @@ namespace eval punk::ansi::class {
incr o_count [my DoCount $ptnew]
}
}
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states]}
assert {[llength $o_elements] == [llength $o_sgrstacks] && [llength $o_elements] == [llength $o_gx0states] && [llength $o_elements] == [llength $o_splitindex]}
return $o_string
}
#method append_and_render - append and render up to end of appended data at same time
@ -3556,7 +3669,7 @@ namespace eval punk::ansi::ansistring {
#Todo - rows! Note that a 'row' doesn't represent an output row if the ANSI string we are working with contains movement/cursor restores etc.
#The column/row concept works for an ansistring that has been 'rendered' to some defined area.
#row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many.
#row for arbitrary ANSI input only tells us which line of input we are in - e.g a single massive line of ANSI input would appear to have one row but could result in many rendered output rows.
#return pair of column extents occupied by the character index supplied.
#single-width grapheme will return pair of integers of equal value

BIN
src/testansi/67_Calendar_2020_06_June.ans

Binary file not shown.
Loading…
Cancel
Save