|
|
# -*- tcl -*- |
|
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
|
# |
|
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
# (C) 2023 |
|
|
# |
|
|
# @@ Meta Begin |
|
|
# Application punk::ansi 0.1.1 |
|
|
# Meta platform tcl |
|
|
# Meta license <unspecified> |
|
|
# @@ Meta End |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
# doctools header |
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
#*** !doctools |
|
|
#[manpage_begin punkshell_module_punk::ansi 0 0.1.1] |
|
|
#[copyright "2023"] |
|
|
#[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] |
|
|
#[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] |
|
|
#[require punk::ansi] |
|
|
#[keywords module ansi terminal console string] |
|
|
#[description] |
|
|
#[para]Ansi based terminal control string functions |
|
|
#[para]See [package punk::ansi::console] for related functions for controlling a console |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
#*** !doctools |
|
|
#[section Overview] |
|
|
#[para] overview of punk::ansi |
|
|
#[para]punk::ansi functions return their values - no implicit emission to console/stdout |
|
|
#[subsection Concepts] |
|
|
#[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner |
|
|
#[para]There are many differences in terminal implementations - but most should support a core set of features |
|
|
#[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. |
|
|
#[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. |
|
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
## Requirements |
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
#*** !doctools |
|
|
#[subsection dependencies] |
|
|
#[para] packages used by punk::ansi |
|
|
#[list_begin itemized] |
|
|
|
|
|
package require Tcl 8.6- |
|
|
package require punk::char |
|
|
#*** !doctools |
|
|
#[item] [package {Tcl 8.6-}] |
|
|
#[item] [package {punk::char}] |
|
|
|
|
|
# #package require frobz |
|
|
# #*** !doctools |
|
|
# #[item] [package {frobz}] |
|
|
|
|
|
#*** !doctools |
|
|
#[list_end] |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
|
|
#*** !doctools |
|
|
#[section API] |
|
|
|
|
|
|
|
|
namespace eval punk::ansi::class { |
|
|
if {![llength [info commands class_ansi]]} { |
|
|
oo::class create class_ansi { |
|
|
variable o_raw |
|
|
|
|
|
variable o_render_dimensions ;#last dimensions at which we rendered |
|
|
variable o_rendered |
|
|
variable o_rendered_what |
|
|
constructor {ansitext {dimensions 80x25}} { |
|
|
if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { |
|
|
error "class_ansi::render dimensions must be of the form <width>x<height>" |
|
|
} |
|
|
set o_rendered_what "" |
|
|
set o_render_dimensions $dimensions |
|
|
set o_raw $ansitext |
|
|
} |
|
|
method rawdata {} { |
|
|
return $o_raw |
|
|
} |
|
|
method render {{dimensions ""}} { |
|
|
if {$dimensions eq ""} { |
|
|
set dimensions $o_render_dimensions |
|
|
} |
|
|
if {![regexp {^([0-9]+)[xX]([0-9]+)$} $dimensions _m w h]} { |
|
|
error "class_ansi::render dimensions must be of the form <width>x<height>" |
|
|
} |
|
|
if {$o_rendered_what ne $o_raw || $dimensions ne $o_render_dimensions} { |
|
|
set b [textblock::block $w $h " "] |
|
|
set o_rendered [overtype::left $b $o_raw] |
|
|
set o_rendered_what $o_raw |
|
|
set o_render_dimensions $dimensions |
|
|
} |
|
|
|
|
|
#todo - store rendered and allow partial rendering of new data lines? |
|
|
return $o_rendered |
|
|
} |
|
|
method viewlines {} { |
|
|
return [ansistring VIEW $o_raw] |
|
|
} |
|
|
method viewcodes {} { |
|
|
return [ansistring VIEWCODES $o_raw] |
|
|
} |
|
|
method viewchars {} { |
|
|
return [punk::ansi::stripansiraw $o_raw] |
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
namespace eval punk::ansi { |
|
|
#*** !doctools |
|
|
#[subsection {Namespace punk::ansi}] |
|
|
#[para] Core API functions for punk::ansi |
|
|
#[list_begin definitions] |
|
|
|
|
|
|
|
|
#see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control |
|
|
|
|
|
variable test "blah\033\[1;33mETC\033\[0;mOK" |
|
|
|
|
|
#Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. |
|
|
namespace export\ |
|
|
{a?} {a+} a \ |
|
|
ansistring\ |
|
|
convert*\ |
|
|
clear*\ |
|
|
cursor_*\ |
|
|
detect*\ |
|
|
get_*\ |
|
|
move*\ |
|
|
reset*\ |
|
|
strip*\ |
|
|
test_decaln\ |
|
|
titleset\ |
|
|
|
|
|
|
|
|
variable escape_terminals |
|
|
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). |
|
|
dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] |
|
|
#dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic |
|
|
dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals |
|
|
dict set escape_terminals DCS [list \007 \033\\ \u009c] |
|
|
dict set escape_terminals MISC [list \007 \033\\ \u009c] |
|
|
#NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) |
|
|
#This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? |
|
|
|
|
|
#review - there doesn't seem to be an \x1b#7 |
|
|
# https://espterm.github.io/docs/VT100%20escape%20codes.html |
|
|
|
|
|
#self-contained 2 byte ansi escape sequences - review more? |
|
|
set ansi_2byte_codes_dict [dict create\ |
|
|
"reset_terminal" "\u001bc"\ |
|
|
"save_cursor_posn" "\u001b7"\ |
|
|
"restore_cursor_posn" "\u001b8"\ |
|
|
"cursor_up_one" "\u001bM"\ |
|
|
"NEL - Next Line" "\u001bE"\ |
|
|
"IND - Down one line" "\u001bD"\ |
|
|
"HTS - Set Tab Stop" "\u001bH"\ |
|
|
"DECPAM app keypad" "\x1b="\ |
|
|
"DECPNM norm keypad" "\x1b>"\ |
|
|
] |
|
|
|
|
|
|
|
|
#review - We have file possibly encoded directly in another codepage such as 437 - or utf8,utf16 etc, but then still needing post conversion to e.g cp437? |
|
|
proc readfile {fname} { |
|
|
#todo |
|
|
#1- look for BOM - read according to format given by BOM |
|
|
#2- assume utf-8 |
|
|
#3- if errors - assume cp437? |
|
|
|
|
|
set data [fcat $fname] |
|
|
if {[file extension $fname] eq ".ans"} { |
|
|
set ansidata [encoding convertfrom cp437 $data] |
|
|
} else { |
|
|
set ansidata $data |
|
|
} |
|
|
set obj [punk::ansi::class::class_ansi new $ansidata] |
|
|
return $obj |
|
|
} |
|
|
proc is_utf8_char {char} { |
|
|
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) |
|
|
[\x00-\x7F] | # Single-byte chars (ASCII range) |
|
|
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) |
|
|
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) |
|
|
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) |
|
|
} $char |
|
|
} |
|
|
proc get_utf8 {text} { |
|
|
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-) |
|
|
\A ( |
|
|
[\x00-\x7F] | # Single-byte chars (ASCII range) |
|
|
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF) |
|
|
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF) |
|
|
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5) |
|
|
) + |
|
|
} $text completeChars |
|
|
return $completeChars |
|
|
} |
|
|
#control strings |
|
|
#https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf |
|
|
#<excerpt> |
|
|
#A control string is a string of bit combinations which may occur in the data stream as a logical entity for |
|
|
#control purposes. A control string consists of an opening delimiter, a command string or a character string, |
|
|
#and a terminating delimiter, the STRING TERMINATOR (ST). |
|
|
#A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. |
|
|
#A character string is a sequence of any bit combination, except those representing START OF STRING |
|
|
#(SOS) or STRING TERMINATOR (ST). |
|
|
#The interpretation of the command string or the character string is not defined by this Standard, but instead |
|
|
#requires prior agreement between the sender and the recipient of the data. |
|
|
#The opening delimiters defined in this Standard are |
|
|
#a) APPLICATION PROGRAM COMMAND (APC) |
|
|
#b) DEVICE CONTROL STRING (DCS) |
|
|
#c) OPERATING SYSTEM COMMAND (OSC) |
|
|
#d) PRIVACY MESSAGE (PM) |
|
|
#e) START OF STRING (SOS) |
|
|
#</excerpt> |
|
|
|
|
|
#debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. |
|
|
# "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ |
|
|
#The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. |
|
|
#todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. |
|
|
#review - can terminals handle SGR codes within a PM? |
|
|
#Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) |
|
|
proc controlstring_PM {text} { |
|
|
return "\x1b^${text}\033\\" |
|
|
} |
|
|
proc controlstring_PM8 {text} { |
|
|
return "\x9e${text}\x9c" |
|
|
} |
|
|
proc controlstring_SOS {text} { |
|
|
return "\x1bX${text}\033\\" |
|
|
} |
|
|
proc controlstring_SOS8 {text} { |
|
|
return "\x98${text}\x9c" |
|
|
} |
|
|
proc controlstring_APC {text} { |
|
|
return "\x1b_${text}\033\\" |
|
|
} |
|
|
proc controlstring_APC8 {text} { |
|
|
return "\x9f${text}\x9c" |
|
|
} |
|
|
#there is also the SGR hide code (8) which has intermittent terminal support |
|
|
#This doesn't change the output length - so support is tricky to detec. (terminal checksum report?) |
|
|
|
|
|
#candidate for zig/c implementation? |
|
|
proc stripansi {text} { |
|
|
#*** !doctools |
|
|
#[call [fun stripansi] [arg text] ] |
|
|
#[para]Return a string with ansi codes stripped out |
|
|
#[para]Alternate graphics chars are replaced with modern unicode equivalents (e.g boxdrawing glyphs) |
|
|
|
|
|
#using detect costs us a couple of uS - but saves time on plain text |
|
|
#we should probably leave this for caller - otherwise it ends up being called more than necessary |
|
|
#if {![::punk::ansi::ta::detect $text]} { |
|
|
# return $text |
|
|
#} |
|
|
|
|
|
set text [convert_g0 $text];#Convert ansi borders to unicode line drawing instead of ascii letters |
|
|
join [::punk::ansi::ta::split_at_codes $text] "" |
|
|
} |
|
|
proc stripansiraw {text} { |
|
|
#*** !doctools |
|
|
#[call [fun stripansi] [arg text] ] |
|
|
#[para]Return a string with ansi codes stripped out |
|
|
#[para]Alternate graphics modes will be stripped - exposing the raw characters as they appear without graphics mode. |
|
|
#[para]ie instead of a horizontal line you may see: qqqqqq |
|
|
|
|
|
|
|
|
join [::punk::ansi::ta::split_at_codes $text] "" |
|
|
} |
|
|
proc stripansi1 {text} { |
|
|
|
|
|
#todo - character set selection - SS2 SS3 - how are they terminated? REVIEW |
|
|
|
|
|
variable escape_terminals ;#dict |
|
|
variable ::punk::ansi::ta::standalone_code_map ;#map to empty string |
|
|
|
|
|
set text [convert_g0 $text] |
|
|
|
|
|
|
|
|
set text [string map $standalone_code_map $text] |
|
|
#e.g standalone 2 byte and 3 byte VT100(?) sequences - some of these work in wezterm |
|
|
#\x1b#3 double-height letters top half |
|
|
#\x1b#4 double-height letters bottom half |
|
|
#\x1b#5 single-width line |
|
|
#\x1b#6 double-width line |
|
|
#\x1b#8 dec test fill screen |
|
|
|
|
|
|
|
|
#we process char by char - line-endings whether \r\n or \n should be processed as per any other character. |
|
|
|
|
|
#Theoretically line endings can occur within an ST payload (review e.g title?) |
|
|
#ecma standard says: The character string following may consist of any bit combination, except those representing SOS or STRING TERMINATOR (ST) |
|
|
|
|
|
set inputlist [split $text ""] |
|
|
set outputlist [list] |
|
|
|
|
|
set in_escapesequence 0 |
|
|
#assumption - text already 'rendered' - ie no cursor movement controls . (what about backspace and lone carriage returns - they are horizontal cursor movements) |
|
|
|
|
|
set i 0 |
|
|
foreach u $inputlist { |
|
|
set v [lindex $inputlist $i+1] |
|
|
set uv ${u}${v} |
|
|
if {$in_escapesequence eq "2b"} { |
|
|
#2nd byte - done. |
|
|
set in_escapesequence 0 |
|
|
} elseif {$in_escapesequence != 0} { |
|
|
set endseq [dict get $escape_terminals $in_escapesequence] |
|
|
if {$u in $endseq} { |
|
|
set in_escapesequence 0 |
|
|
} elseif {$uv in $endseq} { |
|
|
set in_escapesequence 2b ;#flag next byte as last in sequence |
|
|
} |
|
|
} else { |
|
|
#handle both 7-bit and 8-bit CSI and OSC |
|
|
if {[regexp {^(?:\033\[|\u009b)} $uv]} { |
|
|
set in_escapesequence CSI |
|
|
} elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { |
|
|
set in_escapesequence OSC |
|
|
} elseif {[regexp {^(?:\033P|\u0090)} $uv]} { |
|
|
set in_escapesequence DCS |
|
|
} elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { |
|
|
#SOS,PM,APC - all terminated with ST |
|
|
set in_escapesequence MISC |
|
|
} else { |
|
|
lappend outputlist $u |
|
|
} |
|
|
} |
|
|
incr i |
|
|
} |
|
|
return [join $outputlist ""] |
|
|
} |
|
|
|
|
|
#review - what happens when no terminator? |
|
|
#todo - map other chars to unicode equivs |
|
|
# convert esc(0 -> esc(B graphics sequences to single char unicode equivalents e.g box drawing set |
|
|
# esc) ?? |
|
|
proc convert_g0 {text} { |
|
|
#using not \033 inside to stop greediness - review how does it compare to ".*?" |
|
|
#variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} |
|
|
#set re {\033\(0[^\033]*\033\(B} |
|
|
#set re {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} |
|
|
|
|
|
|
|
|
set re2 {\033\(0(.*)\033\(B} ;#capturing |
|
|
|
|
|
#puts --$g-- |
|
|
#box sample |
|
|
#lqk |
|
|
#x x |
|
|
#mqj |
|
|
#m = boxd_lur |
|
|
#set map [list l \u250f k \u2513] ;#heavy |
|
|
set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines |
|
|
#todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html |
|
|
|
|
|
set re_g0_open_or_close {\x1b\(0|\x1b\(B} |
|
|
set parts [::punk::ansi::ta::_perlish_split $re_g0_open_or_close $text] |
|
|
set out "" |
|
|
set g0_on 0 |
|
|
foreach {pt g} $parts { |
|
|
if {$g0_on} { |
|
|
#split for non graphics-set codes |
|
|
set othersplits [punk::ansi::ta::split_codes $pt] ;#we don't need single codes here |
|
|
foreach {innerpt innercodes} $othersplits { |
|
|
append out [string map $map $innerpt] |
|
|
append out $innercodes ;#Simplifying assumption - ST codes, titlesets etc don't require/use g0 content |
|
|
} |
|
|
} else { |
|
|
append out $pt ;#may include other codes - put it all through. |
|
|
} |
|
|
if {$g ne ""} { |
|
|
if {[punk::ansi::codetype::is_gx_open $g]} { |
|
|
set g0_on 1 |
|
|
} elseif {[punk::ansi::codetype::is_gx_close $g]} { |
|
|
set g0_on 0 |
|
|
} |
|
|
} |
|
|
} |
|
|
return $out |
|
|
} |
|
|
proc convert_g0_wrong {text} { |
|
|
#Attempting to split on a group is wrong - because there could be other ansi codes while inside a g0 section |
|
|
#That will either stop us matching - so no conversion - or risk converting parts of the ansi codes |
|
|
#using not \033 inside to stop greediness - review how does it compare to ".*?" |
|
|
#variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} |
|
|
set re {\033\(0[^\033]*\033\(B} |
|
|
set re2 {\033\(0(.*)\033\(B} ;#capturing |
|
|
|
|
|
#box sample |
|
|
#lqk |
|
|
#x x |
|
|
#mqj |
|
|
#m = boxd_lur |
|
|
#set map [list l \u250f k \u2513] ;#heavy |
|
|
set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light box drawing lines |
|
|
#todo - map the rest https://vt100.net/docs/vt220-rm/chapter2.html |
|
|
|
|
|
set parts [::punk::ansi::ta::_perlish_split $re $text] |
|
|
set out "" |
|
|
foreach {pt g} $parts { |
|
|
append out $pt |
|
|
if {$g ne ""} { |
|
|
#puts --$g-- |
|
|
regexp $re2 $g _match contents |
|
|
append out [string map $map $contents] |
|
|
} |
|
|
} |
|
|
return $out |
|
|
} |
|
|
proc g0 {text} { |
|
|
return \x1b(0$text\x1b(B |
|
|
} |
|
|
proc stripansi_gx {text} { |
|
|
#e.g "\033(0" - select VT100 graphics for character set G0 |
|
|
#e.g "\033(B" - reset |
|
|
#e.g "\033)0" - select VT100 graphics for character set G1 |
|
|
#e.g "\033)X" - where X is any char other than 0 to reset ?? |
|
|
|
|
|
#return [convert_g0 $text] |
|
|
return [string map [list "\x1b(0" "" \x1b(B" "" "\x1b)0" "" "\x1b)X" ""] $text] |
|
|
} |
|
|
|
|
|
|
|
|
#CSI <n> m = SGR (Select Graphic Rendition) |
|
|
variable SGR_setting_map { |
|
|
bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 |
|
|
underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 |
|
|
reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 |
|
|
overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 |
|
|
} |
|
|
variable SGR_colour_map { |
|
|
black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 |
|
|
Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 |
|
|
BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 |
|
|
} |
|
|
variable SGR_map |
|
|
set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] |
|
|
|
|
|
|
|
|
proc colourmap1 {{bgname White}} { |
|
|
package require textblock |
|
|
|
|
|
set bg [textblock::block 33 3 "[a+ $bgname] [a]"] |
|
|
set colormap "" |
|
|
for {set i 0} {$i <= 7} {incr i} { |
|
|
append colormap "_[a+ white bold 48\;5\;$i] $i [a]" |
|
|
} |
|
|
set map1 [overtype::left -transparent _ $bg "\n$colormap"] |
|
|
return $map1 |
|
|
} |
|
|
proc colourmap2 {{bgname White}} { |
|
|
package require textblock |
|
|
set bg [textblock::block 39 3 "[a+ $bgname] [a]"] |
|
|
set colormap "" |
|
|
for {set i 8} {$i <= 15} {incr i} { |
|
|
append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey |
|
|
} |
|
|
set map2 [overtype::left -transparent _ $bg "\n$colormap"] |
|
|
return $map2 |
|
|
} |
|
|
proc a? {args} { |
|
|
#*** !doctools |
|
|
#[call [fun a?] [opt {ansicode...}]] |
|
|
#[para]Return an ansi string representing a table of codes and a panel showing the colours |
|
|
variable SGR_setting_map |
|
|
variable SGR_colour_map |
|
|
|
|
|
if {![llength $args]} { |
|
|
set out "" |
|
|
append out $SGR_setting_map \n |
|
|
append out $SGR_colour_map \n |
|
|
|
|
|
try { |
|
|
package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try |
|
|
set bgname "White" |
|
|
set map1 [colourmap1 $bgname] |
|
|
set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] |
|
|
set map2 [colourmap2 $bgname] |
|
|
set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] |
|
|
append out [textblock::join $map1 " " $map2] \n |
|
|
#append out $map1[a] \n |
|
|
#append out $map2[a] \n |
|
|
|
|
|
|
|
|
|
|
|
} on error {result options} { |
|
|
puts stderr "Failed to draw colormap" |
|
|
puts stderr "$result" |
|
|
} finally { |
|
|
return $out |
|
|
} |
|
|
} else { |
|
|
set result [list] |
|
|
set rmap [lreverse $map] |
|
|
foreach i $args { |
|
|
if {[string is integer -strict $i]} { |
|
|
if {[dict exists $rmap $i]} { |
|
|
lappend result $i [dict get $rmap $i] |
|
|
} |
|
|
} else { |
|
|
if {[dict exists $map $i]} { |
|
|
lappend result $i [dict get $map $i] |
|
|
} |
|
|
} |
|
|
} |
|
|
return $result |
|
|
} |
|
|
} |
|
|
proc a+ {args} { |
|
|
#*** !doctools |
|
|
#[call [fun a+] [opt {ansicode...}]] |
|
|
#[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first |
|
|
#[para] e.g to set foreground red and bold |
|
|
#[para]punk::ansi::a red bold |
|
|
#[para]to set background red |
|
|
#[para]punk::ansi::a Red |
|
|
#[para]see [cmd punk::ansi::a?] to display a list of codes |
|
|
|
|
|
#don't disable ansi here. |
|
|
#we want this to be available to call even if ansi is off |
|
|
variable SGR_map |
|
|
set t [list] |
|
|
foreach i $args { |
|
|
if {[string is integer -strict $i]} { |
|
|
lappend t $i |
|
|
} elseif {[string first ";" $i] >=0} { |
|
|
#literal with params |
|
|
lappend t $i |
|
|
} else { |
|
|
if {[dict exists $SGR_map $i]} { |
|
|
lappend t [dict get $SGR_map $i] |
|
|
} else { |
|
|
#accept examples for foreground |
|
|
# 256f-# or 256fg-# or 256f# |
|
|
# rgbf-<r>-<g>-<b> or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> |
|
|
switch -nocase -glob -- $i { |
|
|
"256f*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "38;5;$cc" |
|
|
} |
|
|
"256b*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "48;5;$cc" |
|
|
} |
|
|
"rgbf*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "38;2;$r;$g;$b" |
|
|
} |
|
|
"rgbb*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "48;2;$r;$g;$b" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
# \033 - octal. equivalently \x1b in hex which is more common in documentation |
|
|
if {![llength $t]} { |
|
|
return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) |
|
|
} |
|
|
return "\x1b\[[join $t {;}]m" |
|
|
} |
|
|
proc a2 {args} { |
|
|
#*** !doctools |
|
|
#[call [fun a] [opt {ansicode...}]] |
|
|
#[para]Returns the ansi code to reset any current settings and apply those from the supplied list |
|
|
#[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text |
|
|
#[para] e.g to set foreground red and bold |
|
|
#[para]punk::ansi::a red bold |
|
|
#[para]to set background red |
|
|
#[para]punk::ansi::a Red |
|
|
#[para]see [cmd punk::ansi::a?] to display a list of codes |
|
|
|
|
|
|
|
|
#don't disable ansi here. |
|
|
#we want this to be available to call even if ansi is off |
|
|
variable SGR_map |
|
|
set t [list] |
|
|
foreach i $args { |
|
|
if {[dict exists $SGR_map $i]} { |
|
|
lappend t [dict get $SGR_map $i] |
|
|
} else { |
|
|
if {[string is integer -strict $i]} { |
|
|
lappend t $i |
|
|
} elseif {[string first ";" $i] >=0} { |
|
|
#literal with params |
|
|
lappend t $i |
|
|
} else { |
|
|
#accept examples for foreground |
|
|
# 256f-# or 256fg-# or 256f# |
|
|
# rgbf-<r>-<g>-<b> or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> |
|
|
switch -nocase -glob -- $i { |
|
|
"256f*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "38;5;$cc" |
|
|
} |
|
|
"256b*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "48;5;$cc" |
|
|
} |
|
|
"rgbf*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "38;2;$r;$g;$b" |
|
|
} |
|
|
"rgbb*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "48;2;$r;$g;$b" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
# \033 - octal. equivalently \x1b in hex which is more common in documentation |
|
|
# empty list [a=] should do reset - same for [a= nonexistant] |
|
|
# explicit reset at beginning of parameter list for a= (as opposed to a+) |
|
|
set t [linsert $t 0 0] |
|
|
return "\x1b\[[join $t {;}]m" |
|
|
} |
|
|
proc a {args} { |
|
|
#*** !doctools |
|
|
#[call [fun a] [opt {ansicode...}]] |
|
|
#[para]Returns the ansi code to reset any current settings and apply those from the supplied list |
|
|
#[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text |
|
|
#[para] e.g to set foreground red and bold |
|
|
#[para]punk::ansi::a red bold |
|
|
#[para]to set background red |
|
|
#[para]punk::ansi::a Red |
|
|
#[para]see [cmd punk::ansi::a?] to display a list of codes |
|
|
|
|
|
#variable SGR_setting_map { |
|
|
# bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 |
|
|
# underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 |
|
|
# reverse 7 noreverse 27 defaultfg 39 defaultbg 49 nohide 28 |
|
|
# overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 |
|
|
#} |
|
|
#variable SGR_colour_map { |
|
|
# black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 |
|
|
# Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 |
|
|
# BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 |
|
|
#} |
|
|
|
|
|
#don't disable ansi here. |
|
|
#we want this to be available to call even if ansi is off |
|
|
#variable SGR_map |
|
|
set t [list] |
|
|
foreach i $args { |
|
|
switch -- $i { |
|
|
bold {lappend t 1} |
|
|
dim {lappend t 2} |
|
|
blink {lappend t 5} |
|
|
fastblink {lappend t 6} |
|
|
noblink {lappend t 25} |
|
|
hide {lappend t 8} |
|
|
normal {lappend t 22} |
|
|
underline {lappend t 4} |
|
|
doubleunderline {lappend t 21} |
|
|
nounderline {lappend t 24} |
|
|
strike {lappend t 9} |
|
|
nostrike {lappend t 29} |
|
|
italic {lappend t 3} |
|
|
noitalic {lappend t 23} |
|
|
reverse {lappend t 7} |
|
|
noreverse {lappend t 27} |
|
|
defaultfb {lappend t 39} |
|
|
defaultbg {lappedn t 49} |
|
|
nohide {lappend t 28} |
|
|
overline {lappend t 53} |
|
|
nooverline {lappend t 55} |
|
|
frame {lappend t 51} |
|
|
framecircle {lappend t 52} |
|
|
noframe {lappend t 54} |
|
|
black {lappend t 30} |
|
|
red {lappend t 31} |
|
|
green {lappend t 32} |
|
|
yellow {lappend t 33} |
|
|
blue {lappend t 34} |
|
|
purple {lappend t 35} |
|
|
cyan {lappend t 36} |
|
|
white {lappend t 37} |
|
|
Black {lappend t 40} |
|
|
Red {lappend t 41} |
|
|
Green {lappend t 42} |
|
|
Yellow {lappend t 43} |
|
|
Blue {lappend t 44} |
|
|
Purple {lappend t 45} |
|
|
Cyan {lappend t 46} |
|
|
White {lappend t 47} |
|
|
BLACK {lappend t 100} |
|
|
RED {lappend t 101} |
|
|
GREEN {lappend t 101} |
|
|
YELLOW {lappend t 103} |
|
|
BLUE {lappend t 104} |
|
|
PURPLE {lappend t 105} |
|
|
CYAN {lappend t 106} |
|
|
WHITE {lappend t 107} |
|
|
default { |
|
|
if {[string is integer -strict $i]} { |
|
|
lappend t $i |
|
|
} elseif {[string first ";" $i] >=0} { |
|
|
#literal with params |
|
|
lappend t $i |
|
|
} else { |
|
|
#accept examples for foreground |
|
|
# 256f-# or 256fg-# or 256f# |
|
|
# rgbf-<r>-<g>-<b> or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> |
|
|
switch -nocase -glob -- $i { |
|
|
"256f*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "38;5;$cc" |
|
|
} |
|
|
"256b*" { |
|
|
set cc [string trim [string range $i 4 end] -gG] |
|
|
lappend t "48;5;$cc" |
|
|
} |
|
|
"rgbf*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "38;2;$r;$g;$b" |
|
|
} |
|
|
"rgbb*" { |
|
|
set rgb [string trim [string range $i 4 end] -gG] |
|
|
lassign [split $rgb -] r g b |
|
|
lappend t "48;2;$r;$g;$b" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
# \033 - octal. equivalently \x1b in hex which is more common in documentation |
|
|
# empty list [a=] should do reset - same for [a= nonexistant] |
|
|
# explicit reset at beginning of parameter list for a= (as opposed to a+) |
|
|
set t [linsert $t 0 0] |
|
|
return "\x1b\[[join $t {;}]m" |
|
|
} |
|
|
|
|
|
proc ansiwrap {codes text} { |
|
|
return [a {*}$codes]$text[a] |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
proc get_code_name {code} { |
|
|
#*** !doctools |
|
|
#[call [fun get_code_name] [arg code]] |
|
|
#[para]for example |
|
|
#[para] get_code_name red will return 31 |
|
|
#[para] get_code_name 31 will return red |
|
|
variable SGR_map |
|
|
set res [list] |
|
|
foreach i [split $code ";"] { |
|
|
set ix [lsearch -exact $SGR_map $i] |
|
|
if {[string is digit -strict $code]} { |
|
|
if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} |
|
|
} else { |
|
|
#reverse lookup code from name |
|
|
if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} |
|
|
} |
|
|
} |
|
|
set res |
|
|
} |
|
|
proc reset {} { |
|
|
#*** !doctools |
|
|
#[call [fun reset]] |
|
|
#[para]reset console |
|
|
return "\x1bc" |
|
|
} |
|
|
proc reset_soft {} { |
|
|
#*** !doctools |
|
|
#[call [fun reset_soft]] |
|
|
return \x1b\[!p |
|
|
} |
|
|
proc reset_colour {} { |
|
|
#*** !doctools |
|
|
#[call [fun reset_colour]] |
|
|
#[para]reset colour only |
|
|
return "\x1b\[0m" |
|
|
} |
|
|
|
|
|
# -- --- --- --- --- |
|
|
proc clear {} { |
|
|
#*** !doctools |
|
|
#[call [fun clear]] |
|
|
return "\033\[2J" |
|
|
} |
|
|
proc clear_above {} { |
|
|
#*** !doctools |
|
|
#[call [fun clear_above]] |
|
|
return \033\[1J |
|
|
} |
|
|
proc clear_below {} { |
|
|
#*** !doctools |
|
|
#[call [fun clear_below]] |
|
|
return \033\[0J |
|
|
} |
|
|
|
|
|
proc clear_all {} { |
|
|
# - doesn't work?? |
|
|
return \033\[3J |
|
|
} |
|
|
#see also erase_ functions |
|
|
# -- --- --- --- --- |
|
|
|
|
|
proc cursor_on {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_on]] |
|
|
return "\033\[?25h" |
|
|
} |
|
|
proc cursor_off {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_off]] |
|
|
return "\033\[?25l" |
|
|
} |
|
|
|
|
|
# -- --- --- --- --- |
|
|
proc move {row col} { |
|
|
#*** !doctools |
|
|
#[call [fun move] [arg row] [arg col]] |
|
|
#[para]Return an ansi sequence to move to row,col |
|
|
#[para]aka cursor home |
|
|
return \033\[${row}\;${col}H |
|
|
} |
|
|
proc move_emit {row col data args} { |
|
|
#*** !doctools |
|
|
#[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] |
|
|
#[para]Return an ansi string representing a move to row col with data appended |
|
|
#[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points |
|
|
#[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout |
|
|
#[para]punk::console::move_emit_return will also return the cursor to the original position |
|
|
#[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. |
|
|
#[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. |
|
|
#[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin |
|
|
#[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. |
|
|
#[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: |
|
|
#[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] |
|
|
#[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. |
|
|
|
|
|
set out "" |
|
|
if {$row eq "this"} { |
|
|
append out \033\[\;${col}G$data |
|
|
} else { |
|
|
append out \033\[${row}\;${col}H$data |
|
|
} |
|
|
foreach {row col data} $args { |
|
|
if {$row eq "this"} { |
|
|
append out \033\[\;${col}G$data |
|
|
} else { |
|
|
append out \033\[${row}\;${col}H$data |
|
|
} |
|
|
} |
|
|
return $out |
|
|
} |
|
|
proc move_forward {{n 1}} { |
|
|
#*** !doctools |
|
|
#[call [fun move_forward] [arg n]] |
|
|
return \033\[${n}C |
|
|
} |
|
|
proc move_back {{n 1}} { |
|
|
#*** !doctools |
|
|
#[call [fun move_back] [arg n]] |
|
|
return \033\[${n}D |
|
|
} |
|
|
proc move_up {{n 1}} { |
|
|
#*** !doctools |
|
|
#[call [fun move_up] [arg n]] |
|
|
return \033\[${n}A |
|
|
} |
|
|
proc move_down {{n 1}} { |
|
|
#*** !doctools |
|
|
#[call [fun move_down] [arg n]] |
|
|
return \033\[${n}B |
|
|
} |
|
|
proc move_column {col} { |
|
|
#*** !doctools |
|
|
#[call [fun move_column] [arg col]] |
|
|
return \x1b\[${col}G |
|
|
} |
|
|
proc move_row {row} { |
|
|
#*** !doctools |
|
|
#[call [fun move_row] [arg row]] |
|
|
#[para]VPA - Vertical Line Position Absolute |
|
|
return \x1b\[${row}d |
|
|
} |
|
|
# -- --- --- --- --- |
|
|
|
|
|
proc cursor_save {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_save]] |
|
|
#[para] equivalent term::ansi::code::ctrl::sc |
|
|
return \x1b\[s |
|
|
} |
|
|
proc cursor_restore {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_restore]] |
|
|
#[para] equivalent term::ansi::code::ctrl::rc |
|
|
return \x1b\[u |
|
|
} |
|
|
proc cursor_save_attributes {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_save_attributes]] |
|
|
#[para] equivalent term::ansi::code::ctrl::sca |
|
|
return \x1b7 |
|
|
} |
|
|
proc cursor_restore_attributes {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_restore_attributes]] |
|
|
#[para] equivalent term::ansi::code::ctrl::rca |
|
|
return \x1b8 |
|
|
} |
|
|
|
|
|
# -- --- --- --- --- |
|
|
proc erase_line {} { |
|
|
#*** !doctools |
|
|
#[call [fun erase_line]] |
|
|
return \033\[2K |
|
|
} |
|
|
proc erase_sol {} { |
|
|
#*** !doctools |
|
|
#[call [fun erase_sol]] |
|
|
#[para]Erase to start of line, leaving cursor position alone. |
|
|
return \033\[1K |
|
|
} |
|
|
proc erase_eol {} { |
|
|
#*** !doctools |
|
|
#[call [fun erase_eol]] |
|
|
return \033\[K |
|
|
} |
|
|
#see also clear_above clear_below |
|
|
# -- --- --- --- --- |
|
|
|
|
|
proc scroll_up {n} { |
|
|
#*** !doctools |
|
|
#[call [fun scroll_up] [arg n]] |
|
|
return \x1b\[${n}S |
|
|
} |
|
|
proc scroll_down {n} { |
|
|
#*** !doctools |
|
|
#[call [fun scroll_down] [arg n]] |
|
|
return \x1b\[${n}T |
|
|
} |
|
|
|
|
|
proc insert_spaces {count} { |
|
|
#*** !doctools |
|
|
#[call [fun insert_spaces] [arg count]] |
|
|
return \x1b\[${count}@ |
|
|
} |
|
|
proc delete_characters {count} { |
|
|
#*** !doctools |
|
|
#[call [fun delete_characters] [arg count]] |
|
|
return \x1b\[${count}P |
|
|
} |
|
|
proc erase_characters {count} { |
|
|
#*** !doctools |
|
|
#[call [fun erase_characters] [arg count]] |
|
|
return \x1b\[${count}X |
|
|
} |
|
|
proc insert_lines {count} { |
|
|
#*** !doctools |
|
|
#[call [fun insert_lines] [arg count]] |
|
|
return \x1b\[${count}L |
|
|
} |
|
|
proc delete_lines {count} { |
|
|
#*** !doctools |
|
|
#[call [fun delete_lines] [arg count]] |
|
|
return \x1b\[${count}M |
|
|
} |
|
|
|
|
|
proc cursor_pos {} { |
|
|
#*** !doctools |
|
|
#[call [fun cursor_pos]] |
|
|
#[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin |
|
|
#[para]The output on screen will look something like ^[lb][lb]47;3R |
|
|
#[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. |
|
|
#[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. |
|
|
#[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list |
|
|
return \033\[6n |
|
|
} |
|
|
|
|
|
proc cursor_pos_extended {} { |
|
|
#includes page e.g ^[[47;3;1R |
|
|
return \033\[?6n |
|
|
} |
|
|
|
|
|
|
|
|
#DECFRA - Fill rectangular area |
|
|
#REVIEW - vt100 accepts decimal values 132-126 and 160-255 ("in the current GL or GR in-use table") |
|
|
#some modern terminals accept and display characters outside this range - but this needs investigation. |
|
|
#in a modern unicode era - the restricted range doesn't make a lot of sense - but we need to see what terminal emulators actually do. |
|
|
#e.g what happens with double-width? |
|
|
#this wrapper accepts a char rather than a decimal value |
|
|
proc fill_rect {char t l b r} { |
|
|
set dec [scan $char %c] |
|
|
return \x1b\[$dec\;$t\;$l\;$b\;$r\$x |
|
|
} |
|
|
#DECFRA with decimal char value |
|
|
proc fill_rect_dec {decimal t l b r} { |
|
|
return \x1b\[$decimal\;$t\;$l\;$b\;$r\$x |
|
|
} |
|
|
|
|
|
proc checksum_rect {id page t l b r} { |
|
|
return "\x1b\[${id}\;${page}\;$t\;$l\;$b\;$r*y" |
|
|
} |
|
|
|
|
|
proc request_cursor_information {} { |
|
|
#*** !doctools |
|
|
#[call [fun request_cursor_information]] |
|
|
#[para]DECRQPSR (DEC Request Presentation State Report) for DECCCIR Cursor Information report |
|
|
#[para]When written to the terminal, this sequence causes the terminal to emit cursor information to stdin |
|
|
#[para]A stdin readloop will need to be in place to read this information |
|
|
return \x1b\[1\$w |
|
|
} |
|
|
proc request_tabstops {} { |
|
|
#*** !doctools |
|
|
#[call [fun request_tabstops]] |
|
|
#[para]DECRQPSR (DEC Request Presentation State Report) for DECTABSR Tab stop report |
|
|
#[para]When written to the terminal, this sequence causes the terminal to emit tabstop information to stdin |
|
|
return \x1b\[2\$w |
|
|
} |
|
|
|
|
|
|
|
|
#alternative to string terminator is \007 - |
|
|
proc titleset {windowtitle} { |
|
|
#*** !doctools |
|
|
#[call [fun titleset] [arg windowtitles]] |
|
|
#[para]Returns the code to set the title of the terminal window to windowtitle |
|
|
#[para]This may not work on terminals which have multiple panes/windows |
|
|
return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives |
|
|
} |
|
|
#titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title |
|
|
#no cross-platform ansi-only mechanism ? |
|
|
|
|
|
proc test_decaln {} { |
|
|
#Screen Alignment Test |
|
|
#Reset margins, move cursor to the top left, and fill the screen with 'E' |
|
|
#(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) |
|
|
return \x1b#8 |
|
|
} |
|
|
|
|
|
#length of text for printing characters only |
|
|
#- unicode and other non-printing chars and combining sequences should be handled by the ansifreestring_width call at the end. |
|
|
#certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names |
|
|
#review - is there an existing library or better method? printing to a terminal and querying cursor position is relatively slow and terminals lie. |
|
|
#Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first |
|
|
#If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. |
|
|
proc printing_length {line} { |
|
|
if {[string first \n $line] >= 0} { |
|
|
error "line_print_length must not contain newline characters" |
|
|
} |
|
|
#what if line has \v (vertical tab) ie more than one logical screen line? |
|
|
|
|
|
#review - detect ansi moves and warn/error? They would invalidate this algorithm |
|
|
#for a string with ansi moves - we would need to use the overtype::renderline function (which is a bit heavier) |
|
|
#arguably - \b and \r are cursor move operations too - so processing them here is not very symmetrical - review |
|
|
#the purpose of backspace (or line cr) in embedded text is unclear. Should it allow some sort of character combining/overstrike as it has sometimes done historically (nroff/less)? e.g a\b` as an alternative combiner or bolding if same char |
|
|
#This should presumably only be done if the over_strike (os) capability is enabled in the terminal. Either way - it presumably won't affect printing width? |
|
|
set line [punk::ansi::stripansi $line] |
|
|
#we can't use simple \b processing if we get ansi codes and aren't actually processing them (e.g moves) |
|
|
|
|
|
set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi |
|
|
#backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter |
|
|
#(* more correctly - moves cursor back) |
|
|
#Note some terminals process backspace before \v - which seems quite wrong |
|
|
#backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already |
|
|
#leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line |
|
|
# - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. |
|
|
#curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS |
|
|
|
|
|
#Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) |
|
|
#for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces |
|
|
#normalize tabs to an appropriate* width |
|
|
#*todo - handle terminal/context where tabwidth != the default 8 spaces |
|
|
if {[string first \t $line] >= 0} { |
|
|
set line [textutil::tabify::untabify2 $line] |
|
|
} |
|
|
|
|
|
#NOTE - this is non-destructive backspace as it occurs in text blocks - and is likely different to the sequence coming from a terminal or editor which generally does a destructive backspace |
|
|
#e.g |
|
|
#This means for example that abc\b has a length of 3. Trailing or leading backslashes have no effect |
|
|
|
|
|
#set bs [format %c 0x08] |
|
|
#set line [string map [list "\r\b" "\r"] $line] ;#backsp following a \r will have no effect |
|
|
set line [string trim $line \b] ;#take off at start and tail only |
|
|
|
|
|
#counterintuitively "x\b" still shows the x ie length is still one. The backspace just moves the position. There must be a char following \b for it to affect the length. |
|
|
#(we are not interested in the destructive backspace case present in editors,terminals etc - that is a different context) |
|
|
set n 0 |
|
|
|
|
|
#set chars [split $line ""] ; #review - graphemes vs chars? Terminals differ in how they treat this. |
|
|
set chars [punk::char::grapheme_split $line] |
|
|
set cr_posns [lsearch -all $chars \r] |
|
|
set bs_posns [lsearch -all $chars \b] |
|
|
foreach p $cr_posns { |
|
|
lset chars $p <cr> |
|
|
} |
|
|
foreach p $bs_posns { |
|
|
lset chars $p <bs> |
|
|
} |
|
|
|
|
|
#mintty seems more 'correct'. It will backspace over an entire grapheme (char+combiners) whereas windows terminal/wezterm etc remove a combiner |
|
|
#build an output |
|
|
set idx 0 |
|
|
set outchars [list] |
|
|
set outsizes [list] |
|
|
# -- |
|
|
#tcl8.6/8.7 we can get a fast byte-compiled switch statement only with literals in the source code |
|
|
#this is difficult/risky to maintain - hence the lsearch and grapheme-replacement above |
|
|
#we could reasonably do it with backspace - but cr is more difficult |
|
|
#note that \x08 \b etc won't work to create a compiled switch statement even with unbraced (separate argument) form of switch statement. |
|
|
#set bs "" |
|
|
#set cr ? |
|
|
# -- |
|
|
foreach c $chars { |
|
|
switch -- $c { |
|
|
<bs> { |
|
|
if {$idx > 0} { |
|
|
incr idx -1 |
|
|
} |
|
|
} |
|
|
<cr> { |
|
|
set idx 0 |
|
|
} |
|
|
default { |
|
|
#set nxt [llength $outchars] |
|
|
if {$idx < [llength $outchars]} { |
|
|
#overstrike? - should usually have no impact on width - width taken as last grapheme in that column |
|
|
#e.g nroff would organise text such that underline written first, then backspace, then the character - so that terminals without overstrike would display something useful if no overstriking is done |
|
|
#Conceivably double_wide_char then backspace then underscore would underreport the length if overstriking were intended. |
|
|
lset outchars $idx $c |
|
|
} else { |
|
|
lappend outchars $c |
|
|
} |
|
|
#punk::ansi::internal::printing_length_addchar $idx $c |
|
|
incr idx |
|
|
} |
|
|
} |
|
|
} |
|
|
return [punk::char::ansifreestring_width [join $outchars ""]] |
|
|
} |
|
|
|
|
|
|
|
|
#*** !doctools |
|
|
#[list_end] [comment {--- end definitions namespace punk::ansi ---}] |
|
|
} |
|
|
|
|
|
namespace eval punk::ansi { |
|
|
|
|
|
|
|
|
# -- --- --- --- --- --- |
|
|
#XTGETTCAP |
|
|
# xterm responds with |
|
|
# DCS 1 + r Pt ST for valid requests, adding to Pt an = , and |
|
|
# the value of the corresponding string that xterm would send, |
|
|
# or |
|
|
# DCS 0 + r ST for invalid requests. |
|
|
# The strings are encoded in hexadecimal (2 digits per |
|
|
# character). If more than one name is given, xterm replies |
|
|
# with each name/value pair in the same response. An invalid |
|
|
# name (one not found in xterm's tables) ends processing of the |
|
|
# list of names. |
|
|
proc xtgetcap {keylist} { |
|
|
#ESC P = 0x90 = DCS = Device Control String |
|
|
set hexkeys [list] |
|
|
foreach k $keylist { |
|
|
lappend hexkeys [util::str2hex $k] |
|
|
} |
|
|
set payload [join $hexkeys ";"] |
|
|
return "\x1bP+q$payload\x1b\\" |
|
|
} |
|
|
proc xtgetcap2 {keylist} { |
|
|
#ESC P = 0x90 = DCS = Device Control String |
|
|
set hexkeys [list] |
|
|
foreach k $keylist { |
|
|
lappend hexkeys [util::str2hex $k] |
|
|
} |
|
|
set payload [join $hexkeys ";"] |
|
|
return "\u0090+q$payload\u009c" |
|
|
} |
|
|
namespace eval codetype { |
|
|
#Functions that are primarily intended to operate on a single ansi code sequence - rather than a sequence, or codes embedded in another string |
|
|
#in some cases multiple sequences or leading trailing strings are ok - but the proc docs should note where the function is looking |
|
|
#review - separate namespace for functions that operate on multiple or embedded? |
|
|
proc is_sgr {code} { |
|
|
#SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline |
|
|
#we will accept and pass through the less common colon separator (ITU Open Document Architecture) |
|
|
#Terminals should generally ignore it if they don't use it |
|
|
regexp {\033\[[0-9;:]*m$} $code |
|
|
} |
|
|
|
|
|
#review - has_cursor_move_in_line? Are we wanting to allow strings/sequences and detect that there are no moves that *aren't* within line? |
|
|
proc is_cursor_move_in_line {code {knownline ""}} { |
|
|
if {[regexp {\033\[[0-9]*(:?C|D|G)$} $code]} { |
|
|
return 1 |
|
|
} |
|
|
if {[string is integer -strict $knownline]} { |
|
|
#CSI n : m H where row n happens to be current line - review/test |
|
|
set re [string map [list %n% $knownline] {\x1b\[%n%:[0-9]*H$}] |
|
|
if {[regexp $re $code]} { |
|
|
return 1 |
|
|
} |
|
|
} |
|
|
return 0 |
|
|
} |
|
|
#pure SGR reset with no other functions |
|
|
proc is_sgr_reset {code} { |
|
|
#*** !doctools |
|
|
#[call [fun is_sgr_reset] [arg code]] |
|
|
#[para]Return a boolean indicating whether this string has a trailing pure SGR reset |
|
|
#[para]Note that if the reset is not the very last item in the string - it will not be detected. |
|
|
#[para]This is primarily intended for testing a single ansi code sequence, but code can be any string where the trailing SGR code is to be tested. |
|
|
|
|
|
#todo 8-bit csi |
|
|
regexp {\x1b\[0*m$} $code |
|
|
} |
|
|
|
|
|
|
|
|
#whether this code has 0 (or equivalently empty) parameter (but may set others) |
|
|
#if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes |
|
|
#it generally only makes sense for the reset to be the first parameter - otherwise the code has ineffective portions |
|
|
#However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. |
|
|
#We only look at the initial parameter within the trailing SGR code as this is the well-formed normal case. |
|
|
|
|
|
#Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code |
|
|
proc has_sgr_leadingreset {code} { |
|
|
#*** !doctools |
|
|
#[call [fun has_sgr_leadingreset] [arg code]] |
|
|
#[para]The reset must be the very first item in code to be detected. Trailing strings/codes ignored. |
|
|
set params "" |
|
|
#we need non-greedy |
|
|
if {[regexp {^\033\[([^m]*)m} $code _match params]} { |
|
|
#must match trailing m to be the type of reset we're looking for |
|
|
set plist [split $params ";"] |
|
|
if {[string trim [lindex $plist 0] 0] eq ""} { |
|
|
#e.g \033\[m \033\[0\;...m \033\[0000...m |
|
|
return 1 |
|
|
} else { |
|
|
return 0 |
|
|
} |
|
|
} else { |
|
|
return 0 |
|
|
} |
|
|
} |
|
|
proc is_gx {code} { |
|
|
#g0 {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} |
|
|
#g1 {(?:\x1b\)0)(?:(?!\x1b\)B).)*\x1b\)B} |
|
|
regexp {\x1b(?:\(0(?:(?:(?!\x1b\(B).)*\x1b\(B)|\)0(?:(?:(?!\x1b\)B).)*\x1b\)B))} $code |
|
|
} |
|
|
proc is_gx_open {code} { |
|
|
#todo g2,g3? |
|
|
#pin to start and end with ^ and $ ? |
|
|
#regexp {\x1b\(0|\x1b\)0} $code |
|
|
regexp {\x1b(?:\(0|\)0)} $code |
|
|
} |
|
|
proc is_gx_close {code} { |
|
|
#regexp {\x1b\(B|\x1b\)B} $code |
|
|
regexp {\x1b(?:\(B|\)B)} $code |
|
|
} |
|
|
|
|
|
#has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? |
|
|
|
|
|
} |
|
|
namespace eval sequence_type { |
|
|
proc is_Fe {code} { |
|
|
# C1 control codes |
|
|
if {[regexp {^\033\[[\u0040-\u005F]}]} { |
|
|
#7bit - typical case |
|
|
return 1 |
|
|
} |
|
|
#8bit |
|
|
#review - all C1 escapes ? 0x80-0x90F |
|
|
#This is possibly problematic as it is affected by encoding. |
|
|
#According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit |
|
|
#"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." |
|
|
return 0 |
|
|
} |
|
|
proc is_Fs {code} { |
|
|
puts stderr "is_Fs unimplemented" |
|
|
} |
|
|
} |
|
|
# -- --- --- --- --- --- --- --- --- --- --- |
|
|
#todo - implement colour resets like the perl module: |
|
|
#https://metacpan.org/pod/Text::ANSI::Util |
|
|
#(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) |
|
|
} |
|
|
|
|
|
|
|
|
namespace eval punk::ansi::ta { |
|
|
#*** !doctools |
|
|
#[subsection {Namespace punk::ansi::ta}] |
|
|
#[para] text ansi functions |
|
|
#[para] based on but not identical to the Perl Text Ansi module: |
|
|
#[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm |
|
|
#[list_begin definitions] |
|
|
namespace path ::punk::ansi |
|
|
|
|
|
#handle both 7-bit and 8-bit csi |
|
|
#review - does codepage affect this? e.g ebcdic has 8bit csi in different position |
|
|
|
|
|
#CSI |
|
|
#variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m |
|
|
variable re_csi_open {(?:\x1b\[|\u009b)} |
|
|
#variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@\^_\{|\}\[\]~`]} |
|
|
variable re_csi_code {(?:\x1b\[|\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]} |
|
|
|
|
|
#intermediate bytes range 0x20-0x2F (ascii space and !"#$%&'()*+,-./) |
|
|
#parameter bytes range 0x30-0x3F (ascii 0-9:;<=>?) |
|
|
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). |
|
|
|
|
|
#colour and style |
|
|
variable re_sgr {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m |
|
|
|
|
|
#OSC - termnate with BEL (\a \007) or ST (string terminator \x1b\\) |
|
|
# 8-byte string terminator is \x9c (\u009c) |
|
|
|
|
|
#non-greedy by excluding ST terminators |
|
|
variable re_esc_osc1 {(?:\x1b\])(?:[^\007]*)\007} |
|
|
#variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} ;#somewhat wrong - we want to exclude the ST - not other esc sequences |
|
|
variable re_esc_osc2 {(?:\x1b\])(?:(?!\x1b\\).)*\x1b\\} |
|
|
variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} |
|
|
variable re_osc_open {(?:\x1b\]|\u009d).*} |
|
|
|
|
|
|
|
|
variable standalone_code_map [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] |
|
|
variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} |
|
|
|
|
|
#if we don't split on altgraphics too and separate them out - it's easy to get into a horrible mess |
|
|
variable re_altg0_group {(?:\x1b\(0)(?:(?!\x1b\(B).)*\x1b\(B} |
|
|
variable re_altg0_open {(?:\x1b\(0)} |
|
|
variable re_altg0_close {(?:\x1b\(B)} |
|
|
|
|
|
# DCS "ESC P" or "0x90" is also terminated by ST |
|
|
set re_ST_open {(?:\033P|\u0090|\033X|\u0098|\033\^|\u009e|\033_|\u009f)} |
|
|
#ST terminators [list \007 \033\\ \u009c] |
|
|
|
|
|
#regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) |
|
|
#non-greedy by exclusion of ST terminators in body |
|
|
#we need to match \x1b\\ not just \x1b There could be colour codes nested in a privacy msg/string |
|
|
#even if terminals generally don't support that - it's quite possible for an ansi code to get nested this way - and we'd prefer it not to break our splits |
|
|
#Just checking for \x1b will terminate the match too early |
|
|
#we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) |
|
|
#variable re_ST {(?:\x1bX|\u0098|\x1b\^|\u009E|\x1b_|\u009F)(?:[^\x1b\007\u009c]*)(?:\x1b\\|\007|\u009c)} ;#downsides: early terminating with nests, mixes 7bit 8bit start/ends (does that exist in the wild?) |
|
|
#keep our 8bit/7bit start-end codes separate |
|
|
variable re_ST {(?:\x1bP|\x1bX|\x1b\^|\x1b_)(?:(?!\x1b\\|007).)*(?:\x1b\\|\007)|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)} |
|
|
|
|
|
|
|
|
|
|
|
#consider standalones as self-opening/self-closing - therefore included in both ansi_detect and ansi_detect_open |
|
|
|
|
|
#default for regexes is non-newline-sensitive matching - ie matches can span lines |
|
|
# -- --- --- --- |
|
|
variable re_ansi_detect1 "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" |
|
|
# -- --- --- --- |
|
|
#handrafted TRIE version of above. Somewhat difficult to construct and maintain. TODO - find a regext TRIE generator that works with Tcl regexes |
|
|
#This does make things quicker - but it's too early to finalise the detect/split regexes (e.g missing \U0090 ) - will need to be redone. |
|
|
variable re_ansi_detect {(?:\x1b(?:\((?:0|B)|\[(?:[\x20-\x2f\x30-\x3f]*[\x40-\x7e])|\](?:(?:[^\007]*)\007|(?:(?!\x1b\\).)*\x1b\\)|(?:P|X|\^|_)(?:(?:(?!\x1b\\|007).)*(?:\x1b\\|\007))|c|7|8|M|E|D|H|=|>|(?:#(?:3|4|5|6|8))))|(?:\u0090|\u0098|\u009E|\u009F)(?:(?!\u009c).)*(?:\u009c)|(?:\u009b)[\x20-\x2f\x30-\x3f]*[\x40-\x7e]|(?:\u009d)(?:[^\u009c]*)?\u009c} |
|
|
# -- --- --- --- |
|
|
|
|
|
|
|
|
|
|
|
variable re_ansi_detect_open "${re_csi_open}|${re_osc_open}|${re_standalones}|${re_ST_open}|${re_altg0_open}" |
|
|
|
|
|
#may be same as detect - kept in case detect needs to diverge |
|
|
#variable re_ansi_split "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_esc_osc3}|${re_standalones}|${re_ST}|${re_altg0_open}|${re_altg0_close}" |
|
|
set re_ansi_split $re_ansi_detect |
|
|
|
|
|
#detect any ansi escapes |
|
|
#review - only detect 'complete' codes - or just use the opening escapes for performance? |
|
|
proc detect {text} { |
|
|
#*** !doctools |
|
|
#[call [fun detect] [arg text]] |
|
|
#[para]Return a boolean indicating whether Ansi codes were detected in text |
|
|
#[para] |
|
|
|
|
|
variable re_ansi_detect |
|
|
expr {[regexp $re_ansi_detect $text]} |
|
|
} |
|
|
proc detect2 {text} { |
|
|
variable re_ansi_detect2 |
|
|
expr {[regexp $re_ansi_detect2 $text]} |
|
|
} |
|
|
|
|
|
|
|
|
proc detect_open {text} { |
|
|
variable re_ansi_detect_open |
|
|
expr {[regexp $re_ansi_detect_open $text]} |
|
|
} |
|
|
|
|
|
#not in perl ta |
|
|
proc detect_csi {text} { |
|
|
#*** !doctools |
|
|
#[call [fun detect_csi] [arg text]] |
|
|
#[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text |
|
|
#[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] |
|
|
#[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation |
|
|
#[para]There is also a multi-byte escape sequence \u009b |
|
|
#[para]This is less commonly used but is also detected here |
|
|
#[para](This function is not in perl ta) |
|
|
variable re_csi_open |
|
|
expr {[regexp $re_csi_open $text]} |
|
|
} |
|
|
proc detect_sgr {text} { |
|
|
#*** !doctools |
|
|
#[call [fun detect_sgr] [arg text]] |
|
|
#[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. |
|
|
#[para]This is the set of CSI sequences ending in 'm' |
|
|
#[para]This is most commonly an Ansi colour code - but also things such as underline and italics |
|
|
#[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. |
|
|
#[para](This function is not in perl ta) |
|
|
variable re_sgr |
|
|
expr {[regexp $re_sgr $text]} |
|
|
} |
|
|
proc strip {text} { |
|
|
#*** !doctools |
|
|
#[call [fun strip] [arg text]] |
|
|
#[para]Return text stripped of Ansi codes |
|
|
#[para]This is a tailcall to punk::ansi::stripansi |
|
|
tailcall stripansi $text |
|
|
} |
|
|
proc length {text} { |
|
|
#*** !doctools |
|
|
#[call [fun length] [arg text]] |
|
|
#[para]Return the character length after stripping ansi codes - not the printing length |
|
|
string length [stripansi $text] |
|
|
} |
|
|
#todo - handle newlines |
|
|
#not in perl ta |
|
|
#proc printing_length {text} { |
|
|
# |
|
|
#} |
|
|
|
|
|
proc trunc {text width args} { |
|
|
|
|
|
} |
|
|
|
|
|
#not in perl ta |
|
|
#returns just the plaintext portions in a list |
|
|
proc split_at_codes {text} { |
|
|
variable re_ansi_split |
|
|
punk::ansi::internal::splitx $text ${re_ansi_split} |
|
|
} |
|
|
|
|
|
# -- --- --- --- --- --- |
|
|
#Split $text to a list containing alternating ANSI color codes and text. |
|
|
#ANSI color codes are always on the second element, fourth, and so on. |
|
|
#(ie plaintext on odd list-indices ansi on even indices) |
|
|
# Example: |
|
|
#ta_split_codes "" # => "" |
|
|
#ta_split_codes "a" # => "a" |
|
|
#ta_split_codes "a\e[31m" # => {"a" "\e[31m"} |
|
|
#ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} |
|
|
#ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} |
|
|
#ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} |
|
|
#ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} |
|
|
# |
|
|
proc split_codes {text} { |
|
|
variable re_ansi_split |
|
|
set re "(?:${re_ansi_split})+" |
|
|
return [_perlish_split $re $text] |
|
|
} |
|
|
#like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) |
|
|
proc split_codes_single {text} { |
|
|
variable re_ansi_split |
|
|
return [_perlish_split $re_ansi_split $text] |
|
|
} |
|
|
|
|
|
#review - tcl greedy expressions may match multiple in one element |
|
|
proc _perlish_split {re text} { |
|
|
if {[string length $text] == 0} { |
|
|
return {} |
|
|
} |
|
|
set list [list] |
|
|
set start 0 |
|
|
|
|
|
#We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW |
|
|
while {[regexp -start $start -indices -- $re $text match]} { |
|
|
lassign $match matchStart matchEnd |
|
|
#puts "->start $start ->match $matchStart $matchEnd" |
|
|
if {$matchEnd < $matchStart} { |
|
|
lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] |
|
|
incr start |
|
|
if {$start >= [string length $text]} { |
|
|
break |
|
|
} |
|
|
continue |
|
|
} |
|
|
lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] |
|
|
set start [expr {$matchEnd+1}] |
|
|
#? |
|
|
if {$start >= [string length $text]} { |
|
|
break |
|
|
} |
|
|
} |
|
|
return [lappend list [string range $text $start end]] |
|
|
} |
|
|
proc _perlish_split2 {re text} { |
|
|
if {[string length $text] == 0} { |
|
|
return {} |
|
|
} |
|
|
set list [list] |
|
|
set start 0 |
|
|
#We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW |
|
|
while {[regexp -start $start -indices -- $re $text match]} { |
|
|
lassign $match matchStart matchEnd |
|
|
#puts "->start $start ->match $matchStart $matchEnd" |
|
|
if {$matchEnd < $matchStart} { |
|
|
lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] |
|
|
incr start |
|
|
} else { |
|
|
lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] |
|
|
set start [expr {$matchEnd+1}] |
|
|
} |
|
|
if {$start >= [string length $text]} { |
|
|
break |
|
|
} |
|
|
} |
|
|
return [lappend list [string range $text $start end]] |
|
|
} |
|
|
proc _ws_split {text} { |
|
|
regexp -all -inline {(?:\S+)|(?:\s+)} $text |
|
|
} |
|
|
# -- --- --- --- --- --- |
|
|
|
|
|
#*** !doctools |
|
|
#[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] |
|
|
} |
|
|
# -- --- --- --- --- --- --- --- --- --- --- |
|
|
|
|
|
namespace eval punk::ansi::ansistring { |
|
|
#*** !doctools |
|
|
#[subsection {Namespace punk::ansi::ansistring}] |
|
|
#[para]punk::ansi::ansistring ensemble - ansi-aware string operations |
|
|
#[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings |
|
|
#[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. |
|
|
#[list_begin definitions] |
|
|
|
|
|
namespace path [list ::punk::ansi ::punk::ansi::ta] |
|
|
namespace ensemble create |
|
|
namespace export length length1 trim trimleft trimright index VIEW VIEWCODES INDEXABSOLUTE INDEXCOLUMNS COLUMNINDEX |
|
|
#todo - expose _splits_ methods so caller can work efficiently with the splits themselves |
|
|
#we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single |
|
|
|
|
|
#\UFFFD - replacement char or \U2426 |
|
|
|
|
|
#using ISO 2047 graphical representations of control characters - probably obsolete? |
|
|
#00 NUL Null ⎕ U+2395 NU |
|
|
#01 TC1, SOH Start of Heading ⌈ U+2308 SH |
|
|
#02 TC2, STX Start of Text ⊥ U+22A5 SX |
|
|
#03 TC3, ETX End of Text ⌋ U+230B EX |
|
|
#04 TC4, EOT End of Transmission ⌁ U+2301[9] ET |
|
|
#05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ |
|
|
#06 TC6, ACK Acknowledge ✓ U+2713 AK |
|
|
#07 BEL Bell ⍾ U+237E[9] BL |
|
|
#08 FE0, BS Backspace ⤺ —[b] BS |
|
|
#09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT |
|
|
#0A FE2, LF Line Feed ≡ U+2261 LF |
|
|
#0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT |
|
|
#0C FE4, FF Form Feed ↡ U+21A1 FF |
|
|
#0D FE5, CR Carriage Return ⪪ U+2AAA CR |
|
|
#0E SO Shift Out ⊗ U+2297 SO |
|
|
#0F SI Shift In ⊙ U+2299 SI |
|
|
#10 TC7, DLE Data Link Escape ⊟ U+229F DL |
|
|
#11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 |
|
|
#12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 |
|
|
#13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 |
|
|
#14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 |
|
|
#15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK |
|
|
#16 TC9, SYN Synchronization ⎍ U+238D SY |
|
|
#17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB |
|
|
#18 CAN Cancel ⧖ U+29D6 CN |
|
|
#19 EM End of Medium ⍿ U+237F[9] EM |
|
|
#1A SUB Substitute Character ␦ U+2426[12] SB |
|
|
#1B ESC Escape ⊖ U+2296 EC |
|
|
#1C IS4, FS File Separator ◰ U+25F0 FS |
|
|
#1D IS3, GS Group Separator ◱ U+25F1 GS |
|
|
#1E IS2, RS Record Separator ◲ U+25F2 RS |
|
|
#1F IS1 US Unit Separator ◳ U+25F3 US |
|
|
#20 SP Space △ U+25B3 SP |
|
|
#7F DEL Delete ▨ —[d] DT |
|
|
|
|
|
#C0 control code visual representations |
|
|
# Code Val Name 2X Description |
|
|
# 2400 00 NUL NU Symbol for Null |
|
|
# 2401 01 SOH SH Symbol for Start of Heading |
|
|
# 2402 02 STX SX Symbol for Start of Text |
|
|
# 2403 03 ETX EX Symbol for End of Text |
|
|
# 2404 04 EOT ET Symbol for End of Transmission |
|
|
# 2405 05 ENQ EQ Symbol for Enquiry |
|
|
# 2406 06 ACK AK Symbol for Acknowledge |
|
|
# 2407 07 BEL BL Symbol for Bell |
|
|
# 2409 09 BS BS Symbol for Backspace |
|
|
# 2409 09 HT HT Symbol for Horizontal Tab (1) |
|
|
# 240A 0A LF LF Symbol for Line Feed (1) |
|
|
# 240B 0B VT VT Symbol for Vertical Tab (1) |
|
|
# 240C 0C FF FF Symbol for Form Feed (2) |
|
|
# 240D 0D CR CR Symbol for Carriage Return (1) |
|
|
# 240E 0E SO SO Symbol for Shift Out |
|
|
# 240F 0F SI SI Symbol for Shift In |
|
|
# 2410 10 DLE DL Symbol for Data Link Escape |
|
|
# 2411 11 DC1 D1 Symbol for Device Control 1 (2) |
|
|
# 2412 12 DC2 D2 Symbol for Device Control 2 (2) |
|
|
# 2413 13 DC3 D3 Symbol for Device Control 3 (2) |
|
|
# 2414 14 DC4 D4 Symbol for Device Control 4 (2) |
|
|
# 2415 15 NAK NK Symbol for Negative Acknowledge |
|
|
# 2416 16 SYN SY Symbol for Synchronous Idle |
|
|
# 2417 17 ETB EB Symbol for End of Transmission Block |
|
|
# 2418 18 CAN CN Symbol for Cancel |
|
|
# 2419 19 EM EM Symbol for End of Medium |
|
|
# 241A 1A SUB SU Symbol for Substitute |
|
|
# 241B 1B ESC EC Symbol for Escape |
|
|
# 241C 1C FS FS Symbol for Field Separator (3) |
|
|
# 241D 1D GS GS Symbol for Group Separator (3) |
|
|
# 241E 1E RS RS Symbol for Record Separator (3) |
|
|
# 241F 1F US US Symbol for Unit Separator (3) |
|
|
# 2420 20 SP SP Symbol for Space (4) |
|
|
# 2421 7F DEL DT Symbol for Delete (4) |
|
|
|
|
|
#C1 control code visual representations |
|
|
#Code Val Name 2X Description |
|
|
# 80 80 80 (1) |
|
|
# 81 81 81 (1) |
|
|
# E022 82 BPH 82 Symbol for Break Permitted Here (2) |
|
|
# E023 83 NBH 83 Symbol for No Break Here (2) |
|
|
# E024 84 IND IN Symbol for Index (3) |
|
|
# E025 85 NEL NL Symbol for Next Line (4) |
|
|
# E026 86 SSA SS Symbol for Start Selected Area |
|
|
# E027 87 ESA ES Symbol for End Selected Area |
|
|
# E028 88 HTS HS Symbol for Character Tabulation Set |
|
|
# E029 89 HTJ HJ Symbol for Character Tabulation with Justification |
|
|
# E02A 8A VTS VS Symbol for Line Tabulation Set |
|
|
# E02B 8B PLD PD Symbol for Partial Line Forward |
|
|
# E02C 8C PLU PU Symbol for Partial Line Backward |
|
|
# E02D 8D RI RI Symbol for Reverse Line Feed |
|
|
# E02E 8E SS2 S2 Symbol for Single Shift 2 |
|
|
# E02F 8F SS3 S3 Symbol for Single Shift 3 |
|
|
# E030 90 DCS DC Symbol for Device Control String |
|
|
# E031 91 PU1 P1 Symbol for Private Use 1 |
|
|
# E032 92 PU2 P2 Symbol for Private Use 2 |
|
|
# E033 93 STS SE Symbol for Set Transmit State |
|
|
# E034 94 CCH CC Symbol for Cancel Character |
|
|
# E035 95 MW MW Symbol for Message Waiting |
|
|
# E036 96 SPA SP Symbol for Start Protected (Guarded) Area |
|
|
# E037 97 EPA EP Symbol for End Protected (Guarded) Area |
|
|
# E038 98 SOS 98 Symbol for Start of String (2) |
|
|
# 99 99 (1) |
|
|
# E03A 9A SCI 9A Symbol for Single Character Introducer (2) |
|
|
# E03B 9B CSI CS Symbol for Control Sequence Introducer (5) |
|
|
# E03C 9C ST ST Symbol for String Terminator |
|
|
# E03D 9D OSC OS Symbol for Operating System Command |
|
|
# E03E 9E PM PM Symbol for Privacy Message |
|
|
# E03F 9F APC AP Symbol for Application Program Command |
|
|
|
|
|
proc VIEW {args} { |
|
|
#*** !doctools |
|
|
#[call [fun VIEW] [arg string]] |
|
|
#[para]Return a string with specific ANSI control characters substituted with visual equivalents frome the appropriate unicode C0 and C1 visualisation sets |
|
|
#[para]For debugging purposes, certain other standard control characters are converted to visual representation, for example backspace (mapped to \\U2408 '\U2408') |
|
|
#[para]Horizontal tab is mapped to \\U2409 '\U2409'. For many of the punk terminal text operations, tabs have already been mapped to the appropriate number of spaces using textutil::tabify functions |
|
|
#[para]As punkshell uses linefeed where possible in preference to crlf even on windows, cr is mapped to \\U240D '\U240D' - but lf is left as is. |
|
|
|
|
|
if {![llength $args]} { |
|
|
return "" |
|
|
} |
|
|
|
|
|
set string [lindex $args end] |
|
|
set defaults [dict create\ |
|
|
-esc 1\ |
|
|
-cr 1\ |
|
|
-lf 0\ |
|
|
-vt 0\ |
|
|
-ht 1\ |
|
|
-bs 1\ |
|
|
-sp 1\ |
|
|
] |
|
|
set argopts [lrange $args 0 end-1] |
|
|
if {[llength $argopts] % 2 != 0} { |
|
|
error "ansistring VIEW options must be option-value pairs, received '$argopts'. Known opts [dict keys $defaults]" |
|
|
} |
|
|
set opts [dict merge $defaults $argopts] |
|
|
# -- --- --- --- --- |
|
|
set opt_esc [dict get $opts -esc] |
|
|
set opt_cr [dict get $opts -cr] |
|
|
set opt_lf [dict get $opts -lf] |
|
|
set opt_vt [dict get $opts -vt] |
|
|
set opt_ht [dict get $opts -ht] |
|
|
set opt_bs [dict get $opts -bs] |
|
|
set opt_sp [dict get $opts -sp] |
|
|
# -- --- --- --- --- |
|
|
|
|
|
|
|
|
#modern (c0 seem to have more terminal/font support - C1 can show 8bit c1 codes - but also seems to be limited support) |
|
|
|
|
|
#Goal is not to map every control character? |
|
|
#Map of which elements we want to convert - done this way so we can see names of control's that are included: - ease of maintenance compared to just creating the string map directly |
|
|
#ETX -ctrl-c |
|
|
#EOT ctrl-d (EOF?) |
|
|
#SYN ctrl-v |
|
|
#SUB ctrl-z |
|
|
#CAN ctrl-x |
|
|
#FS ctrl-\ (SIGQUIT) |
|
|
set visuals_interesting [dict create\ |
|
|
NUL [list \x00 \u2400]\ |
|
|
ETX [list \x03 \u2403]\ |
|
|
EOT [list \x04 \u2404]\ |
|
|
BEL [list \x07 \u2407]\ |
|
|
SYN [list \x16 \u2416]\ |
|
|
CAN [list \x18 \u2418]\ |
|
|
SUB [list \x1a \u241a]\ |
|
|
FS [list \x1c \u241c]\ |
|
|
SOS [list \x98 \ue038]\ |
|
|
CSI [list \x9b \ue03b]\ |
|
|
ST [list \x9c \ue03c]\ |
|
|
PM [list \x9e \ue03e]\ |
|
|
APC [list \x9f \ue03f]\ |
|
|
] |
|
|
#it turns out we need pretty much everything for debugging |
|
|
set visuals_c0 [dict create\ |
|
|
NUL [list \x00 \u2400]\ |
|
|
SOH [list \x01 \u2401]\ |
|
|
STX [list \x02 \u2402]\ |
|
|
ETX [list \x03 \u2403]\ |
|
|
EOT [list \x04 \u2404]\ |
|
|
ENQ [list \x05 \u2405]\ |
|
|
ACK [list \x06 \u2406]\ |
|
|
BEL [list \x07 \u2407]\ |
|
|
FF [list \x0c \u240c]\ |
|
|
SO [list \x0e \u240e]\ |
|
|
SF [list \x0f \u240f]\ |
|
|
DLE [list \x10 \u2410]\ |
|
|
DC1 [list \x11 \u2411]\ |
|
|
DC2 [list \x12 \u2412]\ |
|
|
DC3 [list \x13 \u2413]\ |
|
|
DC4 [list \x14 \u2414]\ |
|
|
NAK [list \x15 \u2415]\ |
|
|
SYN [list \x16 \u2416]\ |
|
|
ETB [list \x17 \u2417]\ |
|
|
CAN [list \x18 \u2418]\ |
|
|
EM [list \x19 \u2419]\ |
|
|
SUB [list \x1a \u241a]\ |
|
|
FS [list \x1c \u241c]\ |
|
|
GS [list \x1d \u241d]\ |
|
|
RS [list \x1e \u241e]\ |
|
|
US [list \x1f \u241f]\ |
|
|
DEL [list \x7f \u2421]\ |
|
|
] |
|
|
set visuals_c1 [dict create\ |
|
|
BPH [list \x82 \ue022]\ |
|
|
NBH [list \x83 \ue023]\ |
|
|
IND [list \x84 \ue024]\ |
|
|
NEL [list \x85 \ue025]\ |
|
|
SSA [list \x86 \ue026]\ |
|
|
ESA [list \x87 \ue027]\ |
|
|
HTS [list \x88 \ue028]\ |
|
|
HTJ [list \x89 \ue029]\ |
|
|
VTS [list \x8a \ue02a]\ |
|
|
PLD [list \x8b \ue02a]\ |
|
|
PLU [list \x8c \ue02c]\ |
|
|
RI [list \x8d \ue02d]\ |
|
|
SS2 [list \x8e \ue02e]\ |
|
|
SS3 [list \x8f \ue02f]\ |
|
|
DCS [list \x90 \ue030]\ |
|
|
PU1 [list \x91 \ue031]\ |
|
|
PU2 [list \x92 \ue032]\ |
|
|
STS [list \x93 \ue033]\ |
|
|
CCH [list \x94 \ue034]\ |
|
|
MW [list \x95 \ue035]\ |
|
|
SPA [list \x96 \ue036]\ |
|
|
EPA [list \x97 \ue037]\ |
|
|
SOS [list \x98 \ue038]\ |
|
|
SCI [list \x9a \ue03a]\ |
|
|
CSI [list \x9b \ue03b]\ |
|
|
ST [list \x9c \ue03c]\ |
|
|
OSC [list \x9d \ue03d]\ |
|
|
PM [list \x9e \ue03e]\ |
|
|
APC [list \x9f \ue03f]\ |
|
|
] |
|
|
|
|
|
set visuals_opt [dict create] |
|
|
if {$opt_esc} { |
|
|
dict set visuals_opt ESC [list \x1b \u241b] |
|
|
} |
|
|
if {$opt_cr} { |
|
|
dict set visuals_opt CR [list \x0d \u240d] |
|
|
} |
|
|
if {$opt_lf} { |
|
|
dict set visuals_opt LF [list \x0a \u240a] |
|
|
} |
|
|
if {$opt_vt} { |
|
|
dict set visuals_opt VT [list \x0b \u240b] |
|
|
} |
|
|
if {$opt_ht} { |
|
|
dict set visuals_opt HT [list \x09 \u2409] |
|
|
} |
|
|
if {$opt_bs} { |
|
|
dict set visuals_opt BS [list \x08 \u2408] |
|
|
} |
|
|
if {$opt_sp} { |
|
|
dict set visuals_opt SP [list \x20 \u2420] |
|
|
} |
|
|
|
|
|
set visuals [dict merge $visuals_opt $visuals_c0 $visuals_c1] |
|
|
set charmap [list] |
|
|
dict for {nm chars} $visuals { |
|
|
lappend charmap {*}$chars |
|
|
} |
|
|
return [string map $charmap $string] |
|
|
|
|
|
|
|
|
#test of ISO2047 - 7bit - limited set, limited support, somewhat obscure glyphs |
|
|
#return [string map [list \033 \U2296 \007 \U237E] $string] |
|
|
} |
|
|
proc VIEWCODES {string} { |
|
|
if {![llength $string]} { |
|
|
return "" |
|
|
} |
|
|
set redb [a+ red bold] |
|
|
set greenb [a+ green bold] |
|
|
set GX [a+ black White bold] |
|
|
set unk [a+ yellow bold] |
|
|
set RST [a] |
|
|
|
|
|
#don't split into lines first - \n is valid within ST sections |
|
|
set output "" |
|
|
set splits [punk::ansi::ta::split_codes_single $string] |
|
|
foreach {pt code} $splits { |
|
|
append output "$pt" |
|
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
|
append output ${greenb}RST$RST |
|
|
} elseif {[punk::ansi::codetype::is_gx_open $code]} { |
|
|
append output ${GX}GX+$RST |
|
|
} elseif {[punk::ansi::codetype::is_gx_close $code]} { |
|
|
append output ${GX}GX-$RST |
|
|
} elseif {[punk::ansi::codetype::is_sgr $code]} { |
|
|
append output ${greenb}[ansistring VIEW $code]$RST |
|
|
} else { |
|
|
append output ${unk}[ansistring VIEW $code]$RST |
|
|
} |
|
|
} |
|
|
return $output |
|
|
} |
|
|
|
|
|
proc length {string} { |
|
|
#*** !doctools |
|
|
#[call [fun length] [arg string]] |
|
|
#[para]Returns the length of the string without ansi codes |
|
|
#[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. |
|
|
#[para]This is equivalent to calling string length on the result of stripansi $string |
|
|
#[para]Note that this returns the number of characters in the payload (after applying combiners), and is not always the same as the width of the string as rendered on a terminal. |
|
|
#[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. |
|
|
|
|
|
#todo - combiners/diacritics? just map them away here? |
|
|
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+} |
|
|
set string [regsub -all $re_diacritics $string ""] |
|
|
|
|
|
#we want length to return number of glyphs.. not screen width. Has to be consistent with index function |
|
|
string length [stripansi $string] |
|
|
} |
|
|
#included as a test/verification - slightly slower. |
|
|
#grapheme split version may end up being used once it supports unicode grapheme clusters |
|
|
proc length2 {string} { |
|
|
#we want length to return number of glyphs.. not screen width. Has to be consistent with index function |
|
|
return [llength [punk::char::grapheme_split [stripansi $string]]] |
|
|
} |
|
|
|
|
|
proc trimleft {string args} { |
|
|
set intext 0 |
|
|
set out "" |
|
|
#for split_codes only first or last pt can be empty string |
|
|
foreach {pt ansiblock} [split_codes $string] { |
|
|
if {!$intext} { |
|
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
|
append out $ansiblock |
|
|
} else { |
|
|
append out [string trimleft $pt]$ansiblock |
|
|
set intext 1 |
|
|
} |
|
|
} else { |
|
|
append out $pt$ansiblock |
|
|
} |
|
|
} |
|
|
return $out |
|
|
} |
|
|
proc trimright {string} { |
|
|
if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing |
|
|
set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] |
|
|
return [join $rtrimmed_list ""] |
|
|
} |
|
|
proc trim {string} { |
|
|
#make sure we do our ansi-scanning split only once - so use list-based trim operations |
|
|
#order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length |
|
|
#we save a single function call by calling both here rather than _splits_trim |
|
|
join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" |
|
|
} |
|
|
|
|
|
proc index {string index} { |
|
|
#*** !doctools |
|
|
#[call [fun index] [arg string] [arg index]] |
|
|
#[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) |
|
|
#[para]Returns the character (with applied ansi effect) at position index |
|
|
#[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. |
|
|
#[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) |
|
|
#[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. |
|
|
#[para]todo: SGR codes within ST-terminated strings not yet ignored properly |
|
|
#[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. |
|
|
#[para]As any operation using end-+<int> will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. |
|
|
#[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. |
|
|
#[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code |
|
|
#[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. |
|
|
#[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. |
|
|
#[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible |
|
|
#[para]Notes: |
|
|
#[para]This function has to split the whole string into plaintext & ansi codes even for a very low index |
|
|
#[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. |
|
|
#[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal |
|
|
|
|
|
set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run |
|
|
|
|
|
#todo - end-x +/-x+/-x etc |
|
|
set original_index $index |
|
|
|
|
|
set index [string map [list _ ""] $index] |
|
|
#short-circuit some trivial cases |
|
|
if {[string is integer -strict $index]} { |
|
|
if {$index < 0} {return ""} |
|
|
#this only short-circuits an index greater than length including ansi-chars |
|
|
#we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length |
|
|
if {$index > [string length $string]} {return ""} |
|
|
} else { |
|
|
if {[string match end* $index]} { |
|
|
#for end- we will probably have to blow a few cycles stripping first and calculate the length |
|
|
if {$index ne "end"} { |
|
|
set op [string index $index 3] |
|
|
set offset [string range $index 4 end] |
|
|
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} |
|
|
if {$op eq "+" && $offset != 0} { |
|
|
return "" |
|
|
} |
|
|
} else { |
|
|
set offset 0 |
|
|
} |
|
|
#by now, if op = + then offset = 0 so we only need to handle the minus case |
|
|
set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal |
|
|
if {$offset == 0} { |
|
|
set index [expr {$payload_len-1}] |
|
|
} else { |
|
|
set index [expr {($payload_len-1) - $offset}] |
|
|
} |
|
|
if {$index < 0} { |
|
|
#don't waste time splitting and looping the string |
|
|
return "" |
|
|
} |
|
|
} else { |
|
|
#we are trying to avoid evaluating unbraced expr of potentially insecure origin |
|
|
regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string |
|
|
if {[string is integer -strict $tail]} { |
|
|
#plain +-<int> |
|
|
if {$op eq "-"} { |
|
|
#return nothing for negative indices as per Tcl's lindex etc |
|
|
return "" |
|
|
} |
|
|
set index $tail |
|
|
} else { |
|
|
if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { |
|
|
if {[string is integer -strict $a] && [string is integer -strict $b]} { |
|
|
if {$op eq "-"} { |
|
|
set index [expr {$a - $b}] |
|
|
} else { |
|
|
set index [expr {$a + $b}] |
|
|
} |
|
|
} else { |
|
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
|
} |
|
|
} else { |
|
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
#any pt could be empty if using split_codes_single (or just first and last pt if split_codes) |
|
|
set low -1 |
|
|
set high -1 |
|
|
set pt_index -2 |
|
|
set pt_found -1 |
|
|
set char "" |
|
|
set codes_in_effect "" |
|
|
#we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go |
|
|
#(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) |
|
|
foreach {pt code} $ansisplits { |
|
|
incr pt_index 2 |
|
|
#we want an index per grapheme - whether it is doublewide or single |
|
|
|
|
|
if {$pt ne ""} { |
|
|
set graphemes [punk::char::grapheme_split $pt] |
|
|
set low [expr {$high + 1}] ;#last high |
|
|
#incr high [string length $pt] |
|
|
incr high [llength $graphemes] |
|
|
} |
|
|
|
|
|
if {$pt ne "" && ($index >= $low && $index <= $high)} { |
|
|
set pt_found $pt_index |
|
|
#set char [string index $pt $index-$low] |
|
|
set char [lindex $graphemes $index-$low] |
|
|
break |
|
|
} |
|
|
|
|
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
|
#we can throw away previous codes_in_effect |
|
|
set codes_in_effect "" |
|
|
} else { |
|
|
#may have partial resets - but we don't want to track individual states of SGR features |
|
|
#A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. |
|
|
#we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. |
|
|
#Review - consider if any other types of code make sense to retain in the output in this context. |
|
|
if {[punk::ansi::codetype::is_sgr $code]} { |
|
|
append codes_in_effect $code |
|
|
} |
|
|
} |
|
|
|
|
|
} |
|
|
if {$pt_found >= 0} { |
|
|
return $codes_in_effect$char |
|
|
} else { |
|
|
return "" |
|
|
} |
|
|
} |
|
|
|
|
|
#helper to convert indices (possibly of form x+y end-x etc) to numeric values within the payload range i.e without ansi |
|
|
#return empty string for each index that is out of range |
|
|
#review - this is possibly too slow to be very useful as is. |
|
|
# consider converting to oo and maintaining state of ansisplits so we don't repeat relatively expensive operations for same string |
|
|
proc INDEXABSOLUTE {string args} { |
|
|
set payload_len -1 ;# -1 as token to indicate we haven't calculated it yet (only want to call it once at most) |
|
|
set testindices [list] |
|
|
foreach index $args { |
|
|
if {[string is integer -strict $index]} { |
|
|
if {$index < 0} { |
|
|
lappend testindices "" |
|
|
} elseif {$index > [string length $string]} { |
|
|
#this only short-circuits an index greater than length including ansi-chars |
|
|
#we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length |
|
|
lappend testindices "" |
|
|
} else { |
|
|
lappend testindices $index |
|
|
} |
|
|
} else { |
|
|
if {[string match end* $index]} { |
|
|
#for end- we will probably have to blow a few cycles stripping first and calculate the length |
|
|
if {$index ne "end"} { |
|
|
set op [string index $index 3] |
|
|
set offset [string range $index 4 end] |
|
|
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} |
|
|
if {$op eq "+" && $offset != 0} { |
|
|
lappend testindices "" |
|
|
continue |
|
|
} |
|
|
} else { |
|
|
set offset 0 |
|
|
} |
|
|
#by now, if op = + then offset = 0 so we only need to handle the minus case |
|
|
if {$payload_len == -1} { |
|
|
set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal |
|
|
} |
|
|
if {$offset == 0} { |
|
|
set index [expr {$payload_len-1}] |
|
|
} else { |
|
|
set index [expr {($payload_len-1) - $offset}] |
|
|
} |
|
|
if {$index < 0} { |
|
|
lappend testindices "" |
|
|
} else { |
|
|
lappend testindices $index |
|
|
} |
|
|
} else { |
|
|
#we are trying to avoid evaluating unbraced expr of potentially insecure origin |
|
|
regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string |
|
|
if {[string is integer -strict $tail]} { |
|
|
#plain +-<int> |
|
|
if {$op eq "-"} { |
|
|
#return nothing for negative indices as per Tcl's lindex etc |
|
|
lappend indices "" |
|
|
continue |
|
|
} |
|
|
set index $tail |
|
|
lappend testindices $index |
|
|
} else { |
|
|
if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { |
|
|
if {[string is integer -strict $a] && [string is integer -strict $b]} { |
|
|
if {$op eq "-"} { |
|
|
set index [expr {$a - $b}] |
|
|
} else { |
|
|
set index [expr {$a + $b}] |
|
|
} |
|
|
} else { |
|
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
|
} |
|
|
} else { |
|
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
|
} |
|
|
lappend testindices $index |
|
|
} |
|
|
} |
|
|
} |
|
|
#assert - we made exactly one append to testindices if there was no error |
|
|
} |
|
|
#we now have numeric or empty string indices - but haven't fully checked they are within the underlying payload length |
|
|
|
|
|
if {[join $testindices ""] eq ""} { |
|
|
#don't calc ansistring length if no indices to check |
|
|
return $testindices |
|
|
} |
|
|
if {$payload_len == -1} { |
|
|
set payload_len [punk::ansi::ansistring::length $string] |
|
|
} |
|
|
set indices [list] |
|
|
foreach ti $testindices { |
|
|
if {$ti ne ""} { |
|
|
if {$ti < $payload_len} { |
|
|
lappend indices $ti |
|
|
} else { |
|
|
lappend indices "" |
|
|
} |
|
|
} else { |
|
|
lappend indices "" |
|
|
} |
|
|
} |
|
|
return $indices |
|
|
|
|
|
} |
|
|
|
|
|
#Todo - rows! |
|
|
#return pair of column extents occupied by the character index supplied. |
|
|
#single-width grapheme will return pair of integers of equal value |
|
|
#doulbe-width grapheme will return a pair of consecutive indices |
|
|
proc INDEXCOLUMNS {string idx} { |
|
|
#There is an index per grapheme - whether it is 1 or 2 columns wide |
|
|
set index [lindex [INDEXABSOLUTE $string $idx] 0] |
|
|
if {$index eq ""} { |
|
|
return "" |
|
|
} |
|
|
set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run |
|
|
set low -1 ;#low and high grapheme indexes |
|
|
set high -1 |
|
|
set lowc 0 ;#low and high column (1 based) |
|
|
set highc 0 |
|
|
set col1 "" |
|
|
set col2 "" |
|
|
set row 1 |
|
|
foreach {pt code} $ansisplits { |
|
|
if {$pt ne ""} { |
|
|
set ptlines [split $pt \n] |
|
|
set ptlinecount [llength $ptlines] |
|
|
set ptlineindex 0 |
|
|
foreach ptline $ptlines { |
|
|
set graphemes [punk::char::grapheme_split $ptline] |
|
|
if {$ptlineindex > 0} { |
|
|
#todo - account for previous \n as a grapheme .. what column? It should theoretically be in the rightmost column |
|
|
#zero width |
|
|
set low [expr {$high + 1}] |
|
|
set lowc [expr {$highc + 1}] |
|
|
set high $low |
|
|
set highc $lowc |
|
|
if {$index == $low} { |
|
|
set char \n |
|
|
set col1 $lowc |
|
|
set col2 $col1 |
|
|
break |
|
|
} |
|
|
incr row |
|
|
set lowc 0 |
|
|
set highc 0 |
|
|
} |
|
|
set low [expr {$high + 1}] ;#last high |
|
|
set lowc [expr {$highc + 1}] |
|
|
set high [expr {$low + [llength $graphemes] -1}] |
|
|
set highc [expr {$lowc + [punk::char::ansifreestring_width $ptline] -1}] |
|
|
#puts "---row:$row lowc:$lowc highc:$highc $ptline graphemes:$graphemes" |
|
|
if {$index >= $low && $index <= $high} { |
|
|
set char [lindex $graphemes $index-$low] |
|
|
set prefix [join [lrange $graphemes 0 [expr {$index-$low-1}]] ""] |
|
|
set prefixlen [punk::char::ansifreestring_width $prefix] |
|
|
set col1 [expr {$lowc + $prefixlen}] |
|
|
set gwidth [punk::char::ansifreestring_width $char] |
|
|
if {$gwidth < 1} { |
|
|
puts stderr "ansistring INDEXCOLUMNS warning - grapheme width zero at column $col1 ??" |
|
|
return "" ;#grapheme doesn't occupy a column and isn't a newline? - review |
|
|
} |
|
|
set col2 [expr {$col1 + ($gwidth -1)}] |
|
|
break |
|
|
} |
|
|
incr ptlineindex |
|
|
} |
|
|
} |
|
|
} |
|
|
if {$col1 ne "" & $col2 ne ""} { |
|
|
return [list $col1 $col2] |
|
|
} |
|
|
} |
|
|
|
|
|
#multiple rows - return a list? |
|
|
#return the grapheme index that occupies column col (could be first or second half of 2-wide grapheme) |
|
|
proc COLUMNINDEX {string col} { |
|
|
|
|
|
set ansisplits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run |
|
|
set lowindex -1 ;#low and high grapheme indexes |
|
|
set highindex -1 |
|
|
set lowc 0 ;#low and high column (1 based) |
|
|
set highc 0 |
|
|
set col1 "" |
|
|
set col2 "" |
|
|
foreach {pt code} $ansisplits { |
|
|
if {$pt ne ""} { |
|
|
if {[string first \n $pt] < 0} { |
|
|
set graphemes [punk::char::grapheme_split $pt] |
|
|
set lowindex [expr {$highindex + 1}] ;#last high |
|
|
set lowc [expr {$highc + 1}] |
|
|
set highindex [expr {$lowindex + [llength $graphemes] -1}] |
|
|
set highc [expr {$lowc + [punk::char::ansifreestring_width $pt] -1}] |
|
|
if {$col >= $lowc && $col <= $highc} { |
|
|
if {$col == $lowc} { |
|
|
return $lowindex |
|
|
} elseif {$col == $highc} { |
|
|
return $highindex |
|
|
} |
|
|
set index [expr {$lowindex + 1}] |
|
|
set str "" |
|
|
foreach g $graphemes { |
|
|
append str $g |
|
|
set width [punk::char::ansifreestring_width $str] |
|
|
if {$lowc + $width >= $col} { |
|
|
return $index |
|
|
} |
|
|
incr index |
|
|
} |
|
|
error "ansistring COLUMNINDEX '$string' $col not found" ;#assert - shouldn't happen |
|
|
} |
|
|
} else { |
|
|
error "ansistring COLUMNINDEX multiline not implemented" |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
proc _splits_trimleft {sclist} { |
|
|
set intext 0 |
|
|
set outlist [list] |
|
|
foreach {pt ansiblock} $sclist { |
|
|
if {!$intext} { |
|
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
|
lappend outlist "" $ansiblock |
|
|
} else { |
|
|
lappend outlist [string trimleft $pt] $ansiblock |
|
|
set intext 1 |
|
|
} |
|
|
} else { |
|
|
lappend outlist $pt $ansiblock |
|
|
} |
|
|
} |
|
|
return $outlist |
|
|
} |
|
|
proc _splits_trimright {sclist} { |
|
|
set intext 0 |
|
|
set outlist [list] |
|
|
foreach {pt ansiblock} [lreverse $sclist] { |
|
|
if {!$intext} { |
|
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
|
lappend outlist "" $ansiblock |
|
|
} else { |
|
|
lappend outlist [string trimright $pt] $ansiblock |
|
|
set intext 1 |
|
|
} |
|
|
} else { |
|
|
lappend outlist $pt $ansiblock |
|
|
} |
|
|
} |
|
|
return [lreverse $outlist] |
|
|
} |
|
|
proc _splits_trim {sclist} { |
|
|
return [_splits_trimright [_splits_trimleft $sclist]] |
|
|
} |
|
|
|
|
|
#*** !doctools |
|
|
#[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] |
|
|
} |
|
|
|
|
|
namespace eval punk::ansi::internal { |
|
|
proc splitn {str {len 1}} { |
|
|
#from textutil::split::splitn |
|
|
if {$len <= 0} { |
|
|
return -code error "len must be > 0" |
|
|
} |
|
|
if {$len == 1} { |
|
|
return [split $str {}] |
|
|
} |
|
|
set result [list] |
|
|
set max [string length $str] |
|
|
set i 0 |
|
|
set j [expr {$len -1}] |
|
|
while {$i < $max} { |
|
|
lappend result [string range $str $i $j] |
|
|
incr i $len |
|
|
incr j $len |
|
|
} |
|
|
return $result |
|
|
} |
|
|
proc splitx {str {regexp {[\t \r\n]+}}} { |
|
|
#from textutil::split::splitx |
|
|
# Bugfix 476988 |
|
|
if {[string length $str] == 0} { |
|
|
return {} |
|
|
} |
|
|
if {[string length $regexp] == 0} { |
|
|
return [::split $str ""] |
|
|
} |
|
|
if {[regexp $regexp {}]} { |
|
|
return -code error \ |
|
|
"splitting on regexp \"$regexp\" would cause infinite loop" |
|
|
} |
|
|
set list {} |
|
|
set start 0 |
|
|
while {[regexp -start $start -indices -- $regexp $str match submatch]} { |
|
|
foreach {subStart subEnd} $submatch break |
|
|
foreach {matchStart matchEnd} $match break |
|
|
incr matchStart -1 |
|
|
incr matchEnd |
|
|
lappend list [string range $str $start $matchStart] |
|
|
if {$subStart >= $start} { |
|
|
lappend list [string range $str $subStart $subEnd] |
|
|
} |
|
|
set start $matchEnd |
|
|
} |
|
|
lappend list [string range $str $start end] |
|
|
return $list |
|
|
} |
|
|
|
|
|
proc printing_length_addchar {i c} { |
|
|
upvar outchars outc |
|
|
upvar outsizes outs |
|
|
set nxt [llength $outc] |
|
|
if {$i < $nxt} { |
|
|
lset outc $i $c |
|
|
} else { |
|
|
lappend outc $c |
|
|
} |
|
|
} |
|
|
|
|
|
#string to 2digit hex - e.g used by XTGETTCAP |
|
|
proc str2hex {input} { |
|
|
set 2hex "" |
|
|
foreach ch [split $input ""] { |
|
|
append 2hex [format %02X [scan $ch %c]] |
|
|
} |
|
|
return $2hex |
|
|
} |
|
|
proc hex2str {2digithexchars} { |
|
|
set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) |
|
|
if {$2digithexchars eq ""} { |
|
|
return "" |
|
|
} |
|
|
if {[string length $2digithexchars] % 2 != 0} { |
|
|
error "hex2str requires an even number of hex digits (2 per character)" |
|
|
} |
|
|
set 2str "" |
|
|
foreach pair [splitn $2digithexchars 2] { |
|
|
append 2str [format %c 0x$pair] |
|
|
} |
|
|
return $2str |
|
|
} |
|
|
} |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
## Ready |
|
|
package provide punk::ansi [namespace eval punk::ansi { |
|
|
variable version |
|
|
set version 0.1.1 |
|
|
}] |
|
|
return |
|
|
|
|
|
|
|
|
#*** !doctools |
|
|
#[manpage_end] |
|
|
|
|
|
|