You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1363 lines
57 KiB
1363 lines
57 KiB
# -*- tcl -*- |
|
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt |
|
# |
|
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
|
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# (C) 2023 |
|
# |
|
# @@ Meta Begin |
|
# Application punk::ansi 999999.0a1.0 |
|
# Meta platform tcl |
|
# Meta license <unspecified> |
|
# @@ Meta End |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
# doctools header |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
#*** !doctools |
|
#[manpage_begin punkshell_module_punk::ansi 0 999999.0a1.0] |
|
#[copyright "2023"] |
|
#[titledesc {Ansi string functions}] [comment {-- Name section and table of contents description --}] |
|
#[moddesc {punk Ansi library}] [comment {-- Description at end of page heading --}] |
|
#[require punk::ansi] |
|
#[keywords module ansi terminal console string] |
|
#[description] |
|
#[para]Ansi based terminal control string functions |
|
#[para]See [package punk::ansi::console] for related functions for controlling a console |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section Overview] |
|
#[para] overview of punk::ansi |
|
#[para]punk::ansi functions return their values - no implicit emission to console/stdout |
|
#[subsection Concepts] |
|
#[para]Ansi codes can be used to control most terminals on most platforms in an 'almost' standard manner |
|
#[para]There are many differences in terminal implementations - but most should support a core set of features |
|
#[para]punk::ansi does not contain any code for direct terminal manipulation via the local system APIs. |
|
#[para]Sticking to ansi codes where possible may be better for cross-platform and remote operation where such APIs are unlikely to be useable. |
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Requirements |
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[subsection dependencies] |
|
#[para] packages used by punk::ansi |
|
#[list_begin itemized] |
|
|
|
package require Tcl 8.6 |
|
#*** !doctools |
|
#[item] [package {Tcl 8.6}] |
|
|
|
# #package require frobz |
|
# #*** !doctools |
|
# #[item] [package {frobz}] |
|
|
|
#*** !doctools |
|
#[list_end] |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
|
|
#*** !doctools |
|
#[section API] |
|
|
|
|
|
|
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
namespace eval punk::ansi { |
|
#*** !doctools |
|
#[subsection {Namespace punk::ansi}] |
|
#[para] Core API functions for punk::ansi |
|
#[list_begin definitions] |
|
|
|
|
|
#see also ansicolor page on wiki https://wiki.tcl-lang.org/page/ANSI+color+control |
|
|
|
variable test "blah\033\[1;33mETC\033\[0;mOK" |
|
|
|
|
|
#Note that a? is actually a pattern. We can't explicitly match it without also matcing a+ ab etc. Presumably this won't matter here. |
|
namespace export\ |
|
{a?} {a+} a \ |
|
ansistring\ |
|
convert*\ |
|
clear*\ |
|
cursor_*\ |
|
detect*\ |
|
get_*\ |
|
move*\ |
|
reset*\ |
|
strip*\ |
|
test_decaln\ |
|
titleset\ |
|
|
|
|
|
variable escape_terminals |
|
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). |
|
dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"] |
|
#dict set escape_terminals CSI [list J K m n A B C D E F G s u] ;#basic |
|
dict set escape_terminals OSC [list \007 \033\\ \u009c] ;#note mix of 1 and 2-byte terminals |
|
dict set escape_terminals DCS [list \007 \033\\ \u009c] |
|
dict set escape_terminals MISC [list \007 \033\\ \u009c] |
|
#NOTE - we are assuming an OSC or DCS started with one type of sequence (7 or 8bit) can be terminated by either 7 or 8 bit ST (or BEL e.g wezterm ) |
|
#This using a different type of ST to that of the opening sequence is presumably unlikely in the wild - but who knows? |
|
|
|
variable standalone_codes |
|
set standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] |
|
#review - there doesn't seem to be an \x1b#7 |
|
# https://espterm.github.io/docs/VT100%20escape%20codes.html |
|
|
|
#self-contained 2 byte ansi escape sequences - review more? |
|
set ansi_2byte_codes_dict [dict create\ |
|
"reset_terminal" "\u001bc"\ |
|
"save_cursor_posn" "\u001b7"\ |
|
"restore_cursor_posn" "\u001b8"\ |
|
"cursor_up_one" "\u001bM"\ |
|
"NEL - Next Line" "\u001bE"\ |
|
"IND - Down one line" "\u001bD"\ |
|
"HTS - Set Tab Stop" "\u001bH"\ |
|
"DECPAM app keypad" "\x1b="\ |
|
"DECPNM norm keypad" "\x1b>"\ |
|
] |
|
|
|
#control strings |
|
#https://www.ecma-international.org/wp-content/uploads/ECMA-48_5th_edition_june_1991.pdf |
|
#<excerpt> |
|
#A control string is a string of bit combinations which may occur in the data stream as a logical entity for |
|
#control purposes. A control string consists of an opening delimiter, a command string or a character string, |
|
#and a terminating delimiter, the STRING TERMINATOR (ST). |
|
#A command string is a sequence of bit combinations in the range 00/08 to 00/13 and 02/00 to 07/14. |
|
#A character string is a sequence of any bit combination, except those representing START OF STRING |
|
#(SOS) or STRING TERMINATOR (ST). |
|
#The interpretation of the command string or the character string is not defined by this Standard, but instead |
|
#requires prior agreement between the sender and the recipient of the data. |
|
#The opening delimiters defined in this Standard are |
|
#a) APPLICATION PROGRAM COMMAND (APC) |
|
#b) DEVICE CONTROL STRING (DCS) |
|
#c) OPERATING SYSTEM COMMAND (OSC) |
|
#d) PRIVACY MESSAGE (PM) |
|
#e) START OF STRING (SOS) |
|
#</excerpt> |
|
|
|
#debatable whether strip should reveal the somethinghidden - some terminals don't hide it anyway. |
|
# "PM - Privacy Message" "\u001b^somethinghidden\033\\"\ |
|
#The intent is that it's not rendered to the terminal - so on balance it seems best to strip it out. |
|
#todo - review - printing_length calculations affected by whether terminal honours PMs or not. detect and accomodate. |
|
#review - can terminals handle SGR codes within a PM? |
|
#Wezterm will hide PM,SOS,APC - but not any part following an SGR code - i.e it seems to terminate hiding before the ST (apparently at the ) |
|
proc controlstring_PM {text} { |
|
return "\x1b^${text}\033\\" |
|
} |
|
proc controlstring_PM8 {text} { |
|
return "\x9e${text}\x9c" |
|
} |
|
proc controlstring_SOS {text} { |
|
return "\x1bX${text}\033\\" |
|
} |
|
proc controlstring_SOS8 {text} { |
|
return "\x98${text}\x9c" |
|
} |
|
proc controlstring_APC {text} { |
|
return "\x1b_${text}\033\\" |
|
} |
|
proc controlstring_APC8 {text} { |
|
return "\x9f${text}\x9c" |
|
} |
|
|
|
#candidate for zig/c implementation? |
|
proc stripansi {text} { |
|
#*** !doctools |
|
#[call [fun stripansi] [arg text] ] |
|
#[para]Return a string with ansi codes stripped out |
|
|
|
#todo - character set selection - SS2 SS3 - how are they terminated? REVIEW |
|
|
|
variable escape_terminals ;#dict |
|
variable standalone_codes ;#map to empty string |
|
|
|
set text [convert_g0 $text] |
|
|
|
|
|
#we should just map away the 2-byte sequences too |
|
#standalone 3 byte VT100 sequences - some of these work in wezterm |
|
#\x1b#3 double-height letters top half |
|
#\x1b#4 double-height letters bottom half |
|
#\x1b#5 single-width line |
|
#\x1b#6 double-width line |
|
#\x1b#8 dec test fill screen |
|
|
|
set text [string map $standalone_codes $text] |
|
|
|
#we process char by char - line-endings whether \r\n or \n should be processed as per any other character. |
|
#line endings can theoretically occur within an ansi escape sequence payload (review e.g title?) |
|
|
|
set inputlist [split $text ""] |
|
set outputlist [list] |
|
|
|
set in_escapesequence 0 |
|
#assumption - undertext already 'rendered' - ie no backspaces or carriagereturns or other cursor movement controls |
|
set i 0 |
|
foreach u $inputlist { |
|
set v [lindex $inputlist $i+1] |
|
set uv ${u}${v} |
|
if {$in_escapesequence eq "2b"} { |
|
#2nd byte - done. |
|
set in_escapesequence 0 |
|
} elseif {$in_escapesequence != 0} { |
|
set endseq [dict get $escape_terminals $in_escapesequence] |
|
if {$u in $endseq} { |
|
set in_escapesequence 0 |
|
} elseif {$uv in $endseq} { |
|
set in_escapesequence 2b ;#flag next byte as last in sequence |
|
} |
|
} else { |
|
#handle both 7-bit and 8-bit CSI and OSC |
|
if {[regexp {^(?:\033\[|\u009b)} $uv]} { |
|
set in_escapesequence CSI |
|
} elseif {[regexp {^(?:\033\]|\u009d)} $uv]} { |
|
set in_escapesequence OSC |
|
} elseif {[regexp {^(?:\033P|\u0090)} $uv]} { |
|
set in_escapesequence DCS |
|
} elseif {[regexp {^(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)} $uv]} { |
|
#SOS,PM,APC - all terminated with ST |
|
set in_escapesequence MISC |
|
} else { |
|
lappend outputlist $u |
|
} |
|
} |
|
incr i |
|
} |
|
return [join $outputlist ""] |
|
} |
|
|
|
#review - what happens when no terminator? |
|
#todo - map other chars to unicode equivs |
|
proc convert_g0 {text} { |
|
#using not \033 inside to stop greediness - review how does it compare to ".*?" |
|
set re {\033\(0[^\033]*\033\(B} |
|
set re2 {\033\(0(.*)\033\(B} ;#capturing |
|
set parts [::punk::ansi::ta::_perlish_split $re $text] |
|
set out "" |
|
foreach {pt g} $parts { |
|
append out $pt |
|
if {$g ne ""} { |
|
#puts --$g-- |
|
#box sample |
|
#lqk |
|
#x x |
|
#mqj |
|
#m = boxd_lur |
|
#set map [list l \u250f k \u2513] ;#heavy |
|
set map [list l \u250c q \u2500 k \u2510 x \u2502 m \u2514 j \u2518] ;#light |
|
|
|
regexp $re2 $g _match contents |
|
append out [string map $map $contents] |
|
} |
|
} |
|
return $out |
|
} |
|
|
|
#todo - convert esc(0 graphics sequences to single char unicode equivalents e.g box drawing set |
|
# esc) ?? |
|
proc stripansi_gx {text} { |
|
#e.g "\033(0" - select VT100 graphics for character set G0 |
|
#e.g "\033(B" - reset |
|
#e.g "\033)0" - select VT100 graphics for character set G1 |
|
#e.g "\033)X" - where X is any char other than 0 to reset ?? |
|
return [convert_g0 $text] |
|
} |
|
|
|
|
|
#CSI <n> m = SGR (Select Graphic Rendition) |
|
variable SGR_setting_map { |
|
bold 1 dim 2 blink 5 fastblink 6 noblink 25 hide 8 normal 22 |
|
underline 4 doubleunderline 21 nounderline 24 strike 9 nostrike 29 italic 3 noitalic 23 |
|
reverse 7 noreverse 27 defaultfg 39 defaultbg 49 |
|
overline 53 nooverline 55 frame 51 framecircle 52 noframe 54 |
|
} |
|
variable SGR_colour_map { |
|
black 30 red 31 green 32 yellow 33 blue 34 purple 35 cyan 36 white 37 |
|
Black 40 Red 41 Green 42 Yellow 43 Blue 44 Purple 45 Cyan 46 White 47 |
|
BLACK 100 RED 101 GREEN 102 YELLOW 103 BLUE 104 PURPLE 105 CYAN 106 WHITE 107 |
|
} |
|
variable SGR_map |
|
set SGR_map [dict merge $SGR_colour_map $SGR_setting_map] |
|
|
|
|
|
proc colourmap1 {{bgname White}} { |
|
package require textblock |
|
|
|
set bg [textblock::block 33 3 "[a+ $bgname] [a]"] |
|
set colormap "" |
|
for {set i 0} {$i <= 7} {incr i} { |
|
append colormap "_[a+ white bold 48\;5\;$i] $i [a]" |
|
} |
|
set map1 [overtype::left -transparent _ $bg "\n$colormap"] |
|
return $map1 |
|
} |
|
proc colourmap2 {{bgname White}} { |
|
package require textblock |
|
set bg [textblock::block 39 3 "[a+ $bgname] [a]"] |
|
set colormap "" |
|
for {set i 8} {$i <= 15} {incr i} { |
|
append colormap "_[a+ black normal 48\;5\;$i] $i [a]" ;#black normal is blacker than black bold - which often displays as a grey |
|
} |
|
set map2 [overtype::left -transparent _ $bg "\n$colormap"] |
|
return $map2 |
|
} |
|
proc a? {args} { |
|
#*** !doctools |
|
#[call [fun a?] [opt {ansicode...}]] |
|
#[para]Return an ansi string representing a table of codes and a panel showing the colours |
|
variable SGR_setting_map |
|
variable SGR_colour_map |
|
|
|
if {![llength $args]} { |
|
set out "" |
|
append out $SGR_setting_map \n |
|
append out $SGR_colour_map \n |
|
|
|
try { |
|
package require overtype ;# circular dependency - many components require overtype. Here we only need it for nice layout in the a? query proc - so we'll do a soft-dependency by only loading when needed and also wrapping in a try |
|
set bgname "White" |
|
set map1 [colourmap1 $bgname] |
|
set map1 [overtype::centre -transparent 1 $map1 "[a black $bgname]Standard colours[a]"] |
|
set map2 [colourmap2 $bgname] |
|
set map2 [overtype::centre -transparent 1 $map2 "[a black $bgname]High-intensity colours[a]"] |
|
append out [textblock::join $map1 " " $map2] \n |
|
#append out $map1[a] \n |
|
#append out $map2[a] \n |
|
|
|
|
|
|
|
} on error {result options} { |
|
puts stderr "Failed to draw colormap" |
|
puts stderr "$result" |
|
} finally { |
|
return $out |
|
} |
|
} else { |
|
set result [list] |
|
set rmap [lreverse $map] |
|
foreach i $args { |
|
if {[string is integer -strict $i]} { |
|
if {[dict exists $rmap $i]} { |
|
lappend result $i [dict get $rmap $i] |
|
} |
|
} else { |
|
if {[dict exists $map $i]} { |
|
lappend result $i [dict get $map $i] |
|
} |
|
} |
|
} |
|
return $result |
|
} |
|
} |
|
proc a+ {args} { |
|
#*** !doctools |
|
#[call [fun a+] [opt {ansicode...}]] |
|
#[para]Returns the ansi code to apply those from the supplied list - without any reset being performed first |
|
#[para] e.g to set foreground red and bold |
|
#[para]punk::ansi::a red bold |
|
#[para]to set background red |
|
#[para]punk::ansi::a Red |
|
#[para]see [cmd punk::ansi::a?] to display a list of codes |
|
|
|
#don't disable ansi here. |
|
#we want this to be available to call even if ansi is off |
|
variable SGR_map |
|
set t [list] |
|
foreach i $args { |
|
if {[string is integer -strict $i]} { |
|
lappend t $i |
|
} elseif {[string first ";" $i] >=0} { |
|
#literal with params |
|
lappend t $i |
|
} else { |
|
if {[dict exists $SGR_map $i]} { |
|
lappend t [dict get $SGR_map $i] |
|
} else { |
|
#accept examples for foreground |
|
# 256f-# or 256fg-# or 256f# |
|
# rgbf-<r>-<g>-<b> or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> |
|
if {[string match -nocase "256f*" $i]} { |
|
set cc [string trim [string range $i 4 end] -gG] |
|
lappend t "38;5;$cc" |
|
} elseif {[string match -nocase 256b* $i]} { |
|
set cc [string trim [string range $i 4 end] -gG] |
|
lappend t "48;5;$cc" |
|
} elseif {[string match -nocase rgbf* $i]} { |
|
set rgb [string trim [string range $i 4 end] -gG] |
|
lassign [split $rgb -] r g b |
|
lappend t "38;2;$r;$g;$b" |
|
} elseif {[string match -nocase rgbb* $i]} { |
|
set rgb [string trim [string range $i 4 end] -gG] |
|
lassign [split $rgb -] r g b |
|
lappend t "48;2;$r;$g;$b" |
|
} |
|
} |
|
} |
|
} |
|
# \033 - octal. equivalently \x1b in hex which is more common in documentation |
|
if {![llength $t]} { |
|
return "" ;# a+ nonexistent should return nothing rather than a reset ( \033\[\;m is a reset even without explicit zero(s)) |
|
} |
|
return "\x1b\[[join $t {;}]m" |
|
} |
|
proc a {args} { |
|
#*** !doctools |
|
#[call [fun a] [opt {ansicode...}]] |
|
#[para]Returns the ansi code to reset any current settings and apply those from the supplied list |
|
#[para] by calling punk::ansi::a with no arguments - the result is a reset to plain text |
|
#[para] e.g to set foreground red and bold |
|
#[para]punk::ansi::a red bold |
|
#[para]to set background red |
|
#[para]punk::ansi::a Red |
|
#[para]see [cmd punk::ansi::a?] to display a list of codes |
|
|
|
|
|
#don't disable ansi here. |
|
#we want this to be available to call even if ansi is off |
|
variable SGR_map |
|
set t [list] |
|
foreach i $args { |
|
if {[string is integer -strict $i]} { |
|
lappend t $i |
|
} elseif {[string first ";" $i] >=0} { |
|
#literal with params |
|
lappend t $i |
|
} else { |
|
if {[dict exists $SGR_map $i]} { |
|
lappend t [dict get $SGR_map $i] |
|
} else { |
|
#accept examples for foreground |
|
# 256f-# or 256fg-# or 256f# |
|
# rgbf-<r>-<g>-<b> or rgbfg-<r>-<g>-<b> or rgbf<r>-<g>-<b> |
|
if {[string match -nocase "256f*" $i]} { |
|
set cc [string trim [string range $i 4 end] -gG] |
|
lappend t "38;5;$cc" |
|
} elseif {[string match -nocase 256b* $i]} { |
|
set cc [string trim [string range $i 4 end] -gG] |
|
lappend t "48;5;$cc" |
|
} elseif {[string match -nocase rgbf* $i]} { |
|
set rgb [string trim [string range $i 4 end] -gG] |
|
lassign [split $rgb -] r g b |
|
lappend t "38;2;$r;$g;$b" |
|
} elseif {[string match -nocase rgbb* $i]} { |
|
set rgb [string trim [string range $i 4 end] -gG] |
|
lassign [split $rgb -] r g b |
|
lappend t "48;2;$r;$g;$b" |
|
} |
|
} |
|
} |
|
} |
|
# \033 - octal. equivalently \x1b in hex which is more common in documentation |
|
# empty list [a=] should do reset - same for [a= nonexistant] |
|
# explicit reset at beginning of parameter list for a= (as opposed to a+) |
|
set t [linsert $t 0 0] |
|
return "\x1b\[[join $t {;}]m" |
|
} |
|
|
|
|
|
|
|
|
|
proc get_code_name {code} { |
|
#*** !doctools |
|
#[call [fun get_code_name] [arg code]] |
|
#[para]for example |
|
#[para] get_code_name red will return 31 |
|
#[para] get_code_name 31 will return red |
|
variable SGR_map |
|
set res [list] |
|
foreach i [split $code ";"] { |
|
set ix [lsearch -exact $SGR_map $i] |
|
if {[string is digit -strict $code]} { |
|
if {$ix>-1} {lappend res [lindex $SGR_map [incr ix -1]]} |
|
} else { |
|
#reverse lookup code from name |
|
if {$ix>-1} {lappend res [lindex $SGR_map [incr ix]]} |
|
} |
|
} |
|
set res |
|
} |
|
proc reset {} { |
|
#*** !doctools |
|
#[call [fun reset]] |
|
#[para]reset console |
|
return "\x1bc" |
|
} |
|
proc reset_soft {} { |
|
#*** !doctools |
|
#[call [fun reset_soft]] |
|
return \x1b\[!p |
|
} |
|
proc reset_colour {} { |
|
#*** !doctools |
|
#[call [fun reset_colour]] |
|
#[para]reset colour only |
|
return "\x1b\[0m" |
|
} |
|
|
|
# -- --- --- --- --- |
|
proc clear {} { |
|
#*** !doctools |
|
#[call [fun clear]] |
|
return "\033\[2J" |
|
} |
|
proc clear_above {} { |
|
#*** !doctools |
|
#[call [fun clear_above]] |
|
return \033\[1J |
|
} |
|
proc clear_below {} { |
|
#*** !doctools |
|
#[call [fun clear_below]] |
|
return \033\[0J |
|
} |
|
|
|
proc clear_all {} { |
|
# - doesn't work?? |
|
return \033\[3J |
|
} |
|
#see also erase_ functions |
|
# -- --- --- --- --- |
|
|
|
proc cursor_on {} { |
|
#*** !doctools |
|
#[call [fun cursor_on]] |
|
return "\033\[?25h" |
|
} |
|
proc cursor_off {} { |
|
#*** !doctools |
|
#[call [fun cursor_off]] |
|
return "\033\[?25l" |
|
} |
|
|
|
# -- --- --- --- --- |
|
proc move {row col} { |
|
#*** !doctools |
|
#[call [fun move] [arg row] [arg col]] |
|
#[para]Return an ansi sequence to move to row,col |
|
#[para]aka cursor home |
|
return \033\[${row}\;${col}H |
|
} |
|
proc move_emit {row col data args} { |
|
#*** !doctools |
|
#[call [fun move_emit] [arg row] [arg col] [arg data] [opt {row col data...}]] |
|
#[para]Return an ansi string representing a move to row col with data appended |
|
#[para]row col data can be repeated any number of times to return a string representing the output of the data elements at all those points |
|
#[para]Compare to punk::console::move_emit which calls this function - but writes it to stdout |
|
#[para]punk::console::move_emit_return will also return the cursor to the original position |
|
#[para]There is no punk::ansi::move_emit_return because in a standard console there is no ansi string which can represent a jump back to starting position. |
|
#[para]There is an ansi code to write the current cursor position to stdin (which will generally display on the console) - this is not quite the same thing. |
|
#[para]punk::console::move_emit_return does it by emitting that code and starting a loop to read stdin |
|
#[para]punk::ansi could implement a move_emit_return using the punk::console mechanism - but the resulting string would capture the cursor position at the time the string is built - which is not necessarily when the string is used. |
|
#[para]The following example shows how to do this manually, emitting the string blah at screen position 10,10 and emitting DONE back at the line we started: |
|
#[para][example {punk::ansi::move_emit 10 10 blah {*}[punk::console::get_cursor_pos_list] DONE}] |
|
#[para]A string created by any move_emit_return for punk::ansi would not behave in an intuitive manner compared to other punk::ansi move functions - so is deliberately omitted. |
|
|
|
set out "" |
|
append out \033\[${row}\;${col}H$data |
|
foreach {row col data} $args { |
|
append out \033\[${row}\;${col}H$data |
|
} |
|
return $out |
|
} |
|
proc move_forward {{n 1}} { |
|
#*** !doctools |
|
#[call [fun move_forward] [arg n]] |
|
return \033\[${n}C |
|
} |
|
proc move_back {{n 1}} { |
|
#*** !doctools |
|
#[call [fun move_back] [arg n]] |
|
return \033\[${n}D |
|
} |
|
proc move_up {{n 1}} { |
|
#*** !doctools |
|
#[call [fun move_up] [arg n]] |
|
return \033\[${n}A |
|
} |
|
proc move_down {{n 1}} { |
|
#*** !doctools |
|
#[call [fun move_down] [arg n]] |
|
return \033\[${n}B |
|
} |
|
# -- --- --- --- --- |
|
|
|
|
|
# -- --- --- --- --- |
|
proc erase_line {} { |
|
#*** !doctools |
|
#[call [fun erase_line]] |
|
return \033\[2K |
|
} |
|
proc erase_sol {} { |
|
#*** !doctools |
|
#[call [fun erase_sol]] |
|
#[para]Erase to start of line, leaving cursor position alone. |
|
return \033\[1K |
|
} |
|
proc erase_eol {} { |
|
#*** !doctools |
|
#[call [fun erase_eol]] |
|
return \033\[K |
|
} |
|
#see also clear_above clear_below |
|
# -- --- --- --- --- |
|
|
|
proc cursor_pos {} { |
|
#*** !doctools |
|
#[call [fun cursor_pos]] |
|
#[para]cursor_pos unlikely to be useful on it's own like this as when written to the terminal, this sequence causes the terminal to emit the row;col sequence to stdin |
|
#[para]The output on screen will look something like ^[lb][lb]47;3R |
|
#[para]Use punk::console::get_cursor_pos or punk::console::get_cursor_pos_list instead. |
|
#[para]These functions will emit the code - but read it in from stdin so that it doesn't display, and then return the row and column as a colon-delimited string or list respectively. |
|
#[para]The punk::ansi::cursor_pos function is used by punk::console::get_cursor_pos and punk::console::get_cursor_pos_list |
|
return \033\[6n |
|
} |
|
|
|
|
|
#alternative to string terminator is \007 - |
|
proc titleset {windowtitle} { |
|
#*** !doctools |
|
#[call [fun titleset] [arg windowtitles]] |
|
#[para]Returns the code to set the title of the terminal window to windowtitle |
|
#[para]This may not work on terminals which have multiple panes/windows |
|
return "\033\]2;$windowtitle\033\\" ;#works for xterm and most derivatives |
|
} |
|
#titleget - https://invisible-island.net/xterm/xterm.faq.html#how2_title |
|
#no cross-platform ansi-only mechanism ? |
|
|
|
proc test_decaln {} { |
|
#Screen Alignment Test |
|
#Reset margins, move cursor to the top left, and fill the screen with 'E' |
|
#(doesn't work on many terminals - seems to work in FreeBSD 13.2 and wezterm on windows) |
|
return \x1b#8 |
|
} |
|
|
|
#length of text for printing characters only |
|
#review - unicode and other non-printing chars and combining sequences? |
|
#certain unicode chars are full-width (single char 2 columns wide) e.g see "Halfwdith and fullwidth forms" and ascii_fuillwidth blocks in punk::char::charset_names |
|
#review - is there an existing library or better method? print to a terminal and query cursor position? |
|
#Note this length calculation is only suitable for lines being appended to other strings if the line is pre-processed to account for backspace and carriage returns first |
|
#If the raw line is appended to another string without such processing - the backspaces & carriage returns can affect data prior to the start of the string. |
|
proc printing_length {line} { |
|
if {[string first \n $line] >= 0} { |
|
error "line_print_length must not contain newline characters" |
|
} |
|
|
|
#review - |
|
set line [punk::ansi::stripansi $line] |
|
|
|
set line [punk::char::strip_nonprinting_ascii $line] ;#only strip nonprinting after stripansi - some like BEL are part of ansi |
|
#backspace 0x08 only erases* printing characters anyway - so presumably order of processing doesn't matter |
|
#(* more correctly - moves cursor back) |
|
#backspace will not move beyond a preceding newline - but we have disallowed newlines for this function already |
|
#leading backspaces will eat into any string (even prompt in non-standard tclsh shell) that is prepended to the line |
|
# - but for the purposes of overtype we wouldn't want that - so we strip it here in the length calculation and should strip leading backspaces in the actual data concatenation operations too. |
|
#curiously - a backspace sequence at the end of a string also doesn't reduce the printing width - so we can also strip from RHS |
|
|
|
#Note that backspace following a \t will only shorten the string by one (ie it doesn't move back the whole tab width like it does interactively in the terminal) |
|
#for this reason - it would seem best to normalize the tabs to spaces prior to performing the backspace calculation - otherwise we won't account for the 'short' tabs it effectivley produces |
|
#normalize tabs to an appropriate* width |
|
#*todo - handle terminal/context where tabwidth != the default 8 spaces |
|
set line [textutil::tabify::untabify2 $line] |
|
|
|
set bs [format %c 0x08] |
|
#set line [string map [list "\r${bs}" "\r"] $line] ;#backsp following a \r will have no effect |
|
set line [string trim $line $bs] |
|
set n 0 |
|
|
|
set chars [split $line ""] |
|
#build an output |
|
set idx 0 |
|
set outchars [list] |
|
set outsizes [list] |
|
foreach c $chars { |
|
if {$c eq $bs} { |
|
if {$idx > 0} { |
|
incr idx -1 |
|
} |
|
} elseif {$c eq "\r"} { |
|
set idx 0 |
|
} else { |
|
punk::ansi::internal::printing_length_addchar $idx $c |
|
incr idx |
|
} |
|
} |
|
set line2 [join $outchars ""] |
|
return [punk::char::string_width $line2] |
|
} |
|
|
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::ansi ---}] |
|
} |
|
|
|
|
|
namespace eval punk::ansi { |
|
|
|
|
|
# -- --- --- --- --- --- |
|
#XTGETTCAP |
|
# xterm responds with |
|
# DCS 1 + r Pt ST for valid requests, adding to Pt an = , and |
|
# the value of the corresponding string that xterm would send, |
|
# or |
|
# DCS 0 + r ST for invalid requests. |
|
# The strings are encoded in hexadecimal (2 digits per |
|
# character). If more than one name is given, xterm replies |
|
# with each name/value pair in the same response. An invalid |
|
# name (one not found in xterm's tables) ends processing of the |
|
# list of names. |
|
proc xtgetcap {keylist} { |
|
#ESC P = 0x90 = DCS = Device Control String |
|
set hexkeys [list] |
|
foreach k $keylist { |
|
lappend hexkeys [util::str2hex $k] |
|
} |
|
set payload [join $hexkeys ";"] |
|
return "\x1bP+q$payload\x1b\\" |
|
} |
|
proc xtgetcap2 {keylist} { |
|
#ESC P = 0x90 = DCS = Device Control String |
|
set hexkeys [list] |
|
foreach k $keylist { |
|
lappend hexkeys [util::str2hex $k] |
|
} |
|
set payload [join $hexkeys ";"] |
|
return "\u0090+q$payload\u009c" |
|
} |
|
namespace eval codetype { |
|
#Functions that operate on a single ansi code sequence - not a sequence, and not codes embedded in another string |
|
proc is_sgr {code} { |
|
#SGR (Select Graphic Rendition) - codes ending in 'm' - e.g colour/underline |
|
#we will accept and pass through the less common colon separator (ITU Open Document Architecture) |
|
#Terminals should generally ignore it if they don't use it |
|
regexp {\033\[[0-9;:]*m$} $code |
|
} |
|
proc is_cursor_move_in_line {code} { |
|
#review - what about CSI n : m H where row n happens to be current line? |
|
regexp {\033\[[0-9]*(:?C|D|G)$} |
|
} |
|
#pure SGR reset with no other functions |
|
proc is_sgr_reset {code} { |
|
#todo 8-bit csi |
|
regexp {\033\[0*m$} $code |
|
} |
|
#whether this code has 0 (or equivalently empty) parameter (but may set others) |
|
#if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes |
|
#it generally only makes sense for the reset to be the first entry - otherwise the code has ineffective portions |
|
#However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params. |
|
#We will only look at initial parameter as this is the well-formed normal case. |
|
#Review - consider normalizing sgr codes to remove other redundancies such as setting fg or bg color twice in same code |
|
proc has_sgr_leadingreset {code} { |
|
set params "" |
|
regexp {\033\[(.*)m} $code _match params |
|
set plist [split $params ";"] |
|
if {[string trim [lindex $plist 0] 0] eq ""} { |
|
#e.g \033\[m \033\[0\;...m \033\[0000...m |
|
return 1 |
|
} else { |
|
return 0 |
|
} |
|
} |
|
|
|
#has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list? |
|
|
|
} |
|
namespace eval sequence_type { |
|
proc is_Fe {code} { |
|
# C1 control codes |
|
if {[regexp {^\033\[[\u0040-\u005F]}]} { |
|
#7bit - typical case |
|
return 1 |
|
} |
|
#8bit |
|
#review - all C1 escapes ? 0x80-0x90F |
|
#This is possibly problematic as it is affected by encoding. |
|
#According to https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit |
|
#"However, in character encodings used on modern devices such as UTF-8 or CP-1252, those codes are often used for other purposes, so only the 2-byte sequence is typically used." |
|
return 0 |
|
} |
|
proc is_Fs {code} { |
|
puts stderr "is_Fs unimplemented" |
|
} |
|
} |
|
# -- --- --- --- --- --- --- --- --- --- --- |
|
#todo - implement colour resets like the perl module: |
|
#https://metacpan.org/pod/Text::ANSI::Util |
|
#(saves up all ansi color codes since previous color reset and replays the saved codes after our highlighting is done) |
|
} |
|
|
|
|
|
namespace eval punk::ansi::ta { |
|
#*** !doctools |
|
#[subsection {Namespace punk::ansi::ta}] |
|
#[para] text ansi functions |
|
#[para] based on but not identical to the Perl Text Ansi module: |
|
#[para] https://github.com/perlancar/perl-Text-ANSI-Util/blob/master/lib/Text/ANSI/BaseUtil.pm |
|
#[list_begin definitions] |
|
namespace path ::punk::ansi |
|
|
|
#handle both 7-bit and 8-bit csi |
|
#review - does codepage affect this? e.g ebcdic has 8bit csi in different position |
|
|
|
#CSI |
|
#variable re_csi_open {(?:\033\[|\u009b)[0-9;]+} ;#too specific - doesn't detect \033\[m |
|
variable re_csi_open {(?:\033\[|\u009b)} |
|
|
|
#colour and style |
|
variable re_csi_colour {(?:\033\[|\u009b)[0-9;]*m} ;#e.g \033\[31m \033\[m \033\[0m \033\[m0000m |
|
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~). |
|
variable re_csi_code {(?:\033\[|\u009b)[0-9;]*[a-zA-Z\\@^_|~`]} |
|
|
|
#OSC - termnate with BEL (\a \007) or ST (string terminator \033\\) |
|
# 8-byte string terminator is \x9c (\u009c) |
|
|
|
#non-greedy via "*?" doesn't seem to work like this.. |
|
#variable re_esc_osc1 {(?:\033\]).*?\007} |
|
#variable re_esc_osc2 {(?:\033\]).*?\033\\} |
|
#variable re_esc_osc3 {(?:\u009d).*?\u009c} |
|
|
|
#non-greedy by excluding ST terminators |
|
#TODO - FIX? see re_ST below |
|
variable re_esc_osc1 {(?:\033\])(?:[^\007]*)\007} |
|
variable re_esc_osc2 {(?:\033\])(?:[^\033]*)\033\\} |
|
variable re_esc_osc3 {(?:\u009d)(?:[^\u009c]*)?\u009c} |
|
|
|
variable re_osc_open {(?:\033\]|\u009d).*} |
|
|
|
#standalone_codes [list \x1bc "" \x1b7 "" \x1b8 "" \x1bM "" \x1bE "" \x1bD "" \x1bH "" \x1b= "" \x1b> "" \x1b#3 "" \x1b#4 "" \x1b#5 "" \x1b#5 "" \x1b#6 "" \x1b#8 ""] |
|
variable re_standalones {(?:\x1bc|\x1b7|\x1b8|\x1bM|\x1bE|\x1bD|\x1bD|\x1bH|\x1b=|\x1b>|\x1b#3|\x1b#4|\x1b#5|\x1b#6|\x1b#8)} |
|
|
|
#see stripansi |
|
set re_start_ST {^(?:\033X|\u0098|\033\^|\u009e|\033_|\u009f)} |
|
#ST terminators [list \007 \033\\ \u009c] |
|
|
|
#regex to capture the start of string/privacy message/application command block including the contents and string terminator (ST) |
|
#non-greedy by exclusion of ST terminators in body |
|
#!!! |
|
#TODO - fix. we need to match \033\\ not just \033 ! could be colour codes nested in a privacy msg/string |
|
#This will currently terminate the code too early in this case |
|
#we also need to track the start of ST terminated code and not add it for replay (in the ansistring functions) |
|
variable re_ST {(?:\033X|\u0098|\033\^|\u009E|\033_|\u009F)(?:[^\033\007\u009c]*)(?:\033\\|\007|\u009c)} |
|
|
|
variable re_ansi_detect "${re_csi_open}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_start_ST}" |
|
|
|
#detect any ansi escapes |
|
#review - only detect 'complete' codes - or just use the opening escapes for performance? |
|
proc detect {text} { |
|
#*** !doctools |
|
#[call [fun detect] [arg text]] |
|
#[para]Return a boolean indicating whether Ansi codes were detected in text |
|
#[para] |
|
|
|
variable re_ansi_detect |
|
#variable re_csi_open |
|
#variable re_esc_osc1 |
|
#variable re_esc_osc2 |
|
#todo - other escape sequences |
|
#expr {[regexp $re_csi_open $text] || [regexp $re_esc_osc1 $text] || [regexp $re_esc_osc2 $text]} |
|
expr {[regexp $re_ansi_detect $text]} |
|
} |
|
#not in perl ta |
|
proc detect_csi {text} { |
|
#*** !doctools |
|
#[call [fun detect_csi] [arg text]] |
|
#[para]Return a boolean indicating whether an Ansi Control Sequence Introducer (CSI) was detected in text |
|
#[para]The csi is often represented in code as \x1b or \033 followed by a left bracket [lb] |
|
#[para]The initial byte or escape is commonly referenced as ESC in Ansi documentation |
|
#[para]There is also a multi-byte escape sequence \u009b |
|
#[para]This is less commonly used but is also detected here |
|
#[para](This function is not in perl ta) |
|
variable re_csi_open |
|
expr {[regexp $re_csi_open $text]} |
|
} |
|
proc detect_sgr {text} { |
|
#*** !doctools |
|
#[call [fun detect_sgr] [arg text]] |
|
#[para]Return a boolean indicating whether an ansi Select Graphics Rendition code was detected. |
|
#[para]This is the set of CSI sequences ending in 'm' |
|
#[para]This is most commonly an Ansi colour code - but also things such as underline and italics |
|
#[para]An SGR with empty or a single zero argument is a reset of the SGR features - this is also detected. |
|
#[para](This function is not in perl ta) |
|
variable re_csi_colour |
|
expr {[regexp $re_csi_colour $text]} |
|
} |
|
proc strip {text} { |
|
#*** !doctools |
|
#[call [fun strip] [arg text]] |
|
#[para]Return text stripped of Ansi codes |
|
#[para]This is a tailcall to punk::ansi::stripansi |
|
tailcall stripansi $text |
|
} |
|
proc length {text} { |
|
#*** !doctools |
|
#[call [fun length] [arg text]] |
|
#[para]Return the character length after stripping ansi codes - not the printing length |
|
string length [stripansi $text] |
|
} |
|
#todo - handle newlines |
|
#not in perl ta |
|
#proc printing_length {text} { |
|
# |
|
#} |
|
|
|
proc trunc {text width args} { |
|
|
|
} |
|
|
|
#not in perl ta |
|
#returns just the plaintext portions in a list |
|
proc split_at_codes {text} { |
|
variable re_esc_osc1 |
|
variable re_esc_osc2 |
|
variable re_csi_code |
|
variable re_standalones |
|
variable re_ST |
|
punk::ansi::internal::splitx $text "${re_csi_code}|${re_esc_osc1}|${re_esc_osc2}|${re_standalones}|${re_ST}" |
|
} |
|
|
|
# -- --- --- --- --- --- |
|
#Split $text to a list containing alternating ANSI color codes and text. |
|
#ANSI color codes are always on the second element, fourth, and so on. |
|
#(ie plaintext on odd list-indices ansi on even indices) |
|
# Example: |
|
#ta_split_codes "" # => "" |
|
#ta_split_codes "a" # => "a" |
|
#ta_split_codes "a\e[31m" # => {"a" "\e[31m"} |
|
#ta_split_codes "\e[31ma" # => {"" "\e[31m" "a"} |
|
#ta_split_codes "\e[31ma\e[0m" # => {"" "\e[31m" "a" "\e[0m"} |
|
#ta_split_codes "\e[31ma\e[0mb" # => {"" "\e[31m" "a" "\e[0m", "b"} |
|
#ta_split_codes "\e[31m\e[0mb" # => {"" "\e[31m\e[0m" "b"} |
|
# |
|
proc split_codes {text} { |
|
variable re_esc_osc1 |
|
variable re_esc_osc2 |
|
variable re_csi_code |
|
variable re_standalones |
|
variable re_ST |
|
set re "(?:${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2})+" |
|
return [_perlish_split $re $text] |
|
} |
|
#like split_codes - but each ansi-escape is split out separately (with empty string of plaintext between codes so odd/even plain ansi still holds) |
|
proc split_codes_single {text} { |
|
variable re_esc_osc1 |
|
variable re_esc_osc2 |
|
variable re_csi_code |
|
variable re_standalones |
|
variable re_ST |
|
set re "${re_csi_code}|${re_standalones}|${re_ST}|${re_esc_osc1}|${re_esc_osc2}" |
|
return [_perlish_split $re $text] |
|
} |
|
|
|
#review - tcl greedy expressions may match multiple in one element |
|
proc _perlish_split {re text} { |
|
if {[string length $text] == 0} { |
|
return {} |
|
} |
|
set list [list] |
|
set start 0 |
|
|
|
#We can get $matchEnd < $matchStart; we need to ensure there is an exit condition for non-greedy empty results REVIEW |
|
while {[regexp -start $start -indices -- $re $text match]} { |
|
lassign $match matchStart matchEnd |
|
#puts "->start $start ->match $matchStart $matchEnd" |
|
if {$matchEnd < $matchStart} { |
|
lappend list [string range $text $start $matchStart-1] [string index $text $matchStart] |
|
incr start |
|
if {$start >= [string length $text]} { |
|
break |
|
} |
|
continue |
|
} |
|
lappend list [string range $text $start $matchStart-1] [string range $text $matchStart $matchEnd] |
|
set start [expr {$matchEnd+1}] |
|
|
|
#? |
|
if {$start >= [string length $text]} { |
|
break |
|
} |
|
} |
|
lappend list [string range $text $start end] |
|
return $list |
|
} |
|
proc _ws_split {text} { |
|
regexp -all -inline {(?:\S+)|(?:\s+)} $text |
|
} |
|
# -- --- --- --- --- --- |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] |
|
} |
|
# -- --- --- --- --- --- --- --- --- --- --- |
|
|
|
namespace eval punk::ansi::ansistring { |
|
#*** !doctools |
|
#[subsection {Namespace punk::ansi::ansistring}] |
|
#[para]punk::ansi::ansistring ensemble - ansi-aware string operations |
|
#[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings |
|
#[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly. |
|
#[list_begin definitions] |
|
|
|
namespace path [list ::punk::ansi ::punk::ansi::ta] |
|
namespace ensemble create |
|
namespace export length trim trimleft trimright index VIEW |
|
#todo - expose _splits_ methods so caller can work efficiently with the splits themselves |
|
#we need to consider whether these can be agnostic towards splits from split_codes vs split_codes_single |
|
|
|
#\UFFFD - replacement char or \U2426 |
|
|
|
#using ISO 2047 graphical representations of control characters |
|
#00 NUL Null ⎕ U+2395 NU |
|
#01 TC1, SOH Start of Heading ⌈ U+2308 SH |
|
#02 TC2, STX Start of Text ⊥ U+22A5 SX |
|
#03 TC3, ETX End of Text ⌋ U+230B EX |
|
#04 TC4, EOT End of Transmission ⌁ U+2301[9] ET |
|
#05 TC5, ENQ Enquiry ⊠[a] U+22A0 EQ |
|
#06 TC6, ACK Acknowledge ✓ U+2713 AK |
|
#07 BEL Bell ⍾ U+237E[9] BL |
|
#08 FE0, BS Backspace ⤺ —[b] BS |
|
#09 FE1, HT Horizontal Tabulation ⪫ U+2AAB HT |
|
#0A FE2, LF Line Feed ≡ U+2261 LF |
|
#0B FE3, VT Vertical Tabulation ⩛ U+2A5B VT |
|
#0C FE4, FF Form Feed ↡ U+21A1 FF |
|
#0D FE5, CR Carriage Return ⪪ U+2AAA CR |
|
#0E SO Shift Out ⊗ U+2297 SO |
|
#0F SI Shift In ⊙ U+2299 SI |
|
#10 TC7, DLE Data Link Escape ⊟ U+229F DL |
|
#11 DC1, XON, CON[10] Device Control 1 ◷ U+25F7 D1 |
|
#12 DC2, RPT,[10] TAPE[c] Device Control 2 ◶ U+25F6 D2 |
|
#13 DC3, XOF, XOFF Device Control 3 ◵ U+25F5 D3 |
|
#14 DC4, COF, KMC,[10] TAPE[c] Device Control 4 ◴ U+25F4 D4 |
|
#15 TC8, NAK Negative Acknowledge ⍻ U+237B[9] NK |
|
#16 TC9, SYN Synchronization ⎍ U+238D SY |
|
#17 TC10, ETB End of Transmission Block ⊣ U+22A3 EB |
|
#18 CAN Cancel ⧖ U+29D6 CN |
|
#19 EM End of Medium ⍿ U+237F[9] EM |
|
#1A SUB Substitute Character ␦ U+2426[12] SB |
|
#1B ESC Escape ⊖ U+2296 EC |
|
#1C IS4, FS File Separator ◰ U+25F0 FS |
|
#1D IS3, GS Group Separator ◱ U+25F1 GS |
|
#1E IS2, RS Record Separator ◲ U+25F2 RS |
|
#1F IS1 US Unit Separator ◳ U+25F3 US |
|
#20 SP Space △ U+25B3 SP |
|
#7F DEL Delete ▨ —[d] DT |
|
proc VIEW {string} { |
|
return [string map [list \033 \U2296 \007 \U237E] $string] |
|
} |
|
|
|
proc length {string} { |
|
#*** !doctools |
|
#[call [fun length] [arg string]] |
|
#[para]Returns the length of the string without ansi codes |
|
#[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence. |
|
#[para]This is equivalent to calling string length on the result of stripansi $string |
|
#[para]Note that this returns the number of characters in the payload, and is not always the same as the width of the string as rendered on a terminal. |
|
#[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware. |
|
string length [stripansi $string] |
|
} |
|
|
|
proc trimleft {string args} { |
|
set intext 0 |
|
set out "" |
|
#for split_codes only first or last pt can be empty string |
|
foreach {pt ansiblock} [split_codes $string] { |
|
if {!$intext} { |
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
append out $ansiblock |
|
} else { |
|
append out [string trimleft $pt]$ansiblock |
|
set intext 1 |
|
} |
|
} else { |
|
append out $pt$ansiblock |
|
} |
|
} |
|
return $out |
|
} |
|
proc trimright {string} { |
|
if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing |
|
set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]] |
|
return [join $rtrimmed_list ""] |
|
} |
|
proc trim {string} { |
|
#make sure we do our ansi-scanning split only once - so use list-based trim operations |
|
#order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length |
|
#we save a single function call by calling both here rather than _splits_trim |
|
join [_splits_trimright [_splits_trimleft [split_codes $string]]] "" |
|
} |
|
|
|
proc index {string index} { |
|
#*** !doctools |
|
#[call [fun index] [arg string] [arg index]] |
|
#[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes) |
|
#[para]Returns the character (with applied ansi effect) at position index |
|
#[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output. |
|
#[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST) |
|
#[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them. |
|
#[para]todo: SGR codes within ST-terminated strings not yet ignored properly |
|
#[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards. |
|
#[para]As any operation using end-+<int> will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index. |
|
#[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that. |
|
#[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code |
|
#[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered. |
|
#[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be. |
|
#[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible |
|
#[para]Notes: |
|
#[para]This function has to split the whole string into plaintext & ansi codes even for a very low index |
|
#[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks. |
|
#[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal |
|
|
|
set splits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run |
|
|
|
#todo - end-x +/-x+/-x etc |
|
set original_index $index |
|
|
|
set index [string map [list _ ""] $index] |
|
#short-circuit some trivial cases |
|
if {[string is integer -strict $index]} { |
|
if {$index < 0} {return ""} |
|
#this only short-circuits an index greater than length including ansi-chars |
|
#we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length |
|
if {$index > [string length $string]} {return ""} |
|
} else { |
|
if {[string match end* $index]} { |
|
#for end- we will probably have to blow a few cycles stripping first and calculate the length |
|
if {$index ne "end"} { |
|
set op [string index $index 3] |
|
set offset [string range $index 4 end] |
|
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"} |
|
if {$op eq "+" && $offset != 0} { |
|
return "" |
|
} |
|
} else { |
|
set offset 0 |
|
} |
|
#by now, if op = + then offset = 0 so we only need to handle the minus case |
|
set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal |
|
if {$offset == 0} { |
|
set index [expr {$payload_len-1}] |
|
} else { |
|
set index [expr {($payload_len-1) - $offset}] |
|
} |
|
if {$index < 0} { |
|
#don't waste time splitting and looping the string |
|
return "" |
|
} |
|
} else { |
|
#we are trying to avoid evaluating unbraced expr of potentially insecure origin |
|
regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string |
|
if {[string is integer -strict $tail]} { |
|
#plain +-<int> |
|
if {$op eq "-"} { |
|
#return nothing for negative indices as per Tcl's lindex etc |
|
return "" |
|
} |
|
set index $tail |
|
} else { |
|
if {[regexp {(.*)([+-])(.*)} $index _match a op b]} { |
|
if {[string is integer -strict $a] && [string is integer -strict $b]} { |
|
if {$op eq "-"} { |
|
set index [expr {$a - $b}] |
|
} else { |
|
set index [expr {$a + $b}] |
|
} |
|
} else { |
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
} |
|
} else { |
|
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?" |
|
} |
|
} |
|
} |
|
} |
|
|
|
#any pt could be empty if using split_codes_single (or just first and last pt if split_codes) |
|
set low -1 |
|
set high -1 |
|
set pt_index -2 |
|
set pt_found -1 |
|
set char "" |
|
set codes_in_effect "" |
|
#we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go |
|
#(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway) |
|
foreach {pt code} $splits { |
|
incr pt_index 2 |
|
if {$pt ne ""} { |
|
set low [expr {$high + 1}] ;#last high |
|
incr high [string length $pt] |
|
} |
|
|
|
if {$pt ne "" && ($index >= $low && $index <= $high)} { |
|
set pt_found $pt_index |
|
set char [string index $pt $index-$low] |
|
break |
|
} |
|
|
|
if {[punk::ansi::codetype::is_sgr_reset $code]} { |
|
#we can throw away previous codes_in_effect |
|
set codes_in_effect "" |
|
} else { |
|
#may have partial resets - but we don't want to track individual states of SGR features |
|
#A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end. |
|
#we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed. |
|
#Review - consider if any other types of code make sense to retain in the output in this context. |
|
if {[punk::ansi::codetype::is_sgr $code]} { |
|
append codes_in_effect $code |
|
} |
|
} |
|
|
|
} |
|
if {$pt_found >= 0} { |
|
return $codes_in_effect$char |
|
} else { |
|
return "" |
|
} |
|
} |
|
|
|
proc _splits_trimleft {sclist} { |
|
set intext 0 |
|
set outlist [list] |
|
foreach {pt ansiblock} $sclist { |
|
if {!$intext} { |
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
lappend outlist "" $ansiblock |
|
} else { |
|
lappend outlist [string trimleft $pt] $ansiblock |
|
set intext 1 |
|
} |
|
} else { |
|
lappend outlist $pt $ansiblock |
|
} |
|
} |
|
return $outlist |
|
} |
|
proc _splits_trimright {sclist} { |
|
set intext 0 |
|
set outlist [list] |
|
foreach {pt ansiblock} [lreverse $sclist] { |
|
if {!$intext} { |
|
if {$pt eq "" || [regexp {^\s+$} $pt]} { |
|
lappend outlist "" $ansiblock |
|
} else { |
|
lappend outlist [string trimright $pt] $ansiblock |
|
set intext 1 |
|
} |
|
} else { |
|
lappend outlist $pt $ansiblock |
|
} |
|
} |
|
return [lreverse $outlist] |
|
} |
|
proc _splits_trim {sclist} { |
|
return [_splits_trimright [_splits_trimleft $sclist]] |
|
} |
|
|
|
#*** !doctools |
|
#[list_end] [comment {--- end definitions namespace punk::ansi::ta ---}] |
|
} |
|
|
|
namespace eval punk::ansi::internal { |
|
proc splitn {str {len 1}} { |
|
#from textutil::split::splitn |
|
if {$len <= 0} { |
|
return -code error "len must be > 0" |
|
} |
|
if {$len == 1} { |
|
return [split $str {}] |
|
} |
|
set result [list] |
|
set max [string length $str] |
|
set i 0 |
|
set j [expr {$len -1}] |
|
while {$i < $max} { |
|
lappend result [string range $str $i $j] |
|
incr i $len |
|
incr j $len |
|
} |
|
return $result |
|
} |
|
proc splitx {str {regexp {[\t \r\n]+}}} { |
|
#from textutil::split::splitx |
|
# Bugfix 476988 |
|
if {[string length $str] == 0} { |
|
return {} |
|
} |
|
if {[string length $regexp] == 0} { |
|
return [::split $str ""] |
|
} |
|
if {[regexp $regexp {}]} { |
|
return -code error \ |
|
"splitting on regexp \"$regexp\" would cause infinite loop" |
|
} |
|
set list {} |
|
set start 0 |
|
while {[regexp -start $start -indices -- $regexp $str match submatch]} { |
|
foreach {subStart subEnd} $submatch break |
|
foreach {matchStart matchEnd} $match break |
|
incr matchStart -1 |
|
incr matchEnd |
|
lappend list [string range $str $start $matchStart] |
|
if {$subStart >= $start} { |
|
lappend list [string range $str $subStart $subEnd] |
|
} |
|
set start $matchEnd |
|
} |
|
lappend list [string range $str $start end] |
|
return $list |
|
} |
|
|
|
proc printing_length_addchar {i c} { |
|
upvar outchars outc |
|
upvar outsizes outs |
|
set nxt [llength $outc] |
|
if {$i < $nxt} { |
|
lset outc $i $c |
|
} else { |
|
lappend outc $c |
|
} |
|
} |
|
|
|
#string to 2digit hex - e.g used by XTGETTCAP |
|
proc str2hex {input} { |
|
set 2hex "" |
|
foreach ch [split $input ""] { |
|
append 2hex [format %02X [scan $ch %c]] |
|
} |
|
return $2hex |
|
} |
|
proc hex2str {2digithexchars} { |
|
set 2digithexchars [string map [list _ ""] $2digithexchars] ;#compatibility with tcl tip 551 (compatibility in the sense that users might expect to be able to use underscores and it's nice to support the syntax here too - not that it's required) |
|
if {$2digithexchars eq ""} { |
|
return "" |
|
} |
|
if {[string length $2digithexchars] % 2 != 0} { |
|
error "hex2str requires an even number of hex digits (2 per character)" |
|
} |
|
set 2str "" |
|
foreach pair [splitn $2digithexchars 2] { |
|
append 2str [format %c 0x$pair] |
|
} |
|
return $2str |
|
} |
|
} |
|
|
|
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
|
## Ready |
|
package provide punk::ansi [namespace eval punk::ansi { |
|
variable version |
|
set version 999999.0a1.0 |
|
}] |
|
return |
|
|
|
|
|
#*** !doctools |
|
#[manpage_end] |
|
|
|
|