error "punk::args::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'"
#adhoc and currentproject* paths are relative to cwd - so no projectbase information can be stored at registration time.
#not all template item types will need projectbase information - as the item data may be self-contained within the template structure -
#but project_layout will need it - or at least need to know if there is no project - because project_layout data is never stored in the template folder structure directly.
if {$pathtype eq "adhoc"} {
switch -- $pathtype {
adhoc {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "module"} {
}
module {
set provide_statement [package ifneeded $pkg [package require $pkg]]
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
} elseif {$pathtype eq "currentproject"} {
}
currentproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
#currently only intended for punk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
dict set extended_capdict projectbase $projectbase
} elseif {$pathtype eq "absolute"} {
}
absolute {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
dict set extended_capdict projectbase $projectbase
} else {
}
default {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
#major named sets such as unicode blocks, scripts, and other sets such as microsoft WGL4
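#A minimal memoisation sketch of the caching idea described above. This is an illustrative
#assumption, not the punk::char implementation: the wrapper name and the single-character
#calling convention for 'memberof' are assumed for the example only.
variable memberof_cache [dict create]
proc memberof_cached {ch} {
    variable memberof_cache
    if {![dict exists $memberof_cache $ch]} {
        #first lookup pays the full scan of all defined ranges - later lookups are a dict hit
        dict set memberof_cache $ch [memberof $ch]
    }
    return [dict get $memberof_cache $ch]
}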
#case insensitive search - possibly with globs
proc charset_names {{namesearch *}} {
#case insensitive search - possibly with *basic* globs (? and * only - not lb rb)
proc charset_names2 {{namesearch *}} {
variable charsets
set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [dict keys $charsets]]
if {$namesearch eq "*"} {
return $sortedkeys
}
if {[regexp {[?*]} $namesearch]} {
#name glob search
set matched_names [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
set matched [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - stop on first match
if {[llength $matched]} {
return [list $matched]
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
return [list]
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
#review is detecting \033 enough? what about 8-bit escapes?
#maint warning - also in overtype!
#intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy)
proc grapheme_width_cached {ch} {
variable grapheme_widths
if {[dict exists $grapheme_widths $ch]} {
return [dict get $grapheme_widths $ch]
}
set width [punk::char::ansifreestring_width $ch] ;#review - can we provide faster version if we know it's a single grapheme rather than a string? (grapheme is still a string as it may have combiners/diacritics)
dict set grapheme_widths $ch $width
return $width
}
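#Illustrative usage (a sketch, called from within the defining namespace - actual values
#depend on the width data in use): the first call computes via ansifreestring_width and
#stores the result, the second is a plain dict lookup on grapheme_widths.
set g \u4E00   ;#a CJK ideograph - typically 2 terminal columns
puts [grapheme_width_cached $g]   ;#computed and cached
puts [grapheme_width_cached $g]   ;#served from the grapheme_widths dict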
#no char_width - use grapheme_width terminology to be clearer
proc grapheme_width {char} {
error "grapheme_width unimplemented - use ansifreestring_width"
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u200d (zero width joiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
#todo - check double-width chars in unicode blocks.. try to do reasonably quickly
#short-circuit basic cases
if {![regexp {[\uFF-\U10FFFF]} $text]} {
#control chars?
#support tcl pre 2023-11 - see regexp bug below
#if {![regexp {[\uFF-\U10FFFF]} $text]} {
# return [string length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
return [string length $text]
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?
#review
#set can_regex_high_unicode [string match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters
set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0000-\u00FF\])+" $text]
set len 0
foreach {uc ascii} $uc_sequences {
#puts "-ascii $ascii"
#puts "-uc $uc"
incr len [string length $ascii]
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diacritics/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - find something that understands grapheme clusters - needed also for grapheme_split
#use punk::char::wcswidth - faster than the string split in textutil::wcswidth but still uses textutil::wcswidth_char
incr len [wcswidth $uc]
}
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
return $len
}
#kept as a fallback for review/test if textutil::wcswidth doesn't do what we require on all terminals.
#this version looks at console testwidths if they've been cached.
#It is relatively fast - but tests unicode widths char by char - so won't be useful going forward for grapheme clusters.
proc ansifreestring_width2 {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
#we can strip c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
@ -1831,7 +2012,7 @@ namespace eval punk::char {
#
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u200d (zero width joiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count
} else {
#todo - replace with function that doesn't use console - just unicode data
#review
# a)- terminals lie - so we could have a bad cached testwidth
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tcllib docs.
#(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c]
} else {
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diacritics/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
#review - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
# - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#will accept a single char or a string - test using console cursor position reporting
#unreliable
proc char_info_testwidth {ch {emit 0}} {
package require punk::console
#uses cursor movement and relies on console to report position.. which it may do properly for single chars - but may misreport for combinations that combine into a single glyph
variable ansi_available -1 ;#default -1 for unknown. Leave it this way so test for ansi support is run.
#-1 still evaluates to true - as the modern assumption for ansi availability is true
#only false if ansi_available has been set 0 by test_can_ansi
#support stripansi for legacy windows terminals
# --
variable ansi_wanted 2 ;#2 for default assumed yes. Will be set to -1 (automatically unwanted) when ansi is unavailable; explicit values of 0 or 1 won't be autoset
#punk::console namespace - contains *directly* acting functions - some based on ansi escapes from the 'ansi' sub namespace, some on local system calls or executable calls wrapped in the 'local' sub namespace
#directly acting means they write to stdout to cause the console to perform the action, or they perform the action immediately via other means.
@ -55,41 +67,61 @@ namespace eval punk::console {
}
if {"windows" eq $::tcl_platform(platform)} {
proc enableAnsi {} {
#accept args for all dummy/load functions so we don't have to match/update argument signatures here
proc enableAnsi {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableAnsi
tailcall enableAnsi {*}$args
}
#review what raw mode means with regard to a specific channel vs terminal as a whole
proc enableRaw {{channel stdin}} {
proc enableRaw {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableRaw $channel
tailcall enableRaw {*}$args
}
proc disableRaw {{channel stdin}} {
proc disableRaw {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall disableRaw $channel
tailcall disableRaw {*}$args
}
proc enableVirtualTerminal {} {
proc enableVirtualTerminal {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableVirtualTerminal
tailcall enableVirtualTerminal {*}$args
}
proc disableVirtualTerminal {} {
proc disableVirtualTerminal {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall disableVirtualTerminal
tailcall disableVirtualTerminal {*}$args
}
set funcs [list disableAnsi enableProcessedInput disableProcessedInput]
foreach f $funcs {
proc $f {args} [string map [list %f% $f] {
set mybody [info body %f%]
internal::define_windows_procs
set newbody [info body %f%]
if {$newbody ne $mybody} {
tailcall %f% {*}$args
} else {
#error vs noop?
puts stderr "Unable to set implementation for %f% - check twapi?"
}
}]
}
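#The same body-templating technique in isolation - a minimal sketch with hypothetical
#names (colour_red etc), not part of punk::console: one string-mapped body template
#generates a family of thin wrapper procs.
foreach f [list red green blue] {
    proc colour_$f {args} [string map [list %f% $f] {
        #%f% is substituted at definition time, so each generated proc knows its own name
        return "colour %f% called with: $args"
    }]
}
#e.g [colour_red a b] returns "colour red called with: a b"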
} else {
proc enableAnsi {} {
#todo?
}
proc disableAnsi {} {
}
#todo - something better - the 'channel' concept may not really apply on unix, as raw mode is for input and output modes
error "punk::console::mode expected 'raw' or 'line' or default value 'query'
error "punk::console::mode expected 'raw' or 'line' or default value 'query'"
}
}
@ -211,6 +249,7 @@ namespace eval punk::console {
#enableAnsi seems like it should be directly under punk::console .. but then it seems inconsistent if other local console-mode setting functions aren't.
#Find a compromise to organise things somewhat sensibly..
#this is really enableAnsi *processing*
proc [namespace parent]::enableAnsi {} {
#output handle modes
#Enable virtual terminal processing (sometimes off in older windows terminals)
#review - 1 byte at a time seems inefficient... but we don't have a way to peek or put back chars (?)
#todo - timeout - what if terminal doesn't put data on stdin?
#review - what if we slurp in data meant for main loop? Main loop probably needs to detect these responses and store them for lookup *instead* of this handler
#capturingendregex should capture ANY prefix, whole escape match - and a subcapture of the data we're interested in and match at end of string.
#we need to cooperate with other stdin/$input readers and put data here if we overconsume.
#Main repl reader may be currently active - or may be inactive.
#This call could come from within code called by the main reader - or from user code running while main read-loop is temporarily disabled
#In other contexts there may not even be another input reader
#REVIEW - what if there is existing data in input_chunks_waiting - is it for us?
#temp - let's keep alert to it until we decide if it's legit/required..
if {[info exists input_chunks_waiting($input)] && [llength $input_chunks_waiting($input)]} {
#puts stderr "[punk::ansi::a+ cyan bold]get_ansi_response_payload called while input_chunks_waiting($input) contained data: $input_chunks_waiting($input)[punk::ansi::a]"
}
if {!$::punk::console::ansi_available} {
return ""
}
set callid [info cmdcount] ;#info cmdcount is almost as fast as clock clicks - and whilst not unique in a long-running app(will wrap?) - fine for this context
#we may have consumed all pending input on $input - so there may be no trigger for the readable fileevent
if {[llength $input_chunks_waiting($input)]} {
#This is experimental If a handler is aware of input_chunks_waiting - there should be no need to schedule a trigger
#If it isn't, but the handler can accept an existing chunk of data as an argument - we could trigger and pass it the waiting chunks - but there's no way to know its API.
#we could look at info args - but that's not likely to tell us much in a robust way.
#we could create a reflected channel for stdin? That is potentially an overreach..?
#triggering it manually... as it was already listening - this should generally do no harm as it was the active reader anyway, but won't help with the missing data if it's input_chunks_waiting-unaware.
puts stderr "[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload triggering existing handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
after idle [list after 0 $existing_handler]
}
#Note - we still may be in_repl_handler here (which disables its own reader while executing commandlines)
#The input_chunks_waiting may really belong to the existing_handler we found - but if it doesn't consume them they will end up being read by the repl_handler when it eventually re-enables.
#todo - some better structure than just a list of waiting chunks indexed by input channel, so repl/other handlers can determine the context in which these waiting chunks were generated?
} elseif {[llength $::repl::in_repl_handler]} {
if {[llength $input_chunks_waiting($input)]} {
#don't trigger the repl handler manually - we will inevitably get things out of order - as it knows when to enable/disable itself based on whether chunks are waiting.
#triggering it by putting it on the eventloop will potentially result in re-entrancy
#The cooperating reader must be designed to consume waiting chunks and only reschedule its channel read handler once all waiting chunks have been consumed.
#puts stderr "[punk::ansi::a+ green bold]--> repl_handler has chunks to consume [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
}
}
catch {
unset accumulator($callid)
unset waitvar($callid)
}
#set punk::console::chunk ""
return $payload
}
#review - reading 1 byte at a time and repeatedly running the small capturing/completion regex seems a little inefficient... but we don't have a way to peek or put back chars (?)
#review (we do have the punk::console::input_chunks_waiting($chan) array to cooperatively put back data - but this won't work for user scripts not aware of this)
#review - timeout - what if terminal doesn't put data on stdin? error vs stderr report vs empty results
#review - Main loop may need to detect some terminal responses and store them for lookup instead-of or as-well-as this handler?
#e.g what happens to mouse-events while user code is executing?
#we may still need this handler if such a loop doesn't exist.
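#A minimal sketch (hypothetical helper - not part of punk::console) of a reader that
#cooperates with the input_chunks_waiting mechanism described above: drain any chunks
#another reader put aside for this channel before falling back to a normal read.
proc cooperating_read {chan} {
    upvar #0 ::punk::console::input_chunks_waiting waiting
    if {[info exists waiting($chan)] && [llength $waiting($chan)]} {
        set chunk [lindex $waiting($chan) 0]
        set waiting($chan) [lrange $waiting($chan) 1 end]
        return $chunk
    }
    return [read $chan]
}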
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
set do_bounds_check 0 ;#modified by leading single @ for list operations - doesn't apply to certain items like 'head','tail' which have specifically defined bounds-checks implicit in their normal meaning.
#these have anyhead and anytail for explicit allowance to be used on lists with insufficient items to produce values.
if {$index eq "#"} {
switch -- $index {
# {
set active_key_type "list"
if {![catch {llength $leveldata} assigned]} {
set already_assigned 1
@ -738,7 +738,8 @@ namespace eval punk {
set action ?mismatch-not-a-list
break
}
} elseif {$index eq "##"} {
}
## {
set active_key_type "dict"
if {![catch {dict size $leveldata} assigned]} {
set already_assigned 1
@ -746,10 +747,12 @@ namespace eval punk {
set action ?mismatch-not-a-dict
break
}
} elseif {$index eq "#?"} {
}
#? {
set assigned [string length $leveldata]
set already_assigned 1
} elseif {$index eq "@"} {
}
@ {
upvar v_list_idx v_list_idx ;#positional tracker for /@ - list position
set active_key_type "list"
#e.g @1/1/@/1 the lone @ is a positional spec for this specific subkey
@ -771,9 +774,10 @@ namespace eval punk {
}
set assigned [lindex $leveldata $index]
set already_assigned 1
} else {
if {$index in [list "@@" "@?@" "@??@"]} {
}
default {
switch -exact -- $index {
@@ - @?@ - @??@ {
set active_key_type "dict"
#NOTE: it may at first seem pointless to use @@/key, since we have to know the key - but this can be used to match 'key' only at the first position in .= list key {x y} key2 etc
@ -810,7 +814,10 @@ namespace eval punk {
}
}
set already_assigned 1
} elseif {[string match @@* $index]} {
}
default {
switch -glob -- $index {
@@* {
set active_key_type "dict"
set key [string range $index 2 end]
#dict exists test is safe - no need for catch
@ -821,7 +828,8 @@ namespace eval punk {
break
}
set already_assigned 1
} elseif {[string match {@\?@*} $index]} {
}
{@\?@*} {
set active_key_type "dict"
set key [string range $index 3 end]
#dict exists test is safe - no need for catch
@ -831,7 +839,8 @@ namespace eval punk {
set assigned [list]
}
set already_assigned 1
} elseif {[string match {@\?\?@*} $index]} {
}
{@\?\?@*} {
set active_key_type "dict"
set key [string range $index 4 end]
#dict exists test is safe - no need for catch
@ -841,38 +850,49 @@ namespace eval punk {
set assigned [list]
}
set already_assigned 1
} elseif {[string match @* $index]} {
}
@* {
set active_key_type "list"
set do_bounds_check 1
set index [string trimleft $index @]
} else {
}
default {
#
}
}
}
}
if {!$already_assigned} {
if {[string match "not-*" $index] && $active_key_type in [list "" "list"]} {
#e.g not-0-end-1 not-end-4-end-2
set get_not 1
#cherry-pick some easy cases, and either assign, or re-map to corresponding index
if {$index eq "not-tail"} {
switch -- $index {
not-tail {
set active_key_type "list"
set assigned [lindex $leveldata 0]; set already_assigned 1
} elseif {$index in [list "not-head" "not-0"]} {
}
not-head {
set active_key_type "list"
#set selector "tail"; set get_not 0
set assigned [lrange $leveldata 1 end]; set already_assigned 1
} elseif {$index eq "not-end"} {
}
not-end {
set active_key_type "list"
set assigned [lrange $leveldata 0 end-1]; set already_assigned 1
} else {
}
default {
#trim off the not- and let the remaining index handle based on get_not being 1
set index [string range $index 4 end]
append script \n "set index $index"
}
}
}
}
@ -1969,39 +1994,46 @@ namespace eval punk {
lappend var_class [list $v_key 0]
lappend varspecs_trimmed $v_key
} else {
set firstchar [string index $v 0]
set lastchar [string index $v end]
if {$lastchar eq "+"} {
switch -- $lastchar {
+ {
lappend classes 9
set vname [string range $v 0 end-1]
}
if {$lastchar eq "-"} {
- {
lappend classes 10
set vname [string range $v 0 end-1]
}
if {$firstchar in $leading_classifiers} {
if {$firstchar eq "'"} {
}
set firstchar [string index $v 0]
switch -- $firstchar {
' {
lappend var_class [list $v_key 1]
#set vname [string range $v 1 end]
lappend varspecs_trimmed [list $vname $key]
} elseif {$firstchar eq "^"} {
}
^ {
lappend classes [list 2]
#use vname - may already have trailing +/- stripped
set vname [string range $vname 1 end]
set secondclassifier [string index $v 1]
if {$secondclassifier eq "&"} {
switch -- $secondclassifier {
"&" {
#pinned boolean
lappend classes 3
set vname [string range $v 2 end]
} elseif {$secondclassifier eq "#"} {
}
"#" {
#pinned numeric comparison instead of string comparison
lappend classes 8
set vname [string range $vname 1 end]
} elseif {$secondclassifier eq "*"} {
}
"*" {
#pinned glob
lappend classes 7
set vname [string range $v 2 end]
}
}
#todo - check for second tag - & for pinned boolean?
#consider requiring ^# for numeric comparisons. currently no way to do a strictly string comparison on pinned variables.... default ^var really shouldn't be doing any magic.
#while we're at it.. pinned glob would be nice. ^*
@ -2009,7 +2041,8 @@ namespace eval punk {
#These all limit the range of varnames permissible - which is no big deal.
lappend var_class [list $v_key $classes]
lappend varspecs_trimmed [list $vname $key]
} elseif {$firstchar eq "&"} {
}
& {
#we require boolean literals to be single-quoted so we can use cross-binding on boolean vars.
#ie &true is the variable true whereas &'true' or &'1' &'t' etc are literal booleans
#allow exception of &1 &0 to be literal booleans - because we disallow 0 & 1 as varnames in other contexts anyway - so it would be more consistent not to treat as varnames here.
@ -2017,8 +2050,7 @@ namespace eval punk {
set vname [string range $v 1 end]
lappend varspecs_trimmed [list $vname $key]
}
} else {
default {
if {([string first ? $v]) >=0 || ([string first * $v] >=0)} {
lappend var_class [list $v_key 7] ;#glob
#leave vname as the full glob
@ -2049,7 +2081,7 @@ namespace eval punk {
}
}
}
}
}
lappend var_names $vname
}
@ -2662,18 +2694,20 @@ namespace eval punk {
#unpinned non-atoms
#cross-binding. Within this multivar pattern group only (use pin ^ for binding to result from a previous pattern)
#
if {$varname eq ""} {
switch -- $varname {
"" {
#don't attempt cross-bind on empty-varname
lset match_state $i 1
#don't change var_action $i 1 to set
lset expected_values $i [list var $varname spec $lhsspec info "match-no-lhs-var" lhs - rhs $val]
} elseif {$varname eq "_"} {
}
"_" {
#don't cross-bind on the special 'don't-care' varname
lset match_state $i 1
lset var_actions $i 1 [string range $act 1 end] ;# ?set -> set
lset expected_values $i [list var $varname spec $lhsspec info "match-any-lhs-dontcare-var" lhs - rhs $val]
} else {
}
default {
set first_bound [lsearch -index 0 $var_actions $varname]
#assert first_bound >=0, we will always find something - usually self
if {$first_bound == $i} {
@ -2696,7 +2730,7 @@ namespace eval punk {
}
}
}
}
}
incr i
@ -3401,11 +3435,7 @@ namespace eval punk {
return 1
} elseif {[string first " " $arg] >= 0 || [string first \t $arg] >= 0} {
lassign [_rhs_tail_split $arg] _ part2 ;#will have part2 if unquoted whitespace found
if {$part2 eq ""} {
return 0
} else {
return 1
}
return [expr {$part2 ne ""}]
} else {
return 0
}
@ -3426,14 +3456,27 @@ namespace eval punk {
set indq 0
}
} else {
if {$ch eq {'}} {
switch -- $ch {
{'} {
set inq 1
} elseif {$ch eq {"}} {
}
{"} {
set indq 1
} elseif {$ch in [list " " \t]} {
}
" " {
#whitespace outside of quoting
break
}
0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - a - b - c - d - e - f - g - h - i - j - k - l - m - n - o - p - q - r - s - t - u - v - w - x - y - z {}
default {
#\t not a literal for switch jumpTable bytecompile - review - can we do it without for example string mapping to <t> (and without a literal binary tab in source file)?
#we can't (reliably?) put \t as one of our switch keys
#
if {$ch eq "\t"} {
break
}
}
}
append equalsrhs $ch
}
incr i
@ -3455,13 +3498,15 @@ namespace eval punk {
#nextail is tail for possible recursion based on first argument in the segment
set nexttail [lassign $fulltail next1] ;#tail head
if {$next1 eq "pipematch"} {
switch -- $next1 {
pipematch {
set results [uplevel 1 [list pipematch {*}$nexttail]]
#NOTE: casemismatch is part of the api for pipecase. It is a casemismatch rather than an error - because for a pipecase - a casemismatch is an expected event (many casemismatches - one match)
return [dict create casemismatch $result]
}
}
}
#we can't always treat $result as a list - may be an error string which can't be represented as a list, and there may be no useful errorCode
error "punk::args::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'"
#adhoc and currentproject* paths are relative to cwd - so no projectbase information can be stored at registration time.
#not all template item types will need projectbase information - as the item data may be self-contained within the template structure -
#but project_layout will need it - or at least need to know if there is no project - because project_layout data is never stored in the template folder structure directly.
if {$pathtype eq "adhoc"} {
switch -- $pathtype {
adhoc {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "module"} {
}
module {
set provide_statement [package ifneeded $pkg [package require $pkg]]
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
} elseif {$pathtype eq "currentproject"} {
}
currentproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
#currently only intended for punk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
dict set extended_capdict projectbase $projectbase
} elseif {$pathtype eq "absolute"} {
}
absolute {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
dict set extended_capdict projectbase $projectbase
} else {
}
default {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
#major named sets such as unicode blocks, scripts, and other sets such as microsoft WGL4
#case insensitive search - possibly with globs
proc charset_names {{namesearch *}} {
#case insensitive search - possibly with *basic* globs (? and * only - not lb rb)
proc charset_names2 {{namesearch *}} {
variable charsets
set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [dict keys $charsets]]
if {$namesearch eq "*"} {
return $sortedkeys
}
if {[regexp {[?*]} $namesearch]} {
#name glob search
set matched_names [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
set matched [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - stop on first match
if {[llength $matched]} {
return [list $matched]
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
return [list]
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
#todo - provide a char_width equivalent that is optimised for speed
#maint warning - also in overtype!
#intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy)
proc grapheme_width_cached {ch} {
variable grapheme_widths
if {[dict exists $grapheme_widths $ch]} {
return [dict get $grapheme_widths $ch]
}
set width [punk::char::ansifreestring_width $ch] ;#review - can we provide faster version if we know it's a single grapheme rather than a string? (grapheme is still a string as it may have combiners/diacritics)
dict set grapheme_widths $ch $width
return $width
}
#no char_width - use grapheme_width terminology to be clearer
proc grapheme_width {char} {
error "grapheme_width unimplemented - use ansifreestring_width"
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
# -- --- --- --- --- --- ---
#review
#if we strip out ZWJ \u200d (zero width joiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth just uses the unicode east-asian width property data and doesn't seem to handle these specially - it counts this joiner and others as one wide (also BOM \uFEFF)
#TODO - once we have proper grapheme cluster splitting - work out which of these characters should be left in and/or when exactly their length-effects apply
#
#for now - strip them out
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#\uFEFF BOM / ZWNBSP and others that should be zero width
#todo - work out proper way to mark/group zero width.
set text [string map [list \u200b "" \u200c "" \u200d "" \uFEFF ""] $text]
# -- --- --- --- --- --- ---
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
#some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all
#we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here
#they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function)
set text [regsub -all {[\u0080-\u009f]+} $text ""]
#short-circuit basic cases
#support tcl pre 2023-11 - see regexp bug below
#if {![regexp {[\uFF-\U10FFFF]} $text]} {
# return [string length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
return [string length $text]
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?
#review
#set can_regex_high_unicode [string match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters
set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0000-\u00FF\])+" $text]
set len 0
foreach {uc ascii} $uc_sequences {
#puts "-ascii $ascii"
#puts "-uc $uc"
incr len [string length $ascii]
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diacritics/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - find something that understands grapheme clusters - needed also for grapheme_split
#use punk::char::wcswidth - faster than the string split in textutil::wcswidth but still uses textutil::wcswidth_char
incr len [wcswidth $uc]
}
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
return $len
}
#kept as a fallback for review/test if textutil::wcswidth doesn't do what we require on all terminals.
#this version looks at console testwidths if they've been cached.
#It is relatively fast - but tests unicode widths char by char - so won't be useful going forward for grapheme clusters.
proc ansifreestring_width2 {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
@ -1831,7 +2030,7 @@ namespace eval punk::char {
#
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u200d (zero width joiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x08) or DEL (\x7f)
#todo - document that these shouldn't be present in input rather than explicitly checking here
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
@ -1856,7 +2062,7 @@ namespace eval punk::char {
return [string length $text]
}
#todo - check double-width chars in unicode blocks.. try to do reasonably quickly
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count
} else {
#todo - replace with function that doesn't use console - just unicode data
#review
# a)- terminals lie - so we could have a bad cached testwidth
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tcllib docs.
#(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c]
} else {
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diacritics/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
#review - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
# - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#will accept a single char or a string - test using console cursor position reporting
#unreliable
proc char_info_testwidth {ch {emit 0}} {
package require punk::console
#uses cursor movement and relies on console to report position.. which it may do properly for single chars - but may misreport for combinations that combine into a single glyph
set punk::console::tabwidth $w ;#we also attempt to read terminal's tabstops and set tabwidth to the apparent spacing of first non-1 value in tabstops list.
catch {textutil::tabify::untabify2 "" $w} ;#textutil tabify can end up uninitialised and raise errors like "can't read Spaces(<n>).." after a tabstop change. This call seems to keep tabify happy - review.
#some terminals (conemu on windows) scroll the viewport when we make a big move down like this - a move to 1 1 immediately after cursor_save doesn't seem to fix that.
#This issue also occurs when switching back from the alternate screen buffer - so perhaps that needs to be addressed elsewhere.
error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower"
}
}
set fmt "%${opt_width}.${opt_width}ll${spec}"
set list_decimals [lmap d $list_decimals[unset list_decimals] {string map [list _ ""] [string trim $d]}]
@ -529,6 +533,81 @@ namespace eval punk::lib {
return $answer
}
#like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter.
proc indent {text {prefix " "}} {
set result [list]
foreach line [split $text \n] {
if {[string trim $line] eq ""} {
lappend result ""
} else {
lappend result $prefix[string trimright $line]
}
}
return [join $result \n]
}
proc undent {text} {
if {$text eq ""} {
return ""
}
set lines [split $text \n]
set nonblank [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
continue
}
lappend nonblank $ln
}
set lcp [longestCommonPrefix $nonblank]
if {$lcp eq ""} {
return $text
}
regexp {^([\t ]*)} $lcp _m lcp
if {$lcp eq ""} {
return $text
}
set len [string length $lcp]
set result [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
lappend result ""
} else {
lappend result [string range $ln $len end]
}
}
return [join $result \n]
}
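#A small round-trip illustration of the two helpers above (a sketch - assumes punk::lib is loaded):
set body "    line one\n\n    line two"
set flat [punk::lib::undent $body]        ;#common leading whitespace of non-blank lines removed, blank lines preserved
set back [punk::lib::indent $flat "    "] ;#re-applied with an explicit 4-space prefix
puts $flat
puts $back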
#A version of textutil::string::longestCommonPrefixList
proc longestCommonPrefix {items} {
if {[llength $items] <= 1} {
return [lindex $items 0]
}
set items [lsort $items[unset items]]
set min [lindex $items 0]
set max [lindex $items end]
#if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list)
#(sort order nothing to do with length - e.g min may be longer than max)
if {[string length $min] > [string length $max]} {
set temp $min
set min $max
set max $temp
}
set n [string length $min]
set prefix ""
set i -1
while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} {
append prefix $c
}
return $prefix
}
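#Quick illustration of the sort-and-compare-extremes shortcut used above (assumes punk::lib is loaded):
puts [punk::lib::longestCommonPrefix {interstellar interstate internal}]  ;#-> inter
puts [punk::lib::longestCommonPrefix {abc}]                               ;#-> abc (single item returned as-is)
puts [punk::lib::longestCommonPrefix {}]                                  ;#-> empty string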
#test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty]
error "linelist: unknown -block option value: $bo known values: $known_blockopts"
}
}
}
#normalize certain combos
if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
@ -662,14 +747,20 @@ namespace eval punk::lib {
error "linelist -block triminner not implemented - sorry"
}
}
# -- --- --- --- --- ---
set opt_line [dict get $opts -line]
set known_lineopts [list trimline trimleft trimright]
foreach lo $opt_line {
if {$lo ni $known_lineopts} {
switch -- $lo {
trimline - trimleft - trimright {}
default {
set known_lineopts [list trimline trimleft trimright]
error "linelist: unknown -line option value: $lo known values: $known_lineopts"
}
}
}
#normalize trimleft trimright combo
if {"trimleft" in $opt_line && "trimright" in $opt_line} {
set opt_line [list "trimline"]
@ -777,9 +868,15 @@ namespace eval punk::lib {
set linelist $transformed
} else {
#INLINE punk::ansi::codetype::is_sgr_reset
#regexp {\x1b\[0*m$} $code
set re_is_sgr_reset {\x1b\[0*m$}
#INLINE punk::ansi::codetype::is_sgr
#regexp {\033\[[0-9;:]*m$} $code
set re_is_sgr {\x1b\[[0-9;:]*m$}
foreach ln $linelist {
set is_replay_pure_reset [punk::ansi::codetype::is_sgr_reset $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable
set ansisplits [punk::ansi::ta::split_codes_single $ln]
if {[llength $ansisplits]<= 1} {
@ -819,6 +916,11 @@ namespace eval punk::lib {
set codestack [list $code]
} else {
if {[punk::ansi::codetype::is_sgr $code]} {
#todo - proper test of each code - so we only take latest background/foreground etc.
#requires handling codes with varying numbers of parameters.
#basic simplification - remove straight dupes.
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars.
set codestack [lremove $codestack {*}$dup_posns]
lappend codestack $code
} ;#else gx0 or other code - we don't want to stack it with SGR codes
}
@ -834,7 +936,9 @@ namespace eval punk::lib {
}
}
set newreplay [join $codestack ""]
#set newreplay [join $codestack ""]
set newreplay [punk::ansi::codetype::sgr_merge {*}$codestack]
if {$line_has_sgr && $newreplay ne $replaycodes} {
#adjust if it doesn't already do a reset at start
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} {
#NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least)
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
#stdin with line-mode readable events (at least on windows for Tcl 8.7a6 to 9.0a) can get stuck with bytes pending when input longer than 100chars - even though there is a linefeed further on than that.
#This potentially affects a reasonable number of Tcl8.7 kit/tclsh binaries out in the wild.
#see bug https://core.tcl-lang.org/tcl/tktview/bda99f2393 (gets stdin problem when non-blocking - Windows)
#we haven't put the data following the last line-ending into commandstr - but we want to display proper completion status prior to enter being hit or more data coming in.
#this could give spurious results for large pastes where buffering chunks it in odd places.?
#it does however give sensible output for the common case of a small paste where the last line ending wasn't included
set waiting [punk::lib::system::incomplete $commandstr$readingchunk]
if {[dict get $install_record tag] ni [list "QUERY-INPROGRESS" "INSTALL-RECORD" "INSTALL-SKIPPED" "INSTALL-INPROGRESS" "MODIFY-INPROGRESS" "MODIFY-RECORD" "MODIFY-SKIPPED" "VIRTUAL-INPROGRESS" "VIRTUAL-RECORD" "VIRTUAL-SKIPPED" "DELETE-RECORD" "DELETE-INPROGRESS" "DELETE-SKIPPED"]} {
switch -- [dict get $install_record tag] {
"QUERY-INPROGRESS" -
"INSTALL-RECORD" -
"INSTALL-SKIPPED" -
"INSTALL-INPROGRESS" -
"MODIFY-INPROGRESS" -
"MODIFY-RECORD" -
"MODIFY-SKIPPED" -
"VIRTUAL-INPROGRESS" -
"VIRTUAL-RECORD" -
"VIRTUAL-SKIPPED" -
"DELETE-RECORD" -
"DELETE-INPROGRESS" -
"DELETE-SKIPPED" {
}
default {
error "file_install_record_source_changes bad install record: tag '[dict get $install_record tag]' not INSTALL-RECORD|SKIPPED|INSTALL-INPROGRESS|MODIFY-RECORD|MODIFY-INPROGRESS|VIRTUAL-RECORD|VIRTUAL-INPROGRESS|DELETE-RECORD|DELETE-INPROGRESS"
}
}
set source_list [dict_getwithdefault $install_record body [list]]
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~).
dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"]
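#A minimal sketch (hypothetical helper, not from the punk sources) of using that final-byte
#range to find where a CSI sequence ends in a buffer. Returns the index just past the final
#byte, or -1 if the sequence is still incomplete.
proc csi_sequence_end {buffer start} {
    #assumes $start indexes the first character after the CSI introducer (ESC [)
    set end [string length $buffer]
    for {set i $start} {$i < $end} {incr i} {
        scan [string index $buffer $i] %c code
        if {$code >= 0x40 && $code <= 0x7E} {
            return [expr {$i + 1}]
        }
    }
    return -1
}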
# but here it may help detect unexpected changes during lifetime of the stack and avoids the chance of callers incorrectly using the transform handle?)
#todo - ensure we can handle 2> file (space after >)
if {[string trim [lindex $commandlist end]] eq "&"} {
#review - reconsider the handling of redirections such that tcl-style are handled totally separately to other shell syntaxes!
#
#note 2>@1 must occur as last word for tcl - but 2>@stdout can occur elsewhere
#(2>@stdout echoes to main stdout - not into pipeline)
#To properly do pipelines it looks like we will have to split on | and call this proc multiple times and wire it up accordingly (presumably in separate threads)
switch -- [string trim $lastitem] {
{&} {
set name [lindex $commandlist 0]
#background execution - stdout and stderr from child still comes here - but process is backgrounded
#FIX! - this is broken for paths with backslashes for example
#review - reconsider the handling of redirections such that tcl-style are handled totally separately to other shell syntaxes!
#
#note 2>@1 must occur as last word for tcl - but 2>@stdout can occur elsewhere
#(2>@stdout echoes to main stdout - not into pipeline)
#To properly do pipelines it looks like we will have to split on | and call this proc multiple times and wire it up accordingly (presumably in separate threads)
set custom_stderr ""
if {[string trim $lastitem] in [list {2>&1} {2>@1}]} {
#we leave stdout without imposed ansi colouring - because the source may be colourised
#stderr might have source colouring - but it usually doesn't seem to, and the visual distinction of red stderr is very handy for the run command.
#we leave stdout without imposed ansi colouring - because the source may be colourised and because ansi-wrapping a stream at whatever boundaries it comes in at isn't a really nice thing to do.
#stderr might have source colouring - but it usually doesn't seem to, and the visual distinction of red stderr can be very handy for the run command.
#A further enhancement could be to detect well-known options such as --color and/or use a configuration for specific commands that have useful colourised stderr,
#but defaulting stderr to red is a compromise.
#but having an option to configure stderr to red is a compromise.
#Note that the other run commands, runout,runerr, runx don't emit in real-time - so for those commands there may be options to detect and/or post-process stdout and stderr.
#TODO - fix. This has no effect because the repl adds an ansiwrap transform
#TODO - fix. This has no effect if/when the repl adds an ansiwrap transform
# what we probably want to do is 'aside' that transform for runxxx commands only.
#return a homogeneous block of characters - ie lines all same length, all same character
#printing-width in terminal columns is not necessarily same as blockwidth even if a single char is passed (could be full-width unicode character)
#This can have ansi SGR codes - but to repeat blocks containing ansi-movements, the block should first be rendered to a static output with something like overtype::left
proc block {blockwidth blockheight {char " "}} {
if {$blockwidth < 0} {
error "textblock::block blockwidth must be an integer greater than or equal to zero"
@ -50,15 +51,17 @@ namespace eval textblock {
error "textblock::block blockheight must be a positive integer"
}
if {$char eq ""} {return ""}
#using string length is ok
if {[string length $char] == 1} {
set row [string repeat $char $blockwidth]
set mtrx [lrepeat $blockheight $row]
return [::join $mtrx \n]
} else {
set charblock [string map [list \r\n \n] $char]
if {[string first \n $charblock] >= 0} {
if {[string last \n $charblock] >= 0} {
if {$blockwidth > 1} {
set row [.= val $charblock {*}[lrepeat [expr {$blockwidth -1}] |> piper_blockjoin $charblock]]
#set row [.= val $charblock {*}[lrepeat [expr {$blockwidth -1}] |> piper_blockjoin $charblock]] ;#building a repeated "|> command arg" list to evaluate as a pipeline. (from before textblock::join could take arbitrary num of blocks )
set row [textblock::join {*}[lrepeat $blockwidth $charblock]]
} else {
set row $charblock
}
@ -72,33 +75,33 @@ namespace eval textblock {
#todo - consider 'elastic tabstops' for textblocks where tab acts as a column separator and adjacent lines with the same number of tabs form a sort of table
proc width {textblock} {
#backspaces, vertical tabs, carriage returns
#backspaces, vertical tabs ?
if {$textblock eq ""} {
return 0
}
#textutil::tabify is a reasonable hack when there are no ansi SGR codes - but probably not always what we want even then - review
set textblock [textutil::tabify::untabify2 $textblock]
if {[string first \n $textblock] >= 0} {
return [tcl::mathfunc::max {*}[lmap v [punk::lib::lines_as_list -- $textblock] {::punk::char::ansifreestring_width [stripansi $v]}]]