error "punk::args::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'"
}
dict for {spec specval} $argspecs {
if {$spec ni [concat $known_argspecs -ARGTYPE]} {
error "punk::args::opts_values - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs"
#adhoc and currentproject* paths are relative to cwd - so no projectbase information can be stored at registration time.
#not all template item types will need projectbase information - as the item data may be self-contained within the template structure -
#but project_layout will need it - or at least need to know if there is no project - because project_layout data is never stored in the template folder structure directly.
if {$pathtype eq "adhoc"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "module"} {
set provide_statement [package ifneeded $pkg [package require $pkg]]
set tmfile [lindex $provide_statement end]
if {![file exists $tmfile]} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability"
return 0
switch -- $pathtype {
adhoc {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
}
module {
set provide_statement [package ifneeded $pkg [package require $pkg]]
set tmfile [lindex $provide_statement end]
if {![file exists $tmfile]} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
}
set tmfolder [file dirname $tmfile]
#todo - handle wrapped or unwrapped tarjar files - in which case we have to adjust tmfolder appropriately
#set tpath [file normalize [file join $tmfile [dict get $capdict relpath]]] ;#relpath is relative to the tm *file* - not it's containing folder
set projectinfo [punk::repo::find_repos $tmfolder]
set projectbase [dict get $projectinfo closest]
#store the projectbase even if it's empty string
set extended_capdict $capdict
set resolved_path [file join $tmfolder $path]
dict set extended_capdict resolved_path $resolved_path
dict set extended_capdict projectbase $projectbase
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
}
set tmfolder [file dirname $tmfile]
#todo - handle wrapped or unwrapped tarjar files - in which case we have to adjust tmfolder appropriately
#set tpath [file normalize [file join $tmfile [dict get $capdict relpath]]] ;#relpath is relative to the tm *file* - not it's containing folder
set projectinfo [punk::repo::find_repos $tmfolder]
set projectbase [dict get $projectinfo closest]
#store the projectbase even if it's empty string
set extended_capdict $capdict
set resolved_path [file join $tmfolder $path]
dict set extended_capdict resolved_path $resolved_path
dict set extended_capdict projectbase $projectbase
}
currentproject_multivendor {
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
} elseif {$pathtype eq "currentproject"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
set extended_capdict $capdict
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
}
#verify that the relative path is within the relative path of a currentproject_multivendor tree
#todo - api for the _multivendor tree controlling package to validate
currentproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
#verify that the relative path is within the relative path of a currentproject_multivendor tree
#todo - api for the _multivendor tree controlling package to validate
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "shellproject"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
#currentlyonlyintendedforpunk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
shellproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
set projectinfo [punk::repo::find_repos $shellbase]
set projectbase [dict get $projectinfo closest]
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
shellproject_multivendor {
#currently only intended for punk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
set projectinfo [punk::repo::find_repos $shellbase]
set projectbase [dict get $projectinfo closest]
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
} elseif {$pathtype eq "absolute"} {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
return 0
absolute {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
return 0
}
set normpath [file normalize $path]
if {!file exists $normpath} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' which doesn't seem to exist"
return 0
}
set projectinfo [punk::repo::find_repos $normpath]
set projectbase [dict get $projectinfo closest]
#todo - verify no other provider has registered same absolute path - if sharing a project-external location is needed - they need their own subfolder
set extended_capdict $capdict
dict set extended_capdict resolved_path $normpath
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
}
set normpath [file normalize $path]
if {!file exists $normpath} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' which doesn't seem to exist"
default {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
return 0
}
set projectinfo [punk::repo::find_repos $normpath]
set projectbase [dict get $projectinfo closest]
#todo - verify no other provider has registered same absolute path - if sharing a project-external location is needed - they need their own subfolder
set extended_capdict $capdict
dict set extended_capdict resolved_path $normpath
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
} else {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
dict set returninfo desc [dict get $charinfo $dec_char desc]
} else {
dict set returninfo desc ""
}
}
if {"short" in $fields} {
if {[dict exists $charinfo $dec_char short]} {
dict set returninfo desc [dict get $charinfo $dec_char short]
} else {
dict set returninfo short ""
}
}
#todo - expectedwidth - lookup the printing width it is *supposed* to have from unicode tables
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
if {"testwidth" in $fields} {
set existing_testwidth ""
if {[dict exists $charinfo $dec_char testwidth]} {
set existing_testwidth [dict get $charinfo $dec_char testwidth]
}
if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char]
set chwidth [char_info_testwidth $char]
dict set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
dict set charinfo $dec_char testwidth $chwidth
} else {
dict set returninfo testwidth $existing_testwidth
}
}
if {"char" in $fields} {
set char [format %c $dec_char]
dict set returninfo char $char
}
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
if {"memberof" in $fields} {
set memberof [list]
dict for {setname setinfo} $charsets {
foreach r [dict get $setinfo ranges] {
set s [dict get $r start]
set e [dict get $r end]
if {$dec_char >= $s && $dec_char <= $e} {
lappend memberof $setname
break
foreach f $fields {
switch -- $f {
dec {
dict set returninfo dec $dec_char
}
hex {
dict set returninfo hex $hex_char
}
desc {
if {[dict exists $charinfo $dec_char desc]} {
dict set returninfo desc [dict get $charinfo $dec_char desc]
} else {
dict set returninfo desc ""
}
}
short {
if {[dict exists $charinfo $dec_char short]} {
dict set returninfo desc [dict get $charinfo $dec_char short]
} else {
dict set returninfo short ""
}
}
testwidth {
#todo - expectedwidth - lookup the printing width it is *supposed* to have from unicode tables
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
set existing_testwidth ""
if {[dict exists $charinfo $dec_char testwidth]} {
set existing_testwidth [dict get $charinfo $dec_char testwidth]
}
if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char]
set chwidth [char_info_testwidth $char]
dict set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
dict set charinfo $dec_char testwidth $chwidth
} else {
dict set returninfo testwidth $existing_testwidth
}
}
char {
set char [format %c $dec_char]
dict set returninfo char $char
}
memberof {
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
#major named sets such as unicode blocks, scripts, and other sets such as microsoft WGL4
#case insensitive search - possibly with globs
proc charset_names {{namesearch *}} {
#case insensitive search - possibly with *basic* globs (? and * only - not lb rb)
proc charset_names2 {{namesearch *}} {
variable charsets
set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [dict keys $charsets]]
if {$namesearch eq "*"} {
return $sortedkeys
}
if {[regexp {[?*]} $namesearch]} {
#name glob search
set matched_names [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
set matched [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - stop on first match
if {[llength $matched]} {
return [list $matched]
} else {
return [list]
}
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
#review is detecting \033 enough? what about 8-bit escapes?
#maint warning - also in overtype!
#intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy)
proc grapheme_width_cached {ch} {
variable grapheme_widths
if {[dict exists $grapheme_widths $ch]} {
return [dict get $grapheme_widths $ch]
}
set width [punk::char::ansifreestring_width $ch] ;#review - can we provide faster version if we know it's a single grapheme rather than a string? (grapheme is still a string as it may have combiners/diacritics)
dict set grapheme_widths $ch $width
return $width
}
#no char_width - use grapheme_width terminology to be clearer
proc grapheme_width {char} {
error "grapheme_width unimplemented - use ansifreestring_width"
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u0200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u0200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
#todo - document that these shouldn't be present in input rather than explicitly checking here
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
#todo - check double-width chars in unicode blocks.. try to do reasonably quicky
#short-circuit basic cases
if {![regexp {[\uFF-\U10FFFF]} $text]} {
#control chars?
#support tcl pre 2023-11 - see regexp bug below
#if {![regexp {[\uFF-\U10FFFF]} $text]} {
# return [string length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
return [string length $text]
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?
#review
#set can_regex_high_unicode [string match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters
set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0000-\u00FF\])+" $text]
set len 0
foreach {uc ascii} $uc_sequences {
#puts "-ascii $ascii"
#puts "-uc $uc"
incr len [string length $ascii]
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - find something that understands grapheme clusters - needed also for grapheme_split
#use punk::char::wcswidth - faster than the string split in textutil::wcswidth but still uses textutil::wcswidth_char
incr len [wcswidth $uc]
}
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
return $len
}
#kept as a fallback for review/test if textutil::wcswidth doesn't do what we require on all terminals.
#this version looks at console testwidths if they've been cached.
#It is relatively fast - but tests unicode widths char by char - so won't be useful going forward for grapheme clusters.
proc ansifreestring_width2 {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
#we can c0 control characters after or while processing ansi escapes.
#we need to map remaining control characters to zero-width (under assumption we're not using a font/codepage that displays them - review!)
#anyway - we must allow things like raw ESC,DEL, NUL etc to pass through without error
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
@ -1831,7 +2012,7 @@ namespace eval punk::char {
#
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u0200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u0200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
#todo - document that these shouldn't be present in input rather than explicitly checking here
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count
} else {
#review
# a)- terminals lie - so we could have a bad cached testwidth
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs.
#(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c]
if {$width == 0} {
incr zerowidth_char_count
} elseif {$width == 2} {
incr doublewidth_char_count
}
} else {
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#review - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
# - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#will accept a single char or a string - test using console cursor position reporting
#unreliable
proc char_info_testwidth {ch {emit 0}} {
package require punk::console
#uses cursor movement and relies on console to report position.. which it may do properly for single chars - but may misreport for combinations that combine into a single glyph
variable ansi_available -1 ;#default -1 for unknown. Leave it this way so test for ansi support is run.
#-1 still evaluates to true - as the modern assumption for ansi availability is true
#only false if ansi_available has been set 0 by test_can_ansi
#support stripansi for legacy windows terminals
# --
variable ansi_wanted 2 ;#2 for default assumed yes, will be set to -1 for automatically unwanted when ansi unavailable values of 0 or 1 won't be autoset
#punk::console namespace - contains *directly* acting functions - some based on ansi escapes from the 'ansi' sub namespace, some on local system calls or executable calls wrapped in the 'local' sub namespace
#directly acting means they write to stdout to cause the console to peform the action, or they perform the action immediately via other means.
@ -55,41 +67,61 @@ namespace eval punk::console {
}
if {"windows" eq $::tcl_platform(platform)} {
proc enableAnsi {} {
#accept args for all dummy/load functions so we don't have to match/update argument signatures here
proc enableAnsi {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableAnsi
tailcall enableAnsi {*}$args
}
#review what raw mode means with regard to a specific channel vs terminal as a whole
proc enableRaw {{channel stdin}} {
proc enableRaw {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableRaw $channel
tailcall enableRaw {*}$args
}
proc disableRaw {{channel stdin}} {
proc disableRaw {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall disableRaw $channel
tailcall disableRaw {*}$args
}
proc enableVirtualTerminal {} {
proc enableVirtualTerminal {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall enableVirtualTerminal
tailcall enableVirtualTerminal {*}$args
}
proc disableVirtualTerminal {} {
proc disableVirtualTerminal {args} {
#loopavoidancetoken (don't remove)
internal::define_windows_procs
internal::abort_if_loop
tailcall disableVirtualTerminal
tailcall disableVirtualTerminal {*}$args
}
set funcs [list disableAnsi enableProcessedInput disableProcessedInput]
foreach f $funcs {
proc $f {args} [string map [list %f% $f] {
set mybody [info body %f%]
internal::define_windows_procs
set newbody [info body %f%]
if {$newbody ne $mybody} {
tailcall %f% {*}$args
} else {
#error vs noop?
puts stderr "Unable to set implementation for %f% - check twapi?"
}
}]
}
} else {
proc enableAnsi {} {
#todo?
}
proc disableAnsi {} {
}
#todo - something better - the 'channel' concept may not really apply on unix, as raw mode is for input and output modes
error "punk::console::mode expected 'raw' or 'line' or default value 'query'
error "punk::console::mode expected 'raw' or 'line' or default value 'query'"
}
}
@ -211,6 +249,7 @@ namespace eval punk::console {
#enableAnsi seems like it should be directly under punk::console .. but then it seems inconsistent if other local console-mode setting functions aren't.
#Find a compromise to organise things somewhat sensibly..
#this is really enableAnsi *processing*
proc [namespace parent]::enableAnsi {} {
#output handle modes
#Enable virtual terminal processing (sometimes off in older windows terminals)
#review - 1 byte at a time seems inefficient... but we don't have a way to peek or put back chars (?)
#todo - timeout - what if terminal doesn't put data on stdin?
#review - what if we slurp in data meant for main loop? Main loop probably needs to detect these responses and store them for lookup *instead* of this handler
#capturingendregex should capture ANY prefix, whole escape match - and a subcapture of the data we're interested in and match at end of string.
#we need to cooperate with other stdin/$input readers and put data here if we overconsume.
#Main repl reader may be currently active - or may be inactive.
#This call could come from within code called by the main reader - or from user code running while main read-loop is temporarily disabled
#In other contexts there may not even be another input reader
#REVIEW - what if there is existing data in input_chunks_waiting - is it for us?
#temp - let's keep alert to it until we decide if it's legit/required..
if {[info exists input_chunks_waiting($input)] && [llength $input_chunks_waiting($input)]} {
#puts stderr "[punk::ansi::a+ cyan bold]get_ansi_response_payload called while input_chunks_waiting($input) contained data: $input_chunks_waiting($input)[punk::ansi::a]"
}
if {!$::punk::console::ansi_available} {
return ""
}
set callid [info cmdcount] ;#info cmdcount is almost as fast as clock clicks - and whilst not unique in a long-running app(will wrap?) - fine for this context
#we may have consumed all pending input on $input - so there may be no trigger for the readable fileevent
if {[llength $input_chunks_waiting($input)]} {
#This is experimental If a handler is aware of input_chunks_waiting - there should be no need to schedule a trigger
#If it isn't, but the handler can accept an existing chunk of data as an argument - we could trigger and pass it the waiting chunks - but there's no way to know its API.
#we could look at info args - but that's not likely to tell us much in a robust way.
#we could create a reflected channel for stdin? That is potentially an overreach..?
#triggering it manually... as it was already listening - this should generally do no harm as it was the active reader anyway, but won't help with the missing data if it's input_chunks_waiting-unaware.
puts stderr "[punk::ansi::a+ yellow bold]-->punk::console::get_ansi_response_payload triggering existing handler while over-read data is in punk::console::input_chunks_waiting($input) instead of channel [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
after idle [list after 0 $existing_handler]
}
#Note - we still may be in_repl_handler here (which disables its own reader while executing commandlines)
#The input_chunks_waiting may really belong to the existing_handler we found - but if it doesn't consume them they will end up being read by the repl_handler when it eventually re-enables.
#todo - some better structure than just a list of waiting chunks indexed by input channel, so repl/other handlers can determine the context in which these waiting chunks were generated?
} elseif {[llength $::repl::in_repl_handler]} {
if {[llength $input_chunks_waiting($input)]} {
#don't trigger the repl handler manually - we will inevitably get things out of order - as it knows when to enable/disable itself based on whether chunks are waiting.
#triggering it by putting it on the eventloop will potentially result in re-entrancy
#The cooperating reader must be designed to consume waiting chunks and only reschedule it's channel read handler once all waiting chunks have been consumed.
#puts stderr "[punk::ansi::a+ green bold]--> repl_handler has chunks to consume [ansistring VIEW $input_chunks_waiting($input)][punk::ansi::a]"
}
}
catch {
unset accumulator($callid)
unset waitvar($callid)
}
#set punk::console::chunk ""
return $payload
}
#review - reading 1 byte at a time and repeatedly running the small capturing/completion regex seems a little inefficient... but we don't have a way to peek or put back chars (?)
#review (we do have the punk::console::input_chunks_waiting($chan) array to cooperatively put back data - but this won't work for user scripts not aware of this)
#review - timeout - what if terminal doesn't put data on stdin? error vs stderr report vs empty results
#review - Main loop may need to detect some terminal responses and store them for lookup instead-of or as-well-as this handler?
#e.g what happens to mouse-events while user code is executing?
#we may still need this handler if such a loop doesn't exist.
#end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions
lassign [split $index -] A B
if {$A eq "end"} {
set index [expr {$max - $B}]
} else {
set index [expr {$A - $B}]
switch -glob -- $index {
end {
set index $max
}
} elseif {[string match "*+*" $index]} {
lassign [split $index +] A B
if {$A eq "end"} {
#review - this will just result in out of bounds error in final test - as desired
#By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all.
set index [expr {$max + $B}]
} else {
set index [expr {$A + $B}]
"*-*" {
#end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions
lassign [split $index -] A B
if {$A eq "end"} {
set index [expr {$max - $B}]
} else {
set index [expr {$A - $B}]
}
}
} else {
#May be something like +2 or -0 which braced expr can hanle
#we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
if {[catch {expr {$index}} index]} {
#could be end+x - but we don't want out of bounds to be valid
#set it to something that the final bounds expr test can deal with
set index Inf
"*+*" {
lassign [split $index +] A B
if {$A eq "end"} {
#review - this will just result in out of bounds error in final test - as desired
#By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all.
set index [expr {$max + $B}]
} else {
set index [expr {$A + $B}]
}
}
default {
#May be something like +2 or -0 which braced expr can hanle
#we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
if {[catch {expr {$index}} index]} {
#could be end+x - but we don't want out of bounds to be valid
#set it to something that the final bounds expr test can deal with
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
set a [llength $all_flagged] ;#index into all_flagged list we are building
if {$type eq "soloflag"} {
if {[dict exists $seenflag $val]} {
set seenindex [dict get $seenflag $val]
set seenindexplus [expr {$seenindex+1}]
set existingvals [lindex $all_flagged $seenindexplus]
lappend existingvals 1 ;#1 indicating presence - stored as list rather than a count. todo: consider global or per-solo options to support incrementing instead?
lset all_flagged $seenindexplus $existingvals
} else {
dict set seenflag $val $a
lappend all_flagged $val 1
switch -- $type {
soloflag {
if {[dict exists $seenflag $val]} {
set seenindex [dict get $seenflag $val]
set seenindexplus [expr {$seenindex+1}]
set existingvals [lindex $all_flagged $seenindexplus]
lappend existingvals 1 ;#1 indicating presence - stored as list rather than a count. todo: consider global or per-solo options to support incrementing instead?
lset all_flagged $seenindexplus $existingvals
} else {
dict set seenflag $val $a
lappend all_flagged $val 1
}
}
} elseif {$type eq "flag"} {
if {![dict exists $seenflag $val]} {
dict set seenflag $val $a
lappend all_flagged $val
flag {
if {![dict exists $seenflag $val]} {
dict set seenflag $val $a
lappend all_flagged $val
}
#no need to do anything if already seen - flagvalue must be next, and it will work out where to go.
}
#no need to do anything if already seen - flagvalue must be next, and it will work out where to go.
} elseif {$type eq "flagvalue"} {
set idxflagfor [expr {$k -1}]
set flagforinfo [dict get $o_map $idxflagfor]
lassign $flagforinfo ffclass fftype ffval
#jn "--" following a flag could result in us getting here accidentaly.. review
set seenindex [dict get $seenflag $ffval]
if {$seenindex == [expr {$a-1}]} {
#usual case - this is a flagvalue following the first instance of the flag
lappend all_flagged $val
} else {
#write the value back to the seenindex+1
set seenindexplus [expr {$seenindex+1}]
set existingvals [lindex $all_flagged $seenindexplus]
lappend existingvals $val ;#we keep multiples as a list
lset all_flagged $seenindexplus $existingvals
flagvalue {
set idxflagfor [expr {$k -1}]
set flagforinfo [dict get $o_map $idxflagfor]
lassign $flagforinfo ffclass fftype ffval
#jn "--" following a flag could result in us getting here accidentaly.. review
set seenindex [dict get $seenflag $ffval]
if {$seenindex == [expr {$a-1}]} {
#usual case - this is a flagvalue following the first instance of the flag
lappend all_flagged $val
} else {
#write the value back to the seenindex+1
set seenindexplus [expr {$seenindex+1}]
set existingvals [lindex $all_flagged $seenindexplus]
lappend existingvals $val ;#we keep multiples as a list
error "punk::args::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'"
}
dict for {spec specval} $argspecs {
if {$spec ni [concat $known_argspecs -ARGTYPE]} {
error "punk::args::opts_values - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs"
#adhoc and currentproject* paths are relative to cwd - so no projectbase information can be stored at registration time.
#not all template item types will need projectbase information - as the item data may be self-contained within the template structure -
#but project_layout will need it - or at least need to know if there is no project - because project_layout data is never stored in the template folder structure directly.
if {$pathtype eq "adhoc"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "module"} {
set provide_statement [package ifneeded $pkg [package require $pkg]]
set tmfile [lindex $provide_statement end]
if {![file exists $tmfile]} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability"
return 0
switch -- $pathtype {
adhoc {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
}
module {
set provide_statement [package ifneeded $pkg [package require $pkg]]
set tmfile [lindex $provide_statement end]
if {![file exists $tmfile]} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - unable to determine base folder for package '$pkg' which is attempting to register with punk::cap as a provider of '$capname' capability"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
}
set tmfolder [file dirname $tmfile]
#todo - handle wrapped or unwrapped tarjar files - in which case we have to adjust tmfolder appropriately
#set tpath [file normalize [file join $tmfile [dict get $capdict relpath]]] ;#relpath is relative to the tm *file* - not it's containing folder
set projectinfo [punk::repo::find_repos $tmfolder]
set projectbase [dict get $projectinfo closest]
#store the projectbase even if it's empty string
set extended_capdict $capdict
set resolved_path [file join $tmfolder $path]
dict set extended_capdict resolved_path $resolved_path
dict set extended_capdict projectbase $projectbase
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
}
set tmfolder [file dirname $tmfile]
#todo - handle wrapped or unwrapped tarjar files - in which case we have to adjust tmfolder appropriately
#set tpath [file normalize [file join $tmfile [dict get $capdict relpath]]] ;#relpath is relative to the tm *file* - not it's containing folder
set projectinfo [punk::repo::find_repos $tmfolder]
set projectbase [dict get $projectinfo closest]
#store the projectbase even if it's empty string
set extended_capdict $capdict
set resolved_path [file join $tmfolder $path]
dict set extended_capdict resolved_path $resolved_path
dict set extended_capdict projectbase $projectbase
}
currentproject_multivendor {
#currently only intended for punk::mix::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
set extended_capdict $capdict
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
} elseif {$pathtype eq "currentproject"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
set extended_capdict $capdict
dict set extended_capdict vendor $vendor ;#vendor key still required.. controlling vendor?
}
#verify that the relative path is within the relative path of a currentproject_multivendor tree
#todo - api for the _multivendor tree controlling package to validate
currentproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
}
#verify that the relative path is within the relative path of a currentproject_multivendor tree
#todo - api for the _multivendor tree controlling package to validate
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
} elseif {$pathtype eq "shellproject"} {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
#currentlyonlyintendedforpunk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
shellproject {
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
set projectinfo [punk::repo::find_repos $shellbase]
set projectbase [dict get $projectinfo closest]
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
return 0
shellproject_multivendor {
#currently only intended for punk::templates - review if 3rd party _multivendor trees even make sense
if {$pkg ni $multivendor_package_whitelist} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but package is not in whitelist $multivendor_package_whitelist - 3rd party _multivendor tree not supported"
return 0
}
if {[file pathtype $path] ne "relative"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be a relative path"
set projectinfo [punk::repo::find_repos $shellbase]
set projectbase [dict get $projectinfo closest]
set extended_capdict $capdict
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
} elseif {$pathtype eq "absolute"} {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
return 0
absolute {
if {[file pathtype $path] ne "absolute"} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' of type $pathtype which doesn't seem to be absolute"
return 0
}
set normpath [file normalize $path]
if {!file exists $normpath} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' which doesn't seem to exist"
return 0
}
set projectinfo [punk::repo::find_repos $normpath]
set projectbase [dict get $projectinfo closest]
#todo - verify no other provider has registered same absolute path - if sharing a project-external location is needed - they need their own subfolder
set extended_capdict $capdict
dict set extended_capdict resolved_path $normpath
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
}
set normpath [file normalize $path]
if {!file exists $normpath} {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' which doesn't seem to exist"
default {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
return 0
}
set projectinfo [punk::repo::find_repos $normpath]
set projectbase [dict get $projectinfo closest]
#todo - verify no other provider has registered same absolute path - if sharing a project-external location is needed - they need their own subfolder
set extended_capdict $capdict
dict set extended_capdict resolved_path $normpath
dict set extended_capdict vendor $vendor
dict set extended_capdict projectbase $projectbase
} else {
puts stderr "punk::cap::handlers::templates::capsystem::pkg_register WARNING - package '$pkg' is attempting to register with punk::cap as a provider of '$capname' capability but provided a path '$path' with unrecognised type $pathtype"
#dependency on tcllib not ideal for bootstrapping as punk::char is core to many features.. (textutil::wcswidth is therefore included in bootsupport/include_modules.config) review
dict set returninfo desc [dict get $charinfo $dec_char desc]
} else {
dict set returninfo desc ""
}
}
if {"short" in $fields} {
if {[dict exists $charinfo $dec_char short]} {
dict set returninfo desc [dict get $charinfo $dec_char short]
} else {
dict set returninfo short ""
}
}
#todo - expectedwidth - lookup the printing width it is *supposed* to have from unicode tables
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
if {"testwidth" in $fields} {
set existing_testwidth ""
if {[dict exists $charinfo $dec_char testwidth]} {
set existing_testwidth [dict get $charinfo $dec_char testwidth]
}
if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char]
set chwidth [char_info_testwidth $char]
dict set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
dict set charinfo $dec_char testwidth $chwidth
} else {
dict set returninfo testwidth $existing_testwidth
}
}
if {"char" in $fields} {
set char [format %c $dec_char]
dict set returninfo char $char
}
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
if {"memberof" in $fields} {
set memberof [list]
dict for {setname setinfo} $charsets {
foreach r [dict get $setinfo ranges] {
set s [dict get $r start]
set e [dict get $r end]
if {$dec_char >= $s && $dec_char <= $e} {
lappend memberof $setname
break
foreach f $fields {
switch -- $f {
dec {
dict set returninfo dec $dec_char
}
hex {
dict set returninfo hex $hex_char
}
desc {
if {[dict exists $charinfo $dec_char desc]} {
dict set returninfo desc [dict get $charinfo $dec_char desc]
} else {
dict set returninfo desc ""
}
}
short {
if {[dict exists $charinfo $dec_char short]} {
dict set returninfo desc [dict get $charinfo $dec_char short]
} else {
dict set returninfo short ""
}
}
testwidth {
#todo - expectedwidth - lookup the printing width it is *supposed* to have from unicode tables
#testwidth is one of the main ones likely to be worthwhile excluding as querying the console via ansi takes time
set existing_testwidth ""
if {[dict exists $charinfo $dec_char testwidth]} {
set existing_testwidth [dict get $charinfo $dec_char testwidth]
}
if {$existing_testwidth eq ""} {
#no cached data - do expensive cursor-position test (Note this might not be 100% reliable - terminals lie e.g if ansi escape sequence has set to wide printing.)
set char [format %c $dec_char]
set chwidth [char_info_testwidth $char]
dict set returninfo testwidth $chwidth
#cache it. todo - -verify flag to force recalc in case font/terminal changed in some way?
dict set charinfo $dec_char testwidth $chwidth
} else {
dict set returninfo testwidth $existing_testwidth
}
}
char {
set char [format %c $dec_char]
dict set returninfo char $char
}
memberof {
#memberof takes in the order of a few hundred microseconds if a simple scan of all ranges is taken - possibly worthwhile caching/optimising
#note that memberof is not just unicode blocks - but scripts and other non-contiguous sets consisting of multiple ranges - some of which may include ranges of a single character. (e.g WGL4)
#This means there probably isn't a really efficient means of calculating membership other than scanning all the defined ranges.
#We could potentially populate it using a side-thread - but it seems reasonable to just cache result after first use here.
#some efficiency could also be gained by pre-calculating the extents for each charset which isn't a simple unicode block. (and perhaps sorting by max char)
#major named sets such as unicode blocks, scripts, and other sets such as microsoft WGL4
#case insensitive search - possibly with globs
proc charset_names {{namesearch *}} {
#case insensitive search - possibly with *basic* globs (? and * only - not lb rb)
proc charset_names2 {{namesearch *}} {
variable charsets
set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
#dictionary sorting of the keys is slow! - we should obviously store it in sorted order instead of sorting entire list on retrieval - or just sort results
#set sortedkeys [lsort -increasing -dictionary [dict keys $charsets]] ;#NOTE must use -dictionary to use -sorted flag below
set sortedkeys [lsort -increasing [dict keys $charsets]]
if {$namesearch eq "*"} {
return $sortedkeys
}
if {[regexp {[?*]} $namesearch]} {
#name glob search
set matched_names [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
return [lsearch -all -inline -nocase $sortedkeys $namesearch] ;#no point using -sorted flag when -all is used
} else {
set matched [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - stop on first match
if {[llength $matched]} {
return [list $matched]
} else {
return [list]
}
#return [lsearch -sorted -inline -nocase $sortedkeys $namesearch] ;#no globs - bails out earlier if -sorted?
#todo - provide a char_width equivalent that is optimised for speed
#maint warning - also in overtype!
#intended for single grapheme - but will work for multiple
#cannot contain ansi or newlines
#(a cache of ansifreestring_width calls - as these are quite regex heavy)
proc grapheme_width_cached {ch} {
variable grapheme_widths
if {[dict exists $grapheme_widths $ch]} {
return [dict get $grapheme_widths $ch]
}
set width [punk::char::ansifreestring_width $ch] ;#review - can we provide faster version if we know it's a single grapheme rather than a string? (grapheme is still a string as it may have combiners/diacritics)
dict set grapheme_widths $ch $width
return $width
}
#no char_width - use grapheme_width terminology to be clearer
proc grapheme_width {char} {
error "grapheme_width unimplemented - use ansifreestring_width"
#todo - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
# -- --- --- --- --- --- ---
#review
#if we strip out ZWJ \u200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth just uses the unicode east-asian width property data and doesn't seem to handle these specially - it counts this joiner and others as one wide (also BOM \uFFEF)
#TODO - once we have proper grapheme cluster splitting - work out which of these characters should be left in and/or when exactly their length-effects apply
#
#for now - strip them out
#ZWNJ \u200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#ZWSP \u200b zero width space
#\uFFEFBOM/ ZWNBSP and others that should be zero width
#todo - work out proper way to mark/group zero width.
set text [string map [list \u200b "" \u200c "" \u200d "" \uFFEF ""] $text]
# -- --- --- --- --- --- ---
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
#todo - document that these shouldn't be present in input rather than explicitly checking here
#c0 controls
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
#c1 controls - first section of the Latin-1 Supplement block - all non-printable from a utf-8 perspective
#some or all of these may have a visual representation in other encodings e.g cp855 seems to have 1 width for them all
#we are presuming that the text supplied has been converted from any other encoding to utf8 - so we don't need to handle that here
#they should also be unlikely to be present in an ansi-free string (as is a precondition for use of this function)
set text [regsub -all {[\u0080-\u009f]+} $text ""]
#short-circuit basic cases
#support tcl pre 2023-11 - see regexp bug below
#if {![regexp {[\uFF-\U10FFFF]} $text]} {
# return [string length $text]
#}
if {![regexp "\[\uFF-\U10FFFF\]" $text]} {
return [string length $text]
}
#split just to get the standalone character widths - and then scan for other combiners (?) - or scan for clusters first?
#review
#set can_regex_high_unicode [string match [regexp -all -inline {[\U80-\U0010FFFF]} \U0001F525] \U0001F525]
#tcl pre 2023-11 - braced high unicode regexes don't work
#fixed in bug-4ed788c618 2023-11
#set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
#maintain unicode as sequences - todo - scan for grapheme clusters
set uc_sequences [punk::ansi::ta::_perlish_split "(?:\[\u0000-\u00FF\])+" $text]
set len 0
foreach {uc ascii} $uc_sequences {
#puts "-ascii $ascii"
#puts "-uc $uc"
incr len [string length $ascii]
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#todo - find something that understands grapheme clusters - needed also for grapheme_split
#use punk::char::wcswidth - faster than the string split in textutil::wcswidth but still uses textutil::wcswidth_char
incr len [wcswidth $uc]
}
#todo - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
return $len
}
#kept as a fallback for review/test if textutil::wcswidth doesn't do what we require on all terminals.
#this version looks at console testwidths if they've been cached.
#It is relatively fast - but tests unicode widths char by char - so won't be useful going forward for grapheme clusters.
proc ansifreestring_width2 {text} {
#caller responsible for calling ansistrip first if text may have ansi codes - and for ensuring no newlines
@ -1831,7 +2030,7 @@ namespace eval punk::char {
#
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
set re_diacritics {[\u0300-\u036f]+|[\u1ab0-\u1aff]+|[\u1dc0-\u1dff]+|[\u20d0-\u20ff]+|[\ufe20-\ufe2f]+}
set text [regsub -all $re_diacritics $text ""]
#review
#if we strip out ZWJ \u0200d (zero width combiner) - then we don't give the chance for a grapheme combining test to merge properly e.g combining emojis
#as at 2024 - textutil::wcswidth doesn't seem to be aware of these - and counts the joiner as one wide
#ZWNJ \u0200c should also be zero length - but as a 'non' joiner - it should add zero to the length
#only map control sequences to nothing after processing ones with special effects, such as \b (\x07f)
#Note DEL \x1f will only
#ZWSP \u0200b zero width space
#we should only map control sequences to nothing after processing ones with length effects, such as \b (\x07f) or DEL \x1f
#todo - document that these shouldn't be present in input rather than explicitly checking here
set re_ascii_c0 {[\U0000-\U001F]}
set text [regsub -all $re_ascii_c0 $text ""]
@ -1856,7 +2062,7 @@ namespace eval punk::char {
return [string length $text]
}
#todo - check double-width chars in unicode blocks.. try to do reasonably quicky
set uc_chars [regexp -all -inline "\[\u0100-\U10FFFF\]" $text] ;#e.g return list of chars in range only
foreach c $uc_chars {
if {[regexp $re_ascii_fullwidth $c]} {
incr doublewidth_char_count
} else {
#review
# a)- terminals lie - so we could have a bad cached testwidth
# b)- textutil::wcswidth_char seems to be east-asian-width based - and not a reliable indicator of 'character cells taken by the character when printed to the terminal' despite this statement in tclllib docs.
#(character width is a complex context-dependent topic)
# c) by checking for a cached console testwidth first - we make this function less deterministic/repeatable depending on whether console tests have been run.
# d) Upstream caching of grapheme_width may also lock in a result from whatever method was first employed here
#Despite all this - the mechanism is hoped to give best effort consistency for terminals
#further work needed for combining emojis etc - which can't be done in a per character loop
#todo - see if wcswidth does any of this. It is very slow for strings that include mixed ascii/unicode - so perhaps we can use a perlish_split
# to process sequences of unicode.
#- and the user has the option to test character sets first if terminal position reporting gives better results
if {[char_info_is_testwidth_cached $c]} {
set width [char_info_testwidth_cached $c]
if {$width == 0} {
incr zerowidth_char_count
} elseif {$width == 2} {
incr doublewidth_char_count
}
} else {
#textutil::wcswidth uses unicode data
#fall back to textutil::wcswidth (which doesn't for example handle diactricts/combiners so we can't use until these and other things such as \u200b and diacritics are already stripped/accounted for)
#review - work out what to do with anomalies like grave combiner which print at different lengths on different terminals (fonts?) and for which cursor-query escape sequence lies.
# - various combining diacritical marks.. from grave - to various complicated unicode joiners and composing chars etc
#as at 2023 - terminals generally seem to use the simplistic approach of tallying up individual character-widths, which means combinations that print short are reported too long by the terminal esc 6 n sequence.
# initial simplistic approach is just to strip these ... todo REVIEW
#experiment to detect leading diacritics - but this isn't necessary - it's still zero-width - and if the user is splitting properly we shouldn't be getting a string with leading diacritics anyway
#(leading combiners may display in terminal as mark on rightmost prompt char which is usually a space - but won't add width even then)
#will accept a single char or a string - test using console cursor position reporting
#unreliable
proc char_info_testwidth {ch {emit 0}} {
package require punk::console
#uses cursor movement and relies on console to report position.. which it may do properly for single chars - but may misreport for combinations that combine into a single glyph
set punk::console::tabwidth $w ;#we also attempt to read terminal's tabstops and set tabwidth to the apparent spacing of first non-1 value in tabstops list.
catch {textutil::tabify::untabify2 "" $w} ;#textutil tabify can end up uninitialised and raise errors like "can't read Spaces(<n>).." after a tabstop change This call seems to keep tabify happy - review.
#some terminals (conemu on windows) scroll the viewport when we make a big move down like this - a move to 1 1 immediately after cursor_save doesn't seem to fix that.
#This issue also occurs when switching back from the alternate screen buffer - so perhaps that needs to be addressed elsewhere.
#end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions
lassign [split $index -] A B
if {$A eq "end"} {
set index [expr {$max - $B}]
} else {
set index [expr {$A - $B}]
switch -glob -- $index {
end {
set index $max
}
} elseif {[string match "*+*" $index]} {
lassign [split $index +] A B
if {$A eq "end"} {
#review - this will just result in out of bounds error in final test - as desired
#By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all.
set index [expr {$max + $B}]
} else {
set index [expr {$A + $B}]
"*-*" {
#end-int or int-int - like lrange etc we don't accept arbitrarily complex expressions
lassign [split $index -] A B
if {$A eq "end"} {
set index [expr {$max - $B}]
} else {
set index [expr {$A - $B}]
}
}
} else {
#May be something like +2 or -0 which braced expr can hanle
#we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
if {[catch {expr {$index}} index]} {
#could be end+x - but we don't want out of bounds to be valid
#set it to something that the final bounds expr test can deal with
set index Inf
"*+*" {
lassign [split $index +] A B
if {$A eq "end"} {
#review - this will just result in out of bounds error in final test - as desired
#By calculating here - we will see the result in the error message - but it's probably not particularly useful - as we don't really need end+ support at all.
set index [expr {$max + $B}]
} else {
set index [expr {$A + $B}]
}
}
default {
#May be something like +2 or -0 which braced expr can hanle
#we would like to avoid unbraced expr here - as we're potentially dealing with ranges that may come from external sources.
if {[catch {expr {$index}} index]} {
#could be end+x - but we don't want out of bounds to be valid
#set it to something that the final bounds expr test can deal with
error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower"
switch -- [string tolower $opt_case] {
upper {
set spec X
}
lower {
set spec x
}
default {
error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower"
}
}
set fmt "%${opt_width}.${opt_width}ll${spec}"
@ -529,6 +533,81 @@ namespace eval punk::lib {
return $answer
}
#like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter.
proc indent {text {prefix " "}} {
set result [list]
foreach line [split $text \n] {
if {[string trim $line] eq ""} {
lappend result ""
} else {
lappend result $prefix[string trimright $line]
}
}
return [join $result \n]
}
proc undent {text} {
if {$text eq ""} {
return ""
}
set lines [split $text \n]
set nonblank [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
continue
}
lappend nonblank $ln
}
set lcp [longestCommonPrefix $nonblank]
if {$lcp eq ""} {
return $text
}
regexp {^([\t ]*)} $lcp _m lcp
if {$lcp eq ""} {
return $text
}
set len [string length $lcp]
set result [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
lappend result ""
} else {
lappend result [string range $ln $len end]
}
}
return [join $result \n]
}
#A version of textutil::string::longestCommonPrefixList
proc longestCommonPrefix {items} {
if {[llength $items] <= 1} {
return [lindex $items 0]
}
set items [lsort $items[unset items]]
set min [lindex $items 0]
set max [lindex $items end]
#if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list)
#(sort order nothing to do with length - e.g min may be longer than max)
if {[string length $min] > [string length $max]} {
set temp $min
set min $max
set max $temp
}
set n [string length $min]
set prefix ""
set i -1
while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} {
append prefix $c
}
return $prefix
}
#test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty]
error "linelist: unknown -block option value: $bo known values: $known_blockopts"
}
}
}
}
#normalize certain combos
if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {[set posn [lsearch $opt_block trimtail1]] >=0 && "trimtail" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {"trimall" in $opt_block} {
#no other block options make sense in combination with this
set opt_block [list "trimall"]
#normalize certain combos
if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {[set posn [lsearch $opt_block trimtail1]] >=0 && "trimtail" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {"trimall" in $opt_block} {
#no other block options make sense in combination with this
set opt_block [list "trimall"]
}
#TODO
if {"triminner" in $opt_block } {
error "linelist -block triminner not implemented - sorry"
}
}
#TODO
if {"triminner" in $opt_block } {
error "linelist -block triminner not implemented - sorry"
}
# -- --- --- --- --- ---
set opt_line [dict get $opts -line]
set known_lineopts [list trimline trimleft trimright]
foreach lo $opt_line {
if {$lo ni $known_lineopts} {
error "linelist: unknown -line option value: $lo known values: $known_lineopts"
}
switch -- $lo {
trimline - trimleft - trimright {}
default {
set known_lineopts [list trimline trimleft trimright]
error "linelist: unknown -line option value: $lo known values: $known_lineopts"
}
}
}
#normalize trimleft trimright combo
if {"trimleft" in $opt_line && "trimright" in $opt_line} {
@ -777,9 +868,15 @@ namespace eval punk::lib {
set linelist $transformed
} else {
#INLINE punk::ansi::codetype::is_sgr_reset
#regexp {\x1b\[0*m$} $code
set re_is_sgr_reset {\x1b\[0*m$}
#INLINE punk::ansi::codetype::is_sgr
#regexp {\033\[[0-9;:]*m$} $code
set re_is_sgr {\x1b\[[0-9;:]*m$}
foreach ln $linelist {
set is_replay_pure_reset [punk::ansi::codetype::is_sgr_reset $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable
set ansisplits [punk::ansi::ta::split_codes_single $ln]
if {[llength $ansisplits]<= 1} {
@ -819,6 +916,11 @@ namespace eval punk::lib {
set codestack [list $code]
} else {
if {[punk::ansi::codetype::is_sgr $code]} {
#todo - proper test of each code - so we only take latest background/foreground etc.
#requires handling codes with varying numbers of parameters.
#basic simplification - remove straight dupes.
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars.
set codestack [lremove $codestack {*}$dup_posns]
lappend codestack $code
} ;#else gx0 or other code - we don't want to stack it with SGR codes
}
@ -834,7 +936,9 @@ namespace eval punk::lib {
}
}
set newreplay [join $codestack ""]
#set newreplay [join $codestack ""]
set newreplay [punk::ansi::codetype::sgr_merge {*}$codestack]
if {$line_has_sgr && $newreplay ne $replaycodes} {
#adjust if it doesn't already does a reset at start
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} {
#NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least)
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a content-only cksum of a folder structure. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto"
puts stderr "cksum_path doesn't yet support a cksum of a folder structure without tar. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
puts stderr "cksum_path doesn't yet support a tar checksum of a folder structure without metadat. Currently only files supported without metadata. For folders use cksum_path -cksum_meta 1 or auto with -cksum_usetar 1 or auto"
#stdin with line-mode readable events (at least on windows for Tcl 8.7a6 to 9.0a) can get stuck with bytes pending when input longer than 100chars - even though there is a linefeed further on than that.
#This potentially affects a reasonable number of Tcl8.7 kit/tclsh binaries out in the wild.
#see bug https://core.tcl-lang.org/tcl/tktview/bda99f2393 (gets stdin problem when non-blocking - Windows)
#we haven't put the data following last le into commandstr - but we want to display proper completion status prior to enter being hit or more data coming in.
#this could give spurious results for large pastes where buffering chunks it in odd places.?
#it does however give sensible output for the common case of a small paste where the last line ending wasn't included
set waiting [punk::lib::system::incomplete $commandstr$readingchunk]
set new_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m]
lappend files_copied $current_source_dir/$m
} else {
set is_skip 1
lappend files_skipped $current_source_dir/$m
}
} elseif {$overwrite_what eq "synced-targets"} {
if {[llength $changed]} {
#only overwrite if the target checksum equals the last installed checksum (ie target is in sync with source and so hasn't been customized)
set existing_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m]
set is_target_unmodified_since_install 0
set target_cksum_compare "unknown"
set latest_install_record [punkcheck::recordlist::file_record_latest_installrecord $filerec] ;#may be no such record - in which case we get an empty list
if {[dict exists $latest_install_record -targets_cksums]} {
set last_install_cksum [dict get $latest_install_record -targets_cksums] ;#in this case we know there is only one as 'install' always uses targetset size of 1. (FILEINFO record per file in source folder)
if {[dict get $existing_tgt_cksum_info cksum] eq $last_install_cksum} {
set is_target_unmodified_since_install 1
set target_cksum_compare "match"
} else {
set target_cksum_compare "nomatch"
}
} else {
set target_cksum_compare "norecord"
}
if {$is_target_unmodified_since_install} {
switch -- $overwrite_what {
installedsourcechanged-targets {
if {[llength $changed]} {
#An unrecorded installation is considered a source change (from unknown/unrecorded source to recorded)
set new_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m]
lappend files_copied $current_source_dir/$m
} else {
#either cksum is different or we were unable to verify the record. Either way we can't know if the target is in sync so we must skip it
set is_skip 1
puts stderr "Skipping file copy $m target $current_target_dir/$m - require synced_target to overwrite - current target cksum compared to previous install: $target_cksum_compare"
lappend files_skipped $current_source_dir/$m
}
} else {
}
synced-targets {
if {[llength $changed]} {
#only overwrite if the target checksum equals the last installed checksum (ie target is in sync with source and so hasn't been customized)
set existing_tgt_cksum_info [punk::mix::base::lib::cksum_path $current_target_dir/$m]
set is_target_unmodified_since_install 0
set target_cksum_compare "unknown"
set latest_install_record [punkcheck::recordlist::file_record_latest_installrecord $filerec] ;#may be no such record - in which case we get an empty list
if {[dict exists $latest_install_record -targets_cksums]} {
set last_install_cksum [dict get $latest_install_record -targets_cksums] ;#in this case we know there is only one as 'install' always uses targetset size of 1. (FILEINFO record per file in source folder)
if {[dict get $existing_tgt_cksum_info cksum] eq $last_install_cksum} {
if {[dict get $install_record tag] ni [list "QUERY-INPROGRESS" "INSTALL-RECORD" "INSTALL-SKIPPED" "INSTALL-INPROGRESS" "MODIFY-INPROGRESS" "MODIFY-RECORD" "MODIFY-SKIPPED" "VIRTUAL-INPROGRESS" "VIRTUAL-RECORD" "VIRTUAL-SKIPPED" "DELETE-RECORD" "DELETE-INPROGRESS" "DELETE-SKIPPED"]} {
error "file_install_record_source_changes bad install record: tag '[dict get $install_record tag]' not INSTALL-RECORD|SKIPPED|INSTALL-INPROGRESS|MODIFY-RECORD|MODIFY-INPROGRESS|VIRTUAL-RECORD|VIRTUAL-INPROGRESS|DELETE-RECORD|DELETE-INPROGRESS"
switch -- [dict get $install_record tag] {
"QUERY-INPROGRESS" -
"INSTALL-RECORD" -
"INSTALL-SKIPPED" -
"INSTALL-INPROGRESS" -
"MODIFY-INPROGRESS" -
"MODIFY-RECORD" -
"MODIFY-SKIPPED" -
"VIRTUAL-INPROGRESS" -
"VIRTUAL-RECORD" -
"VIRTUAL-SKIPPED" -
"DELETE-RECORD" -
"DELETE-INPROGRESS" -
"DELETE-SKIPPED" {
}
default {
error "file_install_record_source_changes bad install record: tag '[dict get $install_record tag]' not INSTALL-RECORD|SKIPPED|INSTALL-INPROGRESS|MODIFY-RECORD|MODIFY-INPROGRESS|VIRTUAL-RECORD|VIRTUAL-INPROGRESS|DELETE-RECORD|DELETE-INPROGRESS"
}
}
set source_list [dict_getwithdefault $install_record body [list]]
#single "final byte" in the range 0x40–0x7E (ASCII @A–Z[\]^_`a–z{|}~).
dict set escape_terminals CSI [list @ \\ ^ _ ` | ~ a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z "\{" "\}"]
# but here it may help detect unexpected changes during lifetime of the stack and avoids the chance of callers incorrectly using the transform handle?)
#review - reconsider the handling of redirections such that tcl-style are handled totally separately to other shell syntaxes!
#
#note 2>@1 must ocur as last word for tcl - but 2@stdout can occur elsewhere
#(2>@stdout echoes to main stdout - not into pipeline)
#To properly do pipelines it looks like we will have to split on | and call this proc multiple times and wire it up accordingly (presumably in separate threads)
set custom_stderr ""
if {[string trim $lastitem] in [list {2>&1} {2>@1}]} {
set custom_stderr {2>@1} ;#use the tcl style
set commandlist [lrange $commandlist 0 end-1]
} else {
# 2> filename
# 2>> filename
# 2>@ openfileid
set redir2test [string range $lastitem 0 1]
if {$redir2test eq "2>"} {
set custom_stderr $lastitem
switch -- [string trim $lastitem] {
{&} {
set name [lindex $commandlist 0]
#background execution - stdout and stderr from child still comes here - but process is backgrounded
#FIX! - this is broken for paths with backslashes for example
#we leave stdout without imposed ansi colouring - because the source may be colourised
#stderr might have source colouring - but it usually doesn't seem to, and the visual distiction of red stderr is very handy for the run command.
#we leave stdout without imposed ansi colouring - because the source may be colourised and because ansi-wrapping a stream at whatever boundaries it comes in at isn't a really nice thing to do.
#stderr might have source colouring - but it usually doesn't seem to, and the visual distiction of red stderr can be very handy for the run command.
#A further enhancement could be to detect well-known options such as --color and/or use a configuration for specific commands that have useful colourised stderr,
#but defaulting stderr to red is a compromise.
#but having an option to configure stderr to red is a compromise.
#Note that the other run commands, runout,runerr, runx don't emit in real-time - so for those commands there may be options to detect and/or post-process stdout and stderr.
#TODO - fix. This has no effect because the repl adds an ansiwrap transform
#TODO - fix. This has no effect if/when the repl adds an ansiwrap transform
# what we probably want to do is 'aside' that transform for runxxx commands only.
#return a homogenous block of characters - ie lines all same length, all same character
#printing-width in terminal columns is not necessarily same as blockwidth even if a single char is passed (could be full-width unicode character)
#This can have ansi SGR codes - but to repeat blocks containing ansi-movements, the block should first be rendered to a static output with something like overtype::left
proc block {blockwidth blockheight {char " "}} {
if {$blockwidth < 0} {
error "textblock::block blockwidth must be an integer greater than or equal to zero"
@ -50,15 +51,17 @@ namespace eval textblock {
error "textblock::block blockheight must be a positive integer"
}
if {$char eq ""} {return ""}
#using string length is ok
if {[string length $char] == 1} {
set row [string repeat $char $blockwidth]
set mtrx [lrepeat $blockheight $row]
return [::join $mtrx \n]
} else {
set charblock [string map [list \r\n \n] $char]
if {[string first \n $charblock] >= 0} {
if {[string last \n $charblock] >= 0} {
if {$blockwidth > 1} {
set row [.= val $charblock {*}[lrepeat [expr {$blockwidth -1}] |> piper_blockjoin $charblock]]
#set row [.= val $charblock {*}[lrepeat [expr {$blockwidth -1}] |> piper_blockjoin $charblock]] ;#building a repeated "|> command arg" list to evaluate as a pipeline. (from before textblock::join could take arbitrary num of blocks )
set row [textblock::join {*}[lrepeat $blockwidth $charblock]]
} else {
set row $charblock
}
@ -72,34 +75,34 @@ namespace eval textblock {
#todo - consider 'elastic tabstops' for textblocks where tab acts as a column separator and adjacent lines with the same number of tabs form a sort of table
proc width {textblock} {
#backspaces, vertical tabs,carriage returns
#backspaces, vertical tabs ?
if {$textblock eq ""} {
return 0
}
#textutil::tabify is a reasonable hack when there are no ansi SGR codes - but probably not always what we want even then - review
set textblock [textutil::tabify::untabify2 $textblock]
if {[string first \n $textblock] >= 0} {
return [tcl::mathfunc::max {*}[lmap v [punk::lib::lines_as_list -- $textblock] {::punk::char::ansifreestring_width [stripansi $v]}]]
if {[string last \t $textblock] >= 0} {
if {[info exists punk::console::tabwidth]} {
set tw $::punk::console::tabwidth
} else {
set tw 8
}
set textblock [textutil::tabify::untabify2 $textblock $tw]
if {$opt_align ni [list left right centre center]} {
#these are all valid commands for overtype::<cmd>
error "frame option -align must be left|right|centre|center - received: $$opt_align"
switch -- $opt_align {
left - right - centre - center {}
default {
error "frame option -align must be left|right|centre|center - received: $$opt_align"
}
}
#these are all valid commands for overtype::<cmd>
# -- --- --- --- --- ---
set opt_ansiborder [dict get $opts -ansiborder]
# -- --- --- --- --- ---
set contents [textutil::tabify::untabify2 $contents]
if {[string last \t $contents] >= 0} {
if {[info exists punk::console::tabwidth]} {
set tw $::punk::console::tabwidth
} else {
set tw 8
}
if {$opt_etabs} {
set contents [textutil::tabify::untabify2 $contents $tw]
}
}
set contents [string map [list \r\n \n] $contents]
set actual_contentwidth [width $contents]
@ -394,156 +451,164 @@ namespace eval textblock {
set linecount [textblock::height $contents]
set rst [a]
set column [string repeat " " $contentwidth] ;#default - may need to override for custom frame
if {$opt_type eq "altg"} {
#old style ansi escape sequences with alternate graphics page G0
set hl [cd::hl]
set hlt $hl
set hlb $hl
set vl [cd::vl]
set vll $vl
set vlr $vl
set tlc [cd::tlc]
set trc [cd::trc]
set blc [cd::blc]
set brc [cd::brc]
set tbar [string repeat $hl $contentwidth]
set tbar [cd::groptim $tbar]
set bbar $tbar
} elseif {$opt_type eq "ascii"} {
set hl -
set hlt -
set hlb -
set vl |
set vll |
set vlr |
set tlc +
set trc +
set blc +
set brc +
set tbar [string repeat - $contentwidth]
set bbar $tbar
} elseif {$opt_type eq "unicode_box"} {
#unicode box drawing set
set hl [punk::char::charshort boxd_lhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_lv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ldr]
set trc [punk::char::charshort boxd_ldl]
set blc [punk::char::charshort boxd_lur]
set brc [punk::char::charshort boxd_lul]
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
} elseif {$opt_type eq "unicode_box_heavy"} {
#unicode box drawing set
set hl [punk::char::charshort boxd_hhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_hv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_hdr]
set trc [punk::char::charshort boxd_hdl]
set blc [punk::char::charshort boxd_hur]
set brc [punk::char::charshort boxd_hul]
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
} elseif {$opt_type eq "unicode_double"} {
#unicode box drawing set
set hl [punk::char::charshort boxd_dhz] ;# double horizontal \U2550
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_dv] ;#double vertical \U2551
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ddr] ;#double down and right \U2554
set trc [punk::char::charshort boxd_ddl] ;#double down and left \U2557
set blc [punk::char::charshort boxd_dur] ;#double up and right \U255A
set brc [punk::char::charshort boxd_dul] ;#double up and left \U255D
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
} elseif {$opt_type eq "unicode_arc"} {
#unicode box drawing set
set hl [punk::char::charshort boxd_lhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_lv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ladr] ;#light arc down and right \U256D
set trc [punk::char::charshort boxd_ladl] ;#light arc down and left \U256E
set blc [punk::char::charshort boxd_laur] ;#light arc up and right \U2570
set brc [punk::char::charshort boxd_laul] ;#light arc up and left \U256F
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
} else {
dict with custom_frame {} ;#extract keys as vars
if {[dict exists $custom_frame hlt]} {
set hlt [dict get $custom_frame hlt]
} else {
switch -- $opt_type {
"altg" {
#old style ansi escape sequences with alternate graphics page G0
set hl [cd::hl]
set hlt $hl
set hlb $hl
set vl [cd::vl]
set vll $vl
set vlr $vl
set tlc [cd::tlc]
set trc [cd::trc]
set blc [cd::blc]
set brc [cd::brc]
set tbar [string repeat $hl $contentwidth]
set tbar [cd::groptim $tbar]
set bbar $tbar
}
"ascii" {
set hl -
set hlt -
set hlb -
set vl |
set vll |
set vlr |
set tlc +
set trc +
set blc +
set brc +
set tbar [string repeat - $contentwidth]
set bbar $tbar
}
"unicode_box" {
#unicode box drawing set
set hl [punk::char::charshort boxd_lhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_lv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ldr]
set trc [punk::char::charshort boxd_ldl]
set blc [punk::char::charshort boxd_lur]
set brc [punk::char::charshort boxd_lul]
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
}
set hlt_width [punk::ansi::printing_length $hlt]
if {[dict exists $custom_frame hlb]} {
set hlb [dict get $custom_frame hlb]
} else {
"unicode_box_heavy" {
#unicode box drawing set
set hl [punk::char::charshort boxd_hhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_hv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_hdr]
set trc [punk::char::charshort boxd_hdl]
set blc [punk::char::charshort boxd_hur]
set brc [punk::char::charshort boxd_hul]
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
}
set hlb_width [punk::ansi::printing_length $hlb]
if {[dict exists $custom_frame vll]} {
set vll [dict get $custom_frame vll]
} else {
"unicode_double" {
#unicode box drawing set
set hl [punk::char::charshort boxd_dhz] ;# double horizontal \U2550
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_dv] ;#double vertical \U2551
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ddr] ;#double down and right \U2554
set trc [punk::char::charshort boxd_ddl] ;#double down and left \U2557
set blc [punk::char::charshort boxd_dur] ;#double up and right \U255A
set brc [punk::char::charshort boxd_dul] ;#double up and left \U255D
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
}
set vll_width [punk::ansi::printing_length $vll]
if {[dict exists $custom_frame vlr]} {
set vlr [dict get $custom_frame vlr]
} else {
"unicode_arc" {
#unicode box drawing set
set hl [punk::char::charshort boxd_lhz] ;# light horizontal
set hlt $hl
set hlb $hl
set vl [punk::char::charshort boxd_lv] ;#light vertical
set vll $vl
set vlr $vl
set tlc [punk::char::charshort boxd_ladr] ;#light arc down and right \U256D
set trc [punk::char::charshort boxd_ladl] ;#light arc down and left \U256E
set blc [punk::char::charshort boxd_laur] ;#light arc up and right \U2570
set brc [punk::char::charshort boxd_laul] ;#light arc up and left \U256F
set tbar [string repeat $hl $contentwidth]
set bbar $tbar
}
set vlr_width [punk::ansi::printing_length $vlr]
default {
dict with custom_frame {} ;#extract keys as vars
if {[dict exists $custom_frame hlt]} {
set hlt [dict get $custom_frame hlt]
} else {
set hlt $hl
}
set hlt_width [punk::ansi::printing_length $hlt]
if {[dict exists $custom_frame hlb]} {
set hlb [dict get $custom_frame hlb]
} else {
set hlb $hl
}
set hlb_width [punk::ansi::printing_length $hlb]
set tlc_width [punk::ansi::printing_length $tlc]
set trc_width [punk::ansi::printing_length $trc]
set blc_width [punk::ansi::printing_length $blc]
set brc_width [punk::ansi::printing_length $brc]
if {[dict exists $custom_frame vll]} {
set vll [dict get $custom_frame vll]
} else {
set vll $vl
}
set vll_width [punk::ansi::printing_length $vll]
if {[dict exists $custom_frame vlr]} {
set vlr [dict get $custom_frame vlr]
} else {
set vlr $vl
}
set vlr_width [punk::ansi::printing_length $vlr]
set tlc_width [punk::ansi::printing_length $tlc]
set trc_width [punk::ansi::printing_length $trc]
set blc_width [punk::ansi::printing_length $blc]
set brc_width [punk::ansi::printing_length $brc]
set framewidth [expr {$contentwidth + 2}] ;#reverse default assumption
if {$opt_width eq ""} {
#width wasn't specified - so user is expecting frame to adapt to title/contents
#content shouldn't truncate because of extra wide frame
set contentwidth $content_or_title_width
set tbarwidth [expr {$content_or_title_width + 2 - $tlc_width - $trc_width - 2 + $vll_width + $vlr_width}] ;#+/2's for difference between border element widths and standard element single-width
set contentwidth [expr $opt_width - $vll_width - $vlr_width] ;#content may be truncated
set tbarwidth [expr {$opt_width - $tlc_width - $trc_width}]
set bbarwidth [expr {$opt_width - $blc_width - $brc_width}]
}
set column [string repeat " " $contentwidth]
if {$hlt_width == 1} {
set tbar [string repeat $hlt $tbarwidth]
} else {
#possibly mixed width chars that make up hlt - string range won't get width right
set blank [string repeat " " $tbarwidth]
set count [expr {($tbarwidth / $hlt_width) + 1}]
set tbar [string repeat $hlt $count]
#set tbar [string range $tbar 0 $tbarwidth-1]
set tbar [overtype::left -overflow 0 -exposed1 " " -exposed2 " " $blank $tbar];#spaces for exposed halves of 2w chars instead of default replacement character
set framewidth [expr {$contentwidth + 2}] ;#reverse default assumption
if {$opt_width eq ""} {
#width wasn't specified - so user is expecting frame to adapt to title/contents
#content shouldn't truncate because of extra wide frame
set contentwidth $content_or_title_width
set tbarwidth [expr {$content_or_title_width + 2 - $tlc_width - $trc_width - 2 + $vll_width + $vlr_width}] ;#+/2's for difference between border element widths and standard element single-width
set contentwidth [expr $opt_width - $vll_width - $vlr_width] ;#content may be truncated
set tbarwidth [expr {$opt_width - $tlc_width - $trc_width}]
set bbarwidth [expr {$opt_width - $blc_width - $brc_width}]
}
set column [string repeat " " $contentwidth]
if {$hlt_width == 1} {
set tbar [string repeat $hlt $tbarwidth]
} else {
#possibly mixed width chars that make up hlt - string range won't get width right
set blank [string repeat " " $tbarwidth]
set count [expr {($tbarwidth / $hlt_width) + 1}]
set tbar [string repeat $hlt $count]
#set tbar [string range $tbar 0 $tbarwidth-1]
set tbar [overtype::left -overflow 0 -exposed1 " " -exposed2 " " $blank $tbar];#spaces for exposed halves of 2w chars instead of default replacement character