You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

2034 lines
83 KiB

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from <pkg>-buildversion.txt
# module template: punkshell/src/decktemplates/vendor/punk/modules/template_module-0.0.2.tm
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2024
#
# @@ Meta Begin
# Application punk::lib 999999.0a1.0
# Meta platform tcl
# Meta license BSD
# @@ Meta End
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::lib 0 999999.0a1.0]
#[copyright "2024"]
#[titledesc {punk general utility functions}] [comment {-- Name section and table of contents description --}]
#[moddesc {punk library}] [comment {-- Description at end of page heading --}]
#[require punk::lib]
#[keywords module utility lib]
#[description]
#[para]This is a set of utility functions that are commonly used across punk modules or are just considered to be general-purpose functions.
#[para]The base set includes string and math functions but has no specific theme
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Overview]
#[para] overview of punk::lib
#[subsection Concepts]
#[para]The punk::lib modules should have no strong dependencies other than Tcl
#[para]Dependendencies that only affect display or additional functionality may be included - but should fail gracefully if not present, and only when a function is called that uses one of these soft dependencies.
#[para]This requirement for no strong dependencies, means that many utility functions that might otherwise seem worthy of inclusion here are not present.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[subsection dependencies]
#[para] packages used by punk::lib
#[list_begin itemized]
package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6-}]
# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]
#*** !doctools
#[list_end]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section API]
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::lib::class {
#*** !doctools
#[subsection {Namespace punk::lib::class}]
#[para] class definitions
if {[info commands [tcl::namespace::current]::interface_sample1] eq ""} {
#*** !doctools
#[list_begin enumerated]
# oo::class create interface_sample1 {
# #*** !doctools
# #[enum] CLASS [class interface_sample1]
# #[list_begin definitions]
# method test {arg1} {
# #*** !doctools
# #[call class::interface_sample1 [method test] [arg arg1]]
# #[para] test method
# puts "test: $arg1"
# }
# #*** !doctools
# #[list_end] [comment {-- end definitions interface_sample1}]
# }
#*** !doctools
#[list_end] [comment {--- end class enumeration ---}]
}
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
tcl::namespace::eval punk::lib::ensemble {
#wiki.tcl-lang.org/page/ensemble+extend
# extend an ensemble-like routine with the routines in some namespace
proc extend {routine extension} {
if {![string match ::* $routine]} {
set resolved [uplevel 1 [list ::tcl::namespace::which $routine]]
if {$resolved eq {}} {
error [list {no such routine} $routine]
}
set routine $resolved
}
set routinens [tcl::namespace::qualifiers $routine]
if {$routinens eq {::}} {
set routinens {}
}
set routinetail [tcl::namespace::tail $routine]
if {![string match ::* $extension]} {
set extension [uplevel 1 [
list [tcl::namespace::which namespace] current]]::$extension
}
if {![tcl::namespace::exists $extension]} {
error [list {no such namespace} $extension]
}
set extension [tcl::namespace::eval $extension [
list [tcl::namespace::which namespace] current]]
tcl::namespace::eval $extension [
list [tcl::namespace::which namespace] export *]
while 1 {
set renamed ${routinens}::${routinetail}_[info cmdcount]
if {[tcl::namespace::which $renamed] eq {}} break
}
rename $routine $renamed
tcl::namespace::eval $extension [
list namespace ensemble create -command $routine -unknown [
list apply {{renamed ensemble routine args} {
list $renamed $routine
}} $renamed
]
]
return $routine
}
}
tcl::namespace::eval punk::lib::compat {
#*** !doctools
#[subsection {Namespace punk::lib::compat}]
#[para] compatibility functions for features that may not be available in earlier Tcl versions
#[para] These are generally 'forward compatibility' functions ie allowing earlier versions to use later features/idioms by using a Tcl-only version of a missing builtin.
#[para] Such Tcl-only versions will inevitably be less performant - perhaps significantly so.
#*** !doctools
#[list_begin definitions]
if {"::lremove" ne [info commands ::lremove]} {
#puts stderr "Warning - no built-in lremove"
interp alias {} lremove {} ::punk::lib::compat::lremove
}
proc lremove {list args} {
#*** !doctools
#[call [fun lremove] [arg list] [opt {index ...}]]
#[para] Forwards compatible lremove for versions 8.6 or less to support equivalent 8.7 lremove
set data [lmap v $list {list data $v}]
foreach doomed_index $args {
if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"}
lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value
}
set keep [lsearch -all -inline -not -exact $data x]
return [lsearch -all -inline -index 1 -subindices $keep *]
}
#not significantly different in performance over test of 100 elements - getting somewhere near 10% for 1k integers
proc lremove2 {list args} {
set data [lmap v $list {list data $v}]
foreach doomed_index $args {
if {[llength $doomed_index] != 1} {error "bad index \"$doomed_index\": must be integer?\[+-]integer? or end?\[+-]integer?"}
lset data $doomed_index x ;#x won't collide as all our data has been mapped to 2 elements per value
}
set keep [lsearch -all -inline -not -exact $data x]
return [lmap v $keep {lindex $v 1}]
}
#outside of lmap - don't know of any particularly nice ways to flatten to subindex 1 of each element..
#flattening then lsearch with -stride and * would be nice - but it's not avail in 8.6 - and even in 8.7 it doesn't seem to allow returning one index of the stridden 'group'
if {"::lpop" ne [info commands ::lpop]} {
#puts stderr "Warning - no built-in lpop"
interp alias {} lpop {} ::punk::lib::compat::lpop
}
proc lpop {lvar args} {
#*** !doctools
#[call [fun lpop] [arg listvar] [opt {index}]]
#[para] Forwards compatible lpop for versions 8.6 or less to support equivalent 8.7 lpop
upvar $lvar l
if {![llength $args]} {
set args [list end]
}
set v [lindex $l {*}$args]
set newlist $l
set path [list]
set subl $l
for {set i 0} {$i < [llength $args]} {incr i} {
set idx [lindex $args $i]
if {![llength [lrange $subl $idx $idx]]} {
error "tcl_lpop index \"$idx\" out of range"
}
lappend path [lindex $args $i]
set subl [lindex $l {*}$path]
}
set sublist_path [lrange $args 0 end-1]
set tailidx [lindex $args end]
if {![llength $sublist_path]} {
#set newlist [lremove $newlist $tailidx]
set newlist [lreplace $newlist $tailidx $tailidx]
} else {
set sublist [lindex $newlist {*}$sublist_path]
#set sublist [lremove $sublist $tailidx]
set sublist [lreplace $sublist $tailidx $tailidx]
lset newlist {*}$sublist_path $sublist
}
#puts "[set l] -> $newlist"
set l $newlist
return $v
}
#slight isolation - varnames don't leak - but calling context vars can be affected
proc lmaptcl2 {varnames list script} {
set result [list]
set values [list]
foreach v $varnames {
lappend values "\$$v"
}
set linkvars [uplevel 1 [list info vars]]
set nscaller [uplevel 1 [list namespace current]]
set apply_script ""
foreach vname $linkvars {
append apply_script [string map [list %vname% $vname]\
{upvar 2 %vname% %vname%}\
] \n
}
append apply_script $script \n
#puts "--> $apply_script"
foreach $varnames $list {
lappend result [apply\
[list\
$varnames\
$apply_script\
$nscaller\
] {*}[subst $values]\
]
}
return $result
}
if {"::lmap" ne [info commands ::lmap]} {
#puts stderr "Warning - no built-in lpop"
interp alias {} lpop {} ::punk::lib::compat::lmaptcl
}
#lmap came in Tcl 8.6 - so probably not much need for a tcl forward compatibility version - but here it is anyway
proc lmaptcl {varnames list script} {
set result [list]
set varlist [list]
foreach varname $varnames {
upvar 1 $varname var_$varname ;#ensure no collisions with vars in this proc
lappend varlist var_$varname
}
foreach $varlist $list {
lappend result [uplevel 1 $script]
}
return $result
}
#tcl8.7/9 compatibility for 8.6
if {[info commands ::tcl::string::insert] eq ""} {
#https://wiki.tcl-lang.org/page/string+insert
# Pure Tcl implementation of [string insert] command.
proc ::tcl::string::insert {string index insertString} {
# Convert end-relative and TIP 176 indexes to simple integers.
if {[regexp -expanded {
^(end(?![\t\n\v\f\r ]) # "end" is never followed by whitespace
|[\t\n\v\f\r ]*[+-]?\d+) # m, with optional leading whitespace
(?:([+-]) # op, omitted when index is "end"
([+-]?\d+))? # n, omitted when index is "end"
[\t\n\v\f\r ]*$ # optional whitespace (unless "end")
} $index _ m op n]} {
# Convert first index to an integer.
switch $m {
end {set index [string length $string]}
default {scan $m %d index}
}
# Add or subtract second index, if provided.
switch $op {
+ {set index [expr {$index + $n}]}
- {set index [expr {$index - $n}]}
}
} elseif {![string is integer -strict $index]} {
# Reject invalid indexes.
return -code error "bad index \"$index\": must be\
integer?\[+-\]integer? or end?\[+-\]integer?"
}
# Concatenate the pre-insert, insertion, and post-insert strings.
string cat [string range $string 0 [expr {$index - 1}]] $insertString\
[string range $string $index end]
}
# Bind [string insert] to [::tcl::string::insert].
tcl::namespace::ensemble configure string -map [tcl::dict::replace\
[tcl::namespace::ensemble configure string -map]\
insert ::tcl::string::insert]
}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::lib::compat ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::lib {
tcl::namespace::export *
#variable xyz
#*** !doctools
#[subsection {Namespace punk::lib}]
#[para] Core API functions for punk::lib
#[list_begin definitions]
proc range {from to args} {
if {[info commands lseq] ne ""} {
#tcl 8.7+ lseq significantly faster for larger ranges
return [lseq $from $to]
}
set count [expr {($to -$from) + 1}]
incr from -1
return [lmap v [lrepeat $count 0] {incr from}]
}
proc is_list_all_in_list {small large} {
package require struct::list
package require struct::set
set small_in_large [lsort [struct::set intersect [lsort -unique $small] $large ]]
return [struct::list equal [lsort $small] $small_in_large]
}
proc is_list_all_ni_list {a b} {
package require struct::set
set i [struct::set intersect $a $b]
return [expr {[llength $i] == 0}]
}
#somewhat like struct::set difference - but order preserving, and doesn't treat as a 'set' so preserves dupes in fromlist
#struct::set difference may happen to preserve ordering when items are integers, but order can't be relied on,
# especially as struct::list has 2 differing implementations (tcl vs critcl) which return results with different ordering to each other.
proc ldiff {fromlist removeitems} {
set doomed [list]
foreach item $removeitems {
lappend doomed {*}[lsearch -all -exact $fromlist $item]
}
lremove $fromlist {*}$doomed
}
package require struct::set
if {[struct::set equal [struct::set union {a a} {}] {a}]} {
proc lunique_unordered {list} {
struct::set union $list {}
}
} else {
puts stderr "WARNING: struct::set union <list> <emptylist> no longer dedupes!"
proc lunique_unordered {list} {
tailcall lunique $list
}
}
#order-preserving
proc lunique {list} {
set doomed [list]
#expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?)
for {set i 0} {$i < [llength $list]} {} {
set item [lindex $list $i]
lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end]
while {[incr i] in $doomed} {}
}
lremove $list {*}$doomed
}
proc lunique1 {list} {
set doomed [list]
#expr 'in' probably faster than using a dict - for lists approx < 20,000 items. (wiki wisdom - url?)
set i 0
foreach item $list {
if {$i in $doomed} {
incr i
continue
}
lappend doomed {*}[lrange [lsearch -all -exact -start $i $list $item] 1 end]
incr i
}
puts --->doomed:$doomed
lremove $list {*}$doomed
}
proc lunique2 {list} {
set new {}
foreach item $list {
if {$item ni $new} {
lappend new $item
}
}
return $new
}
#The closure-like behaviour is *very* slow especially when called from a context such as the global namespace with lots of vars and large arrays such as ::env
proc lmapflat_closure {varnames list script} {
set result [list]
set values [list]
foreach v $varnames {
lappend values "\$$v"
}
# -- --- ---
#capture - use uplevel 1 or namespace eval depending on context
set capture [uplevel 1 {
apply { varnames {
set capturevars [tcl::dict::create]
set capturearrs [tcl::dict::create]
foreach fullv $varnames {
set v [tcl::namespace::tail $fullv]
upvar 1 $v var
if {[info exists var]} {
if {(![array exists var])} {
tcl::dict::set capturevars $v $var
} else {
tcl::dict::set capturearrs capturedarray_$v [array get var]
}
} else {
#A variable can show in the results for 'info vars' but still not 'exist'. e.g a 'variable x' declaration in the namespace where the variable has never been set
}
}
return [tcl::dict::create vars $capturevars arrs $capturearrs]
} } [info vars]
} ]
# -- --- ---
set cvars [tcl::dict::get $capture vars]
set carrs [tcl::dict::get $capture arrs]
set apply_script ""
foreach arrayalias [tcl::dict::keys $carrs] {
set realname [string range $arrayalias [string first _ $arrayalias]+1 end]
append apply_script [string map [list %realname% $realname %arrayalias% $arrayalias] {
array set %realname% [set %arrayalias%][unset %arrayalias%]
}]
}
append apply_script [string map [list %script% $script] {
#foreach arrayalias [info vars capturedarray_*] {
# set realname [string range $arrayalias [string first _ $arrayalias]+1 end]
# array set $realname [set $arrayalias][unset arrayalias]
#}
#return [eval %script%]
%script%
}]
#puts "--> $apply_script"
foreach $varnames $list {
lappend result {*}[apply\
[list\
[concat $varnames [tcl::dict::keys $cvars] [tcl::dict::keys $carrs] ]\
$apply_script\
] {*}[subst $values] {*}[tcl::dict::values $cvars] {*}[tcl::dict::values $carrs] ]
}
return $result
}
#link version - can write to vars in calling context - but keeps varnames themselves isolated
#performance much better than capture version - but still a big price to pay for the isolation
proc lmapflat_link {varnames list script} {
set result [list]
set values [list]
foreach v $varnames {
lappend values "\$$v"
}
set linkvars [uplevel 1 [list info vars]]
set nscaller [uplevel 1 [list namespace current]]
set apply_script ""
foreach vname $linkvars {
append apply_script [string map [list %vname% $vname]\
{upvar 2 %vname% %vname%}\
] \n
}
append apply_script $script \n
#puts "--> $apply_script"
foreach $varnames $list {
lappend result {*}[apply\
[list\
$varnames\
$apply_script\
$nscaller\
] {*}[subst $values]\
]
}
return $result
}
#proc lmapflat {varnames list script} {
# concat {*}[uplevel 1 [list lmap $varnames $list $script]]
#}
#lmap can accept multiple var list pairs
proc lmapflat {args} {
concat {*}[uplevel 1 [list lmap {*}$args]]
}
proc lmapflat2 {args} {
concat {*}[uplevel 1 lmap {*}$args]
}
proc dict_getdef {dictValue args} {
if {[llength $args] < 1} {
error {wrong # args: should be "dict_getdef dictValue ?key ...? key default"}
}
set keys [lrange $args -1 end-1]
if {[tcl::dict::exists $dictValue {*}$keys]} {
return [tcl::dict::get $dictValue {*}$keys]
} else {
return [lindex $args end]
}
}
#proc sample1 {p1 n args} {
# #*** !doctools
# #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]]
# #[para]Description of sample1
# #[para] Arguments:
# # [list_begin arguments]
# # [arg_def tring p1] A description of string argument p1.
# # [arg_def integer n] A description of integer argument n.
# # [list_end]
# return "ok"
#}
proc lindex_resolve {list index} {
#*** !doctools
#[call [fun lindex_resolve] [arg list] [arg index]]
#[para]Resolve an index which may be of the forms accepted by Tcl list commands such as end-2 or 2+2 to the actual integer index for the supplied list
#[para]Users may define procs which accept a list index and wish to accept the forms understood by Tcl.
#[para]This means the proc may be called with something like $x+2 end-$y etc
#[para]Sometimes the actual integer index is desired.
#[para]We want to resolve the index used, without passing arbitrary expressions into the 'expr' function - which could have security risks.
#[para]lindex_resolve will parse the index expression and return -1 if the supplied index expression is out of bounds for the supplied list.
#[para]Otherwise it will return an integer corresponding to the position in the list.
#[para]Like Tcl list commands - it will produce an error if the form of the index is not acceptable
#Note that for an index such as $x+1 - we never see the '$x' as it is substituted in the calling command. We will get something like 10+1 - which we will resolve (hopefully safely) with expr
if {![llength $list]} {
return -1
}
set index [string map [list _ ""] $index] ;#forward compatibility with integers such as 1_000
if {[string is integer -strict $index]} {
#can match +i -i
if {$index < 0} {
return -1
} elseif {$index >= [llength $list]} {
return -1
} else {
#integer may still have + sign - normalize with expr
return [expr {$index}]
}
} else {
if {[string match end* $index]} {
if {$index ne "end"} {
set op [string index $index 3]
set offset [string range $index 4 end]
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"}
if {$op eq "+" && $offset != 0} {
return -1
}
} else {
set offset 0
}
#by now, if op = + then offset = 0 so we only need to handle the minus case
if {$offset == 0} {
set index [expr {[llength $list]-1}]
} else {
set index [expr {([llength $list]-1) - $offset}]
}
if {$index < 0} {
return -1
} else {
return $index
}
} else {
#plain +-<int> already handled above.
#we are trying to avoid evaluating unbraced expr of potentially insecure origin
if {[regexp {(.*)([+-])(.*)} $index _match a op b]} {
if {[string is integer -strict $a] && [string is integer -strict $b]} {
if {$op eq "-"} {
set index [expr {$a - $b}]
} else {
set index [expr {$a + $b}]
}
} else {
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
}
} else {
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
}
if {$index < 0 || $index >= [llength $list]} {return -1}
return $index
}
}
}
proc lindex_resolve2 {list index} {
set indices [list] ;#building this may be somewhat expensive in terms of storage and compute for large lists - we could use lseq in Tcl 8.7+ but that's likely unavailable here.
for {set i 0} {$i < [llength $list]} {incr i} {
lappend indices $i
}
set idx [lindex $indices $index]
if {$idx eq ""} {
return -1
} else {
return $idx
}
}
proc lindex_get {list index} {
set resultlist [lrange $list $index $index]
if {![llength $resultlist]} {
return -1
} else {
#we still don't know the actual integer index for an index such as end-x or int-int without parsing and evaluating ourself.
#we can return the value - but only in a way that won't collide with our -1 out-of-range indicator
return [tcl::dict::create value [lindex $resultlist 0]]
}
}
proc K {x y} {return $x}
#*** !doctools
#[call [fun K] [arg x] [arg y]]
#[para]The K-combinator function - returns the first argument, x and discards y
#[para]see [uri https://wiki.tcl-lang.org/page/K]
#[para]It is used in cases where command-substitution at the calling-point performs some desired effect.
proc is_utf8_multibyteprefix {bytes} {
#*** !doctools
#[call [fun is_utf8_multibyteprefix] [arg str]]
#[para] Returns a boolean if str is potentially a prefix for a multibyte utf-8 character
#[para] ie - tests if it is possible that appending more data will result in a utf-8 codepoint
#[para] Will return false for an already complete utf-8 codepoint
#[para] It is assumed the incomplete sequence is at the beginning of the bytes argument
#[para] Suitable input for this might be from the unreturned tail portion of get_utf8_leading $testbytes
#[para] e.g using: set head [lb]get_utf8_leading $testbytes[rb] ; set tail [lb]string range $testbytes [lb]string length $head[rb] end[rb]
regexp {(?x)
^
(?:
[\xC0-\xDF] | #possible prefix for two-byte codepoint
[\xE0-\xEF] [\x80-\xBF]{0,1} | #possible prefix for three-byte codepoint
[\xF0-\xF4] [\x80-\xBF]{0,2} #possible prefix for
)
$
} $bytes
}
proc is_utf8_first {str} {
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-)
^
(?:
[\x00-\x7F] | # Single-byte chars (ASCII range)
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF)
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF)
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5)
)
} $str
}
proc is_utf8_single {1234bytes} {
#*** !doctools
#[call [fun is_utf8_single] [arg 1234bytes]]
#[para] Tests input of 1,2,3 or 4 bytes and responds with a boolean indicating if it is a valid utf-8 character (codepoint)
regexp {(?x) # Expanded regexp syntax, so I can put in comments :-)
^
(?:
[\x00-\x7F] | # Single-byte chars (ASCII range)
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF)
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF)
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5)
)
$
} $1234bytes
}
proc get_utf8_leading {rawbytes} {
#*** !doctools
#[call [fun get_utf8_leading] [arg rawbytes]]
#[para] return the leading portion of rawbytes that is a valid utf8 sequence.
#[para] This will stop at the point at which the bytes can't be interpreted as a complete utf-8 codepoint
#[para] e.g It will not return the first byte or 2 of a 3-byte utf-8 character if the last byte is missing, and will return only the valid utf-8 string from before the first byte of the incomplete character.
#[para] It will also only return the prefix before any bytes that cannot be part of a utf-8 sequence at all.
#[para] Note that while this will return valid utf8 - it has no knowledge of grapheme clusters or diacritics
#[para] This means if it is being used to process bytes split at some arbitrary point - the trailing data that isn't returned could be part of a grapheme cluster that belongs with the last character of the leading string already returned
#[para] The utf-8 BOM \xEF\xBB\xBF is a valid UTF8 3-byte sequence and so can also be returned as part of the leading utf8 bytes
if {[regexp {(?x) # Expanded regexp syntax, so I can put in comments :-)
\A (
[\x00-\x7F] | # Single-byte chars (ASCII range)
[\xC0-\xDF] [\x80-\xBF] | # Two-byte chars (\u0080-\u07FF)
[\xE0-\xEF] [\x80-\xBF]{2} | # Three-byte chars (\u0800-\uFFFF)
[\xF0-\xF4] [\x80-\xBF]{3} # Four-byte chars (U+10000-U+10FFFF, not supported by Tcl 8.5)
) +
} $rawbytes completeChars]} {
return $completeChars
}
return ""
}
proc hex2dec {args} {
#*** !doctools
#[call [fun hex2dec] [opt {option value...}] [arg list_largeHex]]
#[para]Convert a list of (possibly large) unprefixed hex strings to their decimal values
#[para]hex2dec accepts and ignores internal underscores in the same manner as Tcl 8.7+ numbers e.g hex2dec FF_FF returns 65535
#[para]Leading and trailing underscores are ignored as a matter of implementation convenience - but this shouldn't be relied upon.
#[para]Leading or trailing whitespace in each list member is allowed e.g hex2dec " F" returns 15
#[para]Internal whitespace e.g "F F" is not permitted - but a completely empty element "" is allowed and will return 0
set list_largeHex [lindex $args end]
set argopts [lrange $args 0 end-1]
if {[llength $argopts]%2 !=0} {
error "[namespace current]::hex2dec arguments prior to list_largeHex must be option/value pairs - received '$argopts'"
}
set opts [tcl::dict::create\
-validate 1\
-empty_as_hex "INVALID set -empty_as_hex to a hex string e.g FF if empty values should be replaced"\
]
set known_opts [tcl::dict::keys $opts]
foreach {k v} $argopts {
tcl::dict::set opts [tcl::prefix match -message "options for hex2dec. Unexpected option" $known_opts $k] $v
}
# -- --- --- ---
set opt_validate [tcl::dict::get $opts -validate]
set opt_empty [tcl::dict::get $opts -empty_as_hex]
# -- --- --- ---
set list_largeHex [lmap h $list_largeHex[unset list_largeHex] {string map [list _ ""] [string trim $h]}]
if {$opt_validate} {
#Note appended F so that we accept list of empty strings as per the documentation
if {![string is xdigit -strict [join $list_largeHex ""]F ]} {
error "[namespace current]::hex2dec error: non-hex digits encountered after stripping underscores and leading/trailing whitespace for each element\n $list_largeHex"
}
}
if {![string is xdigit -strict [string map [list _ ""] $opt_empty]]} {
#mapping empty string to a value destroys any advantage of -scanonly
#todo - document that -scanonly has 2 restrictions - each element must be valid hex and less than 7 chars long
#set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {0} : {[set v]}}]
if {[lsearch $list_largeHex ""] >=0} {
error "[namespace current]::hex2dec error: empty values in list cannot be mapped to non-hex $opt_empty"
}
} else {
set opt_empty [string trim [string map [list _ ""] $opt_empty]]
if {[set first_empty [lsearch $list_largeHex ""]] >= 0} {
#set list_largeHex [lmap v $list_largeHex[set list_largeHex {}] {expr {$v eq ""} ? {$opt_empty} : {$v}}]
set nonempty_head [lrange $list_largeHex 0 $first_empty-1]
set list_largeHex [concat $nonempty_head [lmap v [lrange $list_largeHex $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]]
}
}
return [scan $list_largeHex [string repeat %llx [llength $list_largeHex]]]
}
proc dec2hex {args} {
#*** !doctools
#[call [fun dex2hex] [opt {option value...}] [arg list_decimals]]
#[para]Convert a list of decimal integers to a list of hex values
#[para] -width <int> can be used to make each hex value at least int characters wide, with leading zeroes.
#[para] -case upper|lower determines the case of the hex letters in the output
set list_decimals [lindex $args end]
set argopts [lrange $args 0 end-1]
if {[llength $argopts]%2 !=0} {
error "[namespace current]::dec2hex arguments prior to list_decimals must be option/value pairs - received '$argopts'"
}
set defaults [tcl::dict::create\
-width 1\
-case upper\
-empty_as_decimal "INVALID set -empty_as_decimal to a number if empty values should be replaced"\
]
set known_opts [tcl::dict::keys $defaults]
set fullopts [tcl::dict::create]
foreach {k v} $argopts {
tcl::dict::set fullopts [tcl::prefix match -message "options for [tcl::namespace::current]::dec2hex. Unexpected option" $known_opts $k] $v
}
set opts [tcl::dict::merge $defaults $fullopts]
# -- --- --- ---
set opt_width [tcl::dict::get $opts -width]
set opt_case [tcl::dict::get $opts -case]
set opt_empty [tcl::dict::get $opts -empty_as_decimal]
# -- --- --- ---
set resultlist [list]
switch -- [string tolower $opt_case] {
upper {
set spec X
}
lower {
set spec x
}
default {
error "[namespace current]::dec2hex unknown value '$opt_case' for -case expected upper|lower"
}
}
set fmt "%${opt_width}.${opt_width}ll${spec}"
set list_decimals [lmap d $list_decimals[unset list_decimals] {string map [list _ ""] [string trim $d]}]
if {![string is digit -strict [string map [list _ ""] $opt_empty]]} {
if {[lsearch $list_decimals ""] >=0} {
error "[namespace current]::dec2hex error: empty values in list cannot be mapped to non-decimal $opt_empty"
}
} else {
set opt_empty [string map [list _ ""] $opt_empty]
if {[set first_empty [lsearch $list_decimals ""]] >= 0} {
set nonempty_head [lrange $list_decimals 0 $first_empty-1]
set list_decimals [concat $nonempty_head [lmap v [lrange $list_decimals $first_empty end] {expr {$v eq ""} ? {$opt_empty} : {$v}}]]
}
}
return [format [lrepeat [llength $list_decimals] $fmt] {*}$list_decimals]
}
proc log2 x "expr {log(\$x)/[expr log(2)]}"
#*** !doctools
#[call [fun log2] [arg x]]
#[para]log base2 of x
#[para]This uses a 'live' proc body - the divisor for the change of base is computed once at definition time
#[para](courtesy of RS [uri https://wiki.tcl-lang.org/page/Additional+math+functions])
proc logbase {b x} {
#*** !doctools
#[call [fun logbase] [arg b] [arg x]]
#[para]log base b of x
#[para]This function uses expr's natural log and the change of base division.
#[para]This means for example that we can get results like: logbase 10 1000 = 2.9999999999999996
#[para]Use expr's log10() function or tcl::mathfunc::log10 for base 10
expr {log($x)/log($b)}
}
proc factors {x} {
#*** !doctools
#[call [fun factors] [arg x]]
#[para]Return a sorted list of the positive factors of x where x > 0
#[para]For x = 0 we return only 0 and 1 as technically any number divides zero and there are an infinite number of factors. (including zero itself in this context)*
#[para]This is a simple brute-force implementation that iterates all numbers below the square root of x to check the factors
#[para]Because the implementation is so simple - the performance is very reasonable for numbers below at least a few 10's of millions
#[para]See tcllib math::numtheory::factors for a more complex implementation - which seems to be slower for 'small' numbers
#[para]Comparisons were done with some numbers below 17 digits long
#[para]For seriously big numbers - this simple algorithm would no doubt be outperformed by more complex algorithms.
#[para]The numtheory library stores some data about primes etc with each call - so may become faster when being used on more numbers
#but has the disadvantage of being slower for 'small' numbers and using more memory.
#[para]If the largest factor below x is needed - the greatestOddFactorBelow and GreatestFactorBelow functions are a faster way to get there than computing the whole list, even for small values of x
#[para]* Taking x=0; Notion of x being divisible by integer y being: There exists an integer p such that x = py
#[para] In other mathematical contexts zero may be considered not to divide anything.
set factors [list 1]
set j 2
set max [expr {sqrt($x)}]
while {$j <= $max} {
if {($x % $j) == 0} {
lappend factors $j [expr {$x / $j}]
}
incr j
}
lappend factors $x
return [lsort -unique -integer $factors]
}
proc oddFactors {x} {
#*** !doctools
#[call [fun oddFactors] [arg x]]
#[para]Return a list of odd integer factors of x, sorted in ascending order
set j 2
set max [expr {sqrt($x)}]
set factors [list 1]
while {$j <= $max} {
if {$x % $j == 0} {
set other [expr {$x / $j}]
if {$other % 2 != 0} {
if {$other ni $factors} {
lappend factors $other
}
}
if {$j % 2 != 0} {
if {$j ni $factors} {
lappend factors $j
}
}
}
incr j
}
return [lsort -integer -increasing $factors]
}
proc greatestFactorBelow {x} {
#*** !doctools
#[call [fun greatestFactorBelow] [arg x]]
#[para]Return the largest factor of x excluding itself
#[para]factor functions can be useful for console layout calculations
#[para]See Tcllib math::numtheory for more extensive implementations
if {$x % 2 == 0 || $x == 0} {
return [expr {$x / 2}]
}
set j 3
set max [expr {sqrt($x)}]
while {$j <= $max} {
if {$x % $j == 0} {
return [expr {$x / $j}]
}
incr j 2
}
return 1
}
proc greatestOddFactorBelow {x} {
#*** !doctools
#[call [fun greatestOddFactorBelow] [arg x]]
#[para]Return the largest odd integer factor of x excluding x itself
if {$x %2 == 0} {
return [greatestOddFactor $x]
}
set j 3
#dumb brute force - time taken to compute is wildly variable on big numbers
#todo - use a (memoized?) generator of primes to reduce the search space
#tcllib math::numtheory has suitable functions - but do we want that dependency here? Testing shows brute-force often faster for small numbers.
set god 1
set max [expr {sqrt($x)}]
while { $j <= $max} {
if {$x % $j == 0} {
set other [expr {$x / $j}]
if {$other % 2 == 0} {
set god $j
} else {
set god [expr {$x / $j}]
#lowest j - so other side must be highest
break
}
}
incr j 2
}
return $god
}
proc greatestOddFactor {x} {
#*** !doctools
#[call [fun greatestOddFactor] [arg x]]
#[para]Return the largest odd integer factor of x
#[para]For an odd value of x - this will always return x
if {$x % 2 != 0 || $x == 0} {
return $x
}
set r [expr {$x / 2}]
while {$r % 2 == 0} {
set r [expr {$r / 2}]
}
return $r
}
proc gcd {n m} {
#*** !doctools
#[call [fun gcd] [arg n] [arg m]]
#[para]Return the greatest common divisor of m and n
#[para]Straight from Lars Hellström's math::numtheory library in Tcllib
#[para]Graphical use:
#[para]An a by b rectangle can be covered with square tiles of side-length c,
#[para]only if c is a common divisor of a and b
#
# Apply Euclid's good old algorithm
#
if { $n > $m } {
set t $n
set n $m
set m $t
}
while { $n > 0 } {
set r [expr {$m % $n}]
set m $n
set n $r
}
return $m
}
proc lcm {n m} {
#*** !doctools
#[call [fun gcd] [arg n] [arg m]]
#[para]Return the lowest common multiple of m and n
#[para]Straight from Lars Hellström's math::numtheory library in Tcllib
#[para]
set gcd [gcd $n $m]
return [expr {$n*$m/$gcd}]
}
proc commonDivisors {x y} {
#*** !doctools
#[call [fun commonDivisors] [arg x] [arg y]]
#[para]Return a list of all the common factors of x and y
#[para](equivalent to factors of their gcd)
return [factors [gcd $x $y]]
}
#experimental only - there are better/faster ways
proc sieve n {
set primes [list]
if {$n < 2} {return $primes}
set nums [tcl::dict::create]
for {set i 2} {$i <= $n} {incr i} {
tcl::dict::set nums $i ""
}
set next 2
set limit [expr {sqrt($n)}]
while {$next <= $limit} {
for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i}
lappend primes $next
tcl::dict::for {next -} $nums break
}
return [concat $primes [tcl::dict::keys $nums]]
}
proc sieve2 n {
set primes [list]
if {$n < 2} {return $primes}
set nums [tcl::dict::create]
for {set i 2} {$i <= $n} {incr i} {
tcl::dict::set nums $i ""
}
set next 2
set limit [expr {sqrt($n)}]
while {$next <= $limit} {
for {set i $next} {$i <= $n} {incr i $next} {tcl::dict::unset nums $i}
lappend primes $next
#dict for {next -} $nums break
set next [lindex $nums 0]
}
return [concat $primes [tcl::dict::keys $nums]]
}
proc hasglobs {str} {
#*** !doctools
#[call [fun hasglobs] [arg str]]
#[para]Return a boolean indicating whether str contains any of the glob characters: * ? [lb] [rb]
#[para]hasglobs uses append to preserve Tcls internal representation for str - so it should help avoid shimmering in the few cases where this may matter.
regexp {[*?\[\]]} [append obj2 $str {}] ;# int-rep preserving
}
proc trimzero {number} {
#*** !doctools
#[call [fun trimzero] [arg number]]
#[para]Return number with left-hand-side zeros trimmed off - unless all zero
#[para]If number is all zero - a single 0 is returned
set trimmed [string trimleft $number 0]
if {[string length $trimmed] == 0} {
set trimmed 0
}
return $trimmed
}
proc substring_count {str substring} {
#*** !doctools
#[call [fun substring_count] [arg str] [arg substring]]
#[para]Search str and return number of occurrences of substring
#faster than lsearch on split for str of a few K
if {$substring eq ""} {return 0}
set occurrences [expr {[string length $str]-[string length [string map [list $substring {}] $str]]}]
return [expr {$occurrences / [string length $substring]}]
}
proc dict_merge_ordered {defaults main} {
#*** !doctools
#[call [fun dict_merge_ordered] [arg defaults] [arg main]]
#[para]The standard dict merge accepts multiple dicts with values from dicts to the right (2nd argument) taking precedence.
#[para]When merging with a dict of default values - this means that any default key/vals that weren't in the main dict appear in the output before the main data.
#[para]This function merges the two dicts whilst maintaining the key order of main followed by defaults.
#1st merge (inner merge) with wrong values taking precedence - but right key-order - then (outer merge) restore values
return [tcl::dict::merge [tcl::dict::merge $main $defaults] $main]
}
proc askuser {question} {
#*** !doctools
#[call [fun askuser] [arg question]]
#[para]A basic utility to read an answer from stdin
#[para]The prompt is written to the terminal and then it waits for a user to type something
#[para]stdin is temporarily configured to blocking and then put back in its original state in case it wasn't already so.
#[para]If the terminal is using punk::console and is in raw mode - the terminal will temporarily be put in line mode.
#[para](Generic terminal raw vs linemode detection not yet present)
#[para]The user must hit enter to submit the response
#[para]The return value is the string if any that was typed prior to hitting enter.
#[para]The question argument can be manually colourised using the various punk::ansi funcitons
#[example_begin]
# set answer [lb]punk::lib::askuser "[lb]a+ green bold[rb]Do you want to proceed? (Y|N)[lb]a[rb]"[rb]
# if {[lb]string match y* [lb]string tolower $answer[rb][rb]} {
# puts "Proceeding"
# } else {
# puts "Cancelled by user"
# }
#[example_end]
puts stdout $question
flush stdout
set stdin_state [fconfigure stdin]
if {[catch {
package require punk::console
set console_raw [set ::punk::console::is_raw]
} err_console]} {
#assume normal line mode
set console_raw 0
}
try {
fconfigure stdin -blocking 1
if {$console_raw} {
punk::console::disableRaw
set answer [gets stdin]
punk::console::enableRaw
} else {
set answer [gets stdin]
}
} finally {
fconfigure stdin -blocking [tcl::dict::get $stdin_state -blocking]
}
return $answer
}
#like textutil::adjust::indent - but doesn't strip trailing lines, and doesn't implement skip parameter.
proc indent {text {prefix " "}} {
set result [list]
foreach line [split $text \n] {
if {[string trim $line] eq ""} {
lappend result ""
} else {
lappend result $prefix[string trimright $line]
}
}
return [join $result \n]
}
proc undent {text} {
if {$text eq ""} {
return ""
}
set lines [split $text \n]
set nonblank [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
continue
}
lappend nonblank $ln
}
set lcp [longestCommonPrefix $nonblank]
if {$lcp eq ""} {
return $text
}
regexp {^([\t ]*)} $lcp _m lcp
if {$lcp eq ""} {
return $text
}
set len [string length $lcp]
set result [list]
foreach ln $lines {
if {[string trim $ln] eq ""} {
lappend result ""
} else {
lappend result [string range $ln $len end]
}
}
return [join $result \n]
}
#A version of textutil::string::longestCommonPrefixList
proc longestCommonPrefix {items} {
if {[llength $items] <= 1} {
return [lindex $items 0]
}
set items [lsort $items[unset items]]
set min [lindex $items 0]
set max [lindex $items end]
#if first and last of sorted list share a prefix - then all do (first and last of sorted list are the most different in the list)
#(sort order nothing to do with length - e.g min may be longer than max)
if {[string length $min] > [string length $max]} {
set temp $min
set min $max
set max $temp
}
set n [string length $min]
set prefix ""
set i -1
while {[incr i] < $n && ([set c [string index $min $i]] eq [string index $max $i])} {
append prefix $c
}
return $prefix
}
#test example of the technique - not necessarily particularly useful as a function, except maybe for brevity/clarity. todo - test if inlined version gives any perf advantage compared to a temp var
proc swapnumvars {namea nameb} {
upvar $namea a $nameb b
set a [expr {$a ^ $b}]
set b [expr {$a ^ $b}]
set a [expr {$a ^ $b}]
}
#e.g linesort -decreasing $data
proc linesort {args} {
#*** !doctools
#[call [fun linesort] [opt {sortoption ?val?...}] [arg textblock]]
#[para]Sort lines in textblock
#[para]Returns another textblock with lines sorted
#[para]options are flags as accepted by lsort ie -ascii -command -decreasing -dictionary -index -indices -integer -nocase -real -stride -unique
if {[llength $args] < 1} {
error "linesort missing lines argument"
}
set lines [lindex $args end]
set opts [lrange $args 0 end-1]
#.= list $lines |@0,sortopts/1> linelist |> .=data>1,sortopts>1* lsort |> list_as_lines <| {*}$opts
list_as_lines [lsort {*}$opts [linelist $lines]]
}
proc list_as_lines {args} {
#*** !doctools
#[call [fun list_as_lines] [opt {-joinchar char}] [arg linelist]]
#[para]This simply joines the elements of the list with -joinchar
#[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines <le>
#[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line.
if {[set eop [lsearch $args --]] == [llength $args]-2} {
#end-of-opts not really necessary - except for consistency with lines_as_list
set args [concat [lrange $args 0 $eop-1] [lrange $args $eop+1 end]]
}
if {[llength $args] == 3 && [lindex $args 0] eq "-joinchar"} {
set joinchar [lindex $args 1]
set lines [lindex $args 2]
} elseif {[llength $args] == 1} {
set joinchar "\n"
set lines [lindex $args 0]
} else {
error "list_as_lines usage: list_as_lines ?-joinchar <char>? <linelist>"
}
return [join $lines $joinchar]
}
proc list_as_lines2 {args} {
#eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible?
lassign [tcl::dict::values [punk::args::get_dict {
-joinchar -default \n
*values -min 1 -max 1
} $args]] opts values
puts "opts:$opts"
puts "values:$values"
return [join [tcl::dict::get $values 0] [tcl::dict::get $opts -joinchar]]
}
proc lines_as_list {args} {
#*** !doctools
#[call [fun lines_as_list] [opt {option value ...}] [arg text]]
#[para]Returns a list of possibly trimmed lines depeding on options
#[para]The concept of lines is raw lines from splitting on newline after crlf is mapped to lf
#[para]- not console lines which may be entirely different due to control characters such as vertical tabs or ANSI movements
#The underlying function linelist has the validation code which gives nicer usage errors.
#we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error
#..because we don't know what to say if there are odd numbers of args
#we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work
#e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway
if {[lsearch $args "--"] == [llength $args]-2} {
set opts [lrange $args 0 end-2]
} else {
set opts [lrange $args 0 end-1]
}
#set opts [tcl::dict::merge {-block {}} $opts]
set bposn [lsearch $opts -block]
if {$bposn < 0} {
lappend opts -block {}
}
set text [lindex $args end]
tailcall linelist {*}$opts $text
}
#this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds
proc lines_as_list2 {args} {
#pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults
#-anyopts 1 avoids having to know what to say if odd numbers of options passed etc
#we don't have to decide what is an opt vs a value
#even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block)
lassign [tcl::dict::values [punk::args::get_dict {
*opts -any 1
-block -default {}
} $args]] opts valuedict
tailcall linelist {*}$opts {*}[tcl::dict::values $valuedict]
}
# important for pipeline & match_assign
# -line trimline|trimleft|trimright -block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty -commandprefix {string length} ?
# -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace
proc linelist {args} {
set usage "linelist ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix <cmdlist> text"
if {[llength $args] == 0} {
error "linelist missing textchunk argument usage:$usage"
}
set text [lindex $args end]
set text [string map [list \r\n \n] $text] ;#review - option?
set arglist [lrange $args 0 end-1]
set opts [tcl::dict::create\
-block {trimhead1 trimtail1}\
-line {}\
-commandprefix ""\
-ansiresets auto\
-ansireplays 0\
]
foreach {o v} $arglist {
switch -- $o {
-block - -line - -commandprefix - -ansiresets - -ansireplays {
tcl::dict::set opts $o $v
}
default {
error "linelist: Unrecognized option '$o' usage:$usage"
}
}
}
# -- --- --- --- --- ---
set opt_block [tcl::dict::get $opts -block]
if {[llength $opt_block]} {
foreach bo $opt_block {
switch -- $bo {
trimhead - trimtail - triminner - trimall - trimhead1 - trimtail1 - collateempty {}
default {
set known_blockopts [list trimhead trimtail triminner trimall trimhead1 trimtail1 collateempty]
error "linelist: unknown -block option value: $bo known values: $known_blockopts"
}
}
}
#normalize certain combos
if {[set posn [lsearch $opt_block trimhead1]] >=0 && "trimhead" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {[set posn [lsearch $opt_block trimtail1]] >=0 && "trimtail" in $opt_block} {
set opt_block [lreplace $opt_block $posn $posn]
}
if {"trimall" in $opt_block} {
#no other block options make sense in combination with this
set opt_block [list "trimall"]
}
#TODO
if {"triminner" in $opt_block } {
error "linelist -block triminner not implemented - sorry"
}
}
# -- --- --- --- --- ---
set opt_line [tcl::dict::get $opts -line]
set tl_left 0
set tl_right 0
set tl_both 0
foreach lo $opt_line {
switch -- $lo {
trimline {
set tl_both 1
}
trimleft {
set tl_left 1
}
trimright {
set tl_right 1
}
default {
set known_lineopts [list trimline trimleft trimright]
error "linelist: unknown -line option value: $lo known values: $known_lineopts"
}
}
}
#normalize trimleft trimright combo
if {$tl_left && $tl_right} {
set opt_line [list "trimline"]
set tl_both 1
}
# -- --- --- --- --- ---
set opt_commandprefix [tcl::dict::get $opts -commandprefix]
# -- --- --- --- --- ---
set opt_ansiresets [tcl::dict::get $opts -ansiresets]
# -- --- --- --- --- ---
set opt_ansireplays [tcl::dict::get $opts -ansireplays]
if {$opt_ansireplays} {
if {$opt_ansiresets eq "auto"} {
set opt_ansiresets 1
}
} else {
if {$opt_ansiresets eq "auto"} {
set opt_ansiresets 0
}
}
# -- --- --- --- --- ---
set linelist [list]
set nlsplit [split $text \n]
if {![llength $opt_line]} {
set linelist $nlsplit
#lappend linelist {*}$nlsplit
} else {
#already normalized trimleft+trimright to trimline
if {$tl_both} {
foreach ln $nlsplit {
lappend linelist [string trim $ln]
}
} elseif {$tl_left} {
foreach ln $nlsplit {
lappend linelist [string trimleft $ln]
}
} elseif {$tl_right} {
foreach ln $nlsplit {
lappend linelist [string trimright $ln]
}
}
}
if {"collateempty" in $opt_block} {
set inputlist $linelist[set linelist [list]]
set last "-"
foreach input $inputlist {
if {$input ne ""} {
lappend linelist $input
set last "-"
} else {
if {$last ne ""} {
lappend linelist ""
}
set last ""
}
}
}
if {"trimall" in $opt_block} {
set linelist [lsearch -all -inline -not -exact $linelist[set linelist {}] ""]
} else {
set start 0
if {"trimhead" in $opt_block} {
set idx 0
set lastempty -1
foreach ln $linelist {
if {[lindex $linelist $idx] ne ""} {
break
} else {
set lastempty $idx
}
incr idx
}
if {$lastempty >=0} {
set start [expr {$lastempty +1}]
}
}
set linelist [lrange $linelist $start end]
if {"trimtail" in $opt_block} {
set revlinelist [lreverse $linelist][set linelist {}]
set i 0
foreach ln $revlinelist {
if {$ln ne ""} {
set linelist [lreverse [lrange $revlinelist $i end]]
break
}
incr i
}
}
# --- ---
set start 0
set end "end"
if {"trimhead1" in $opt_block} {
if {[lindex $linelist 0] eq ""} {
set start 1
}
}
if {"trimtail1" in $opt_block} {
if {[lindex $linelist end] eq ""} {
set end "end-1"
}
}
set linelist [lrange $linelist $start $end]
}
#review - we need to make sure ansiresets don't accumulate/grow on any line
#Each resulting line should have a reset of some type at start and a pure-reset at end to stop
#see if we can find an ST sequence that most terminals will not display for marking sections?
if {$opt_ansireplays} {
package require punk::ansi
if {$opt_ansiresets} {
set RST [punk::ansi::a]
} else {
set RST ""
}
set replaycodes $RST ;#todo - default?
set transformed [list]
#shortcircuit common case of no ansi
if {![punk::ansi::ta::detect $linelist]} {
if {$opt_ansiresets} {
foreach ln $linelist {
lappend transformed $RST$ln$RST
}
set linelist $transformed
}
} else {
#INLINE punk::ansi::codetype::is_sgr_reset
#regexp {\x1b\[0*m$} $code
set re_is_sgr_reset {\x1b\[0*m$}
#INLINE punk::ansi::codetype::is_sgr
#regexp {\033\[[0-9;:]*m$} $code
set re_is_sgr {\x1b\[[0-9;:]*m$}
foreach ln $linelist {
#set is_replay_pure_reset [regexp {\x1b\[0*m$} $replaycodes] ;#only looks at tail code - but if tail is pure reset - any prefix is ignorable
set ansisplits [punk::ansi::ta::split_codes_single $ln]
if {[llength $ansisplits]<= 1} {
#plaintext only - no ansi codes in line
lappend transformed [string cat $replaycodes $ln $RST]
#leave replaycodes as is for next line
set nextreplay $replaycodes
} else {
set tail $RST
set lastcode [lindex $ansisplits end-1] ;#may or may not be SGR
if {[punk::ansi::codetype::is_sgr_reset $lastcode]} {
if {[lindex $ansisplits end] eq ""} {
#last plaintext is empty. So the line is already suffixed with a reset
set tail ""
set nextreplay $RST
} else {
#trailing text has been reset within line - but no tail reset present
#we normalize by putting a tail reset on anyway
set tail $RST
set nextreplay $RST
}
} elseif {[lindex $ansisplits end] ne "" && [punk::ansi::codetype::has_sgr_leadingreset $lastcode]} {
#No tail reset - and no need to examine whole line to determine stack that is in effect
set tail $RST
set nextreplay $lastcode
} else {
#last codeset doesn't reset from earlier codes or isn't SGR - so we have to look at whole line to determine codes in effect
#last codeset doesn't end in a pure-reset
#whether code was at very end or not - add a reset tail
set tail $RST
#determine effective replay for line
set codestack [list start]
foreach {pt code} $ansisplits {
if {[punk::ansi::codetype::is_sgr_reset $code]} {
set codestack [list] ;#different from 'start' marked - this means we've had a reset
} elseif {[punk::ansi::codetype::has_sgr_leadingreset $code]} {
set codestack [list $code]
} else {
if {[punk::ansi::codetype::is_sgr $code]} {
#todo - proper test of each code - so we only take latest background/foreground etc.
#requires handling codes with varying numbers of parameters.
#basic simplification - remove straight dupes.
set dup_posns [lsearch -all -exact $codestack $code] ;#!must use -exact as codes have square brackets which are interpreted as glob chars.
set codestack [lremove $codestack {*}$dup_posns]
lappend codestack $code
} ;#else gx0 or other code - we don't want to stack it with SGR codes
}
}
if {$codestack eq [list start]} {
#No SGRs - may have been other codes
set line_has_sgr 0
} else {
#list is either empty or begins with start - empty means it had SGR reset - so it still invalidates current state of replaycodes
set line_has_sgr 1
if {[lindex $codestack 0] eq "start"} {
set codestack [lrange $codestack 1 end]
}
}
#set newreplay [join $codestack ""]
set newreplay [punk::ansi::codetype::sgr_merge_list {*}$codestack]
if {$line_has_sgr && $newreplay ne $replaycodes} {
#adjust if it doesn't already does a reset at start
if {[punk::ansi::codetype::has_sgr_leadingreset $newreplay]} {
set nextreplay $newreplay
} else {
set nextreplay $RST$newreplay
}
} else {
set nextreplay $replaycodes
}
}
if {[punk::ansi::codetype::has_sgr_leadingreset $ln]} {
#no point attaching any replay
lappend transformed [string cat $ln $tail]
} else {
lappend transformed [string cat $replaycodes $ln $tail]
}
}
set replaycodes $nextreplay
}
set linelist $transformed
}
}
if {[llength $opt_commandprefix]} {
set transformed [list]
foreach ln $linelist {
lappend transformed [{*}$opt_commandprefix $ln]
}
set linelist $transformed
}
return $linelist
}
interp alias {} errortime {} punk::lib::errortime
proc errortime {script groupsize {iters 2}} {
#by use MAK from https://wiki.tcl-lang.org/page/How+to+Measure+Performance
set i 0
set times {}
if {$iters < 2} {set iters 2}
for {set i 0} {$i < $iters} {incr i} {
set result [uplevel [list time $script $groupsize]]
lappend times [lindex $result 0]
}
set average 0.0
set s2 0.0
foreach time $times {
set average [expr {$average + double($time)/$iters}]
}
foreach time $times {
set s2 [expr {$s2 + (($time-$average)*($time-$average) / ($iters-1))}]
}
set sigma [expr {int(sqrt($s2))}]
set average [expr int($average)]
return "$average +/- $sigma microseconds per iteration"
}
#test function to use with show_jump_tables
#todo - check if switch compilation to jump tables differs by Tcl version
proc switch_char_test {c} {
set dec [scan $c %c]
foreach t [list 1 2 3] {
switch -- $c {
x {
return [list $dec x $t]
}
y {
return [list $dec y $t]
}
z {
return [list $dec z $t]
}
}
}
#tcl 8.6/8.7 (at least)
#curlies must be unescaped and unbraced to work as literals in switch and enable it to compile to jumpTable
switch -- $c {
a {
return [list $dec a]
}
{"} {
return [list $dec dquote]
}
{[} {return [list $dec lb]}
{]} {return [list $dec rb]}
"{" {
return [list $dec lbrace]
}
"}" {
return [list $dec rbrace]
}
default {
return [list $dec $c]
}
}
}
#we are interested in seeing jumpTable line and following lines up until next line starting with "Command" or bracketed number e.g (164)
proc show_jump_tables {args} {
#avoiding use of 'info cmdtype' as unavaliable in safe interps as at 2024-06.
if {[llength $args] == 1} {
set data [tcl::unsupported::disassemble proc [lindex $args 0]]
} elseif {[llength $args] == 2} {
#review - this looks for direct methods on the supplied object/class, and then tries to disassemble method on the supplied class or class of supplied object if it isn't a class itself.
#not sure if this handles more complex hierarchies or mixins etc.
lassign $args obj method
if {![info object isa object $obj]} {
error "show_jump_tables unable to examine '$args'. $obj is not an oo object"
}
#classes are objects too and can have direct methods
if {$method in [info object methods $obj]} {
set data [tcl::unsupported::disassemble objmethod $obj $method]
} else {
if {![info object isa class $obj]} {
set obj [info object class $obj]
}
set data [tcl::unsupported::disassemble method $obj $method]
}
} else {
error "show_jump_tables expected a procname or a class/object and method"
}
set result ""
set in_jt 0
foreach ln [split $data \n] {
set tln [string trim $ln]
if {!$in_jt} {
if {[string match *jumpTable* $ln]} {
append result $ln \n
set in_jt 1
}
} else {
if {[string match Command* $tln] || [string match "(*) *" $tln]} {
set in_jt 0
} else {
append result $ln \n
}
}
}
return $result
}
proc temperature_f_to_c {deg_fahrenheit} {
return [expr {($deg_fahrenheit -32) * (5/9.0)}]
}
proc temperature_c_to_f {deg_celsius} {
return [expr {($deg_celsius * (9/5.0)) + 32}]
}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#todo - way to generate 'internal' docs separately?
#*** !doctools
#[section Internal]
namespace eval punk::lib::system {
#*** !doctools
#[subsection {Namespace punk::lib::system}]
#[para] Internal functions that are not part of the API
#[list_begin definitions]
proc has_script_var_bug {} {
set script {set j [list spud] ; list}
append script \n
uplevel #0 $script
set rep1 [tcl::unsupported::representation $::j]
set script ""
set rep2 [tcl::unsupported::representation $::j]
set nostring1 [string match "*no string" $rep1]
set nostring2 [string match "*no string" $rep2]
#we assume it should have no string rep in either case
#Review: check Tcl versions for behaviour/consistency
if {!$nostring2} {
return true
} else {
return false
}
}
proc has_safeinterp_compile_bug {{show 0}} {
#ensemble calls within safe interp not compiled
namespace eval [namespace current]::testcompile {
proc ensembletest {} {string index a 0}
}
set has_bug 0
set bytecode_outer [tcl::unsupported::disassemble proc [namespace current]::testcompile::ensembletest]
if {$show} {
puts outer:
puts $bytecode_outer
}
if {![interp issafe]} {
#test of safe subinterp only needed if we aren't already in a safe interp
if {![catch {
interp create x -safe
} errMsg]} {
x eval {proc ensembletest {} {string index a 0}}
set bytecode_safe [x eval {tcl::unsupported::disassemble proc ::ensembletest}]
if {$show} {
puts safe:
puts $bytecode_safe
}
interp delete x
#mainly we expect the safe interp might contain invokeStk - indicating not byte compiled (or we would see strindex instead)
#It's possible the interp we're running in is also not compiling ensembles.
#we could then get a result of 2 - which still indicates a problem
if {[string last "invokeStk" $bytecode_safe] >= 1} {
incr has_bug
}
} else {
#our failure to create a safe interp here doesn't necessarily mean the Tcl version doesn't have the problem - but we could end up returning zero if somehow safe interp can't be created from unsafe interp?
#unlikely - but we should warn
puts stderr "Unable to create a safe sub-interp to test - result only indicates status of current interpreter"
}
}
namespace delete [namespace current]::testcompile
if {[string last "invokeStk" $bytecode_outer] >= 1} {
incr has_bug
}
return $has_bug
}
proc mostFactorsBelow {n} {
##*** !doctools
#[call [fun mostFactorsBelow] [arg n]]
#[para]Find the number below $n which has the greatest number of factors
#[para]This will get slow quickly as n increases (100K = 1s+ 2024)
set most 0
set mostcount 0
for {set i 1} {$i < $n} {incr i} {
set fc [llength [punk::lib::factors $i]]
if {$fc > $mostcount} {
set most $i
set mostcount $fc
}
}
return [list number $most numfactors $mostcount]
}
proc factorCountBelow_punk {n} {
##*** !doctools
#[call [fun factorCountBelow] [arg n]]
#[para]For numbers 1 to n - keep a tally of the total count of factors
#[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result
#[para]and as a rudimentary performance comparison
#[para]gets slow quickly!
set tally 0
for {set i 1} {$i <= $n} {incr i} {
incr tally [llength [punk::lib::factors $i]]
}
return $tally
}
proc factorCountBelow_numtheory {n} {
##*** !doctools
#[call [fun factorCountBelow] [arg n]]
#[para]For numbers 1 to n - keep a tally of the total count of factors
#[para]This is not useful other than a quick and dirty check that different algorithms return *probably* the same result
#[para]and as a rudimentary performance comparison
#[para]gets slow quickly! (significantly slower than factorCountBelow_punk)
package require math::numtheory
set tally 0
for {set i 1} {$i <= $n} {incr i} {
incr tally [llength [math::numtheory::factors $i]]
}
return $tally
}
proc factors2 {x} {
##*** !doctools
#[call [fun factors2] [arg x]]
#[para]Return a sorted list of factors of x
#[para]A similar brute-force mechanism to factors - but keeps result ordering as we go.
set smallfactors [list 1]
set j 2
set max [expr {sqrt($x)}]
while {$j < $max} {
if {($x % $j) == 0} {
lappend smallfactors $j
lappend largefactors [expr {$x / $j}]
}
incr j
}
#handle sqrt outside loop so we don't have to sort/dedup or check list membership in main loop
if {($x % $j) == 0} {
if {$j == ($x / $j)} {
lappend smallfactors $j
}
}
return [concat $smallfactors [lreverse $largefactors] $x]
}
# incomplte - report which is the innermost bracket/quote etc awaiting completion for a Tcl command
#important - used by punk::repl
proc incomplete {partial} {
#we can apparently get away without concatenating current innerpartial to previous in list - REVIEW.
if {[info complete $partial]} {
return [list]
}
set clist [split $partial ""]
#puts stderr "-->$clist<--"
set waiting [list ""]
set innerpartials [list ""]
set escaped 0
set i 0
foreach c $clist {
if {$c eq "\\"} {
set escaped [expr {!$escaped}]
incr i
continue
} ;# set escaped 0 at end
set p [lindex $innerpartials end]
if {$escaped == 0} {
#NOTE - curly braces as switch arm keys must be unescaped and balanced. (escapes stop byte-compilation to jumpTable for switch statements for tcl8.6/8.7 at least)
switch -- $c {
{"} {
if {![info complete ${p}]} {
lappend waiting {"}
lappend innerpartials ""
} else {
if {[lindex $waiting end] eq {"}} {
#this quote is endquote
set waiting [lrange $waiting 0 end-1]
set innerpartials [lrange $innerpartials 0 end-1]
} else {
if {![info complete ${p}$c]} {
lappend waiting {"}
lappend innerpartials ""
} else {
set p ${p}${c}
lset innerpartials end $p
}
}
}
}
{[} {
if {![info complete ${p}$c]} {
lappend waiting "\]"
lappend innerpartials ""
} else {
set p ${p}${c}
lset innerpartials end $p
}
}
"{" {
if {![info complete ${p}$c]} {
lappend waiting "\}"
lappend innerpartials ""
} else {
set p ${p}${c}
lset innerpartials end $p
}
}
"}" -
default {
set waitingfor [lindex $waiting end]
if {$c eq "$waitingfor"} {
set waiting [lrange $waiting 0 end-1]
set innerpartials [lrange $innerpartials 0 end-1]
} else {
set p ${p}${c}
lset innerpartials end $p
}
}
}
} else {
set p ${p}${c}
lset innerpartials end $p
}
set escaped 0
incr i
}
set incomplete [list]
foreach w $waiting {
#to be treated as literals - curly braces must be unescaped here - and balanced - hence the left-curly empty arm.
switch -- $w {
{"} {
lappend incomplete $w
}
{]} {
lappend incomplete "\["
}
"{" {}
"}" {
lappend incomplete "\{"
}
}
}
set debug 0
if {$debug} {
foreach w $waiting p $innerpartials {
puts stderr "->awaiting:'$w' partial: $p"
}
}
return $incomplete
}
#This only works for very simple cases will get confused with for example:
# {set x "a["""}
proc incomplete_naive {partial} {
if {[info complete $partial]} {
return [list]
}
set clist [split $partial ""]
set waiting [list]
set escaped 0
foreach c $clist {
if {$c eq "\\"} {
set escaped [expr {!$escaped}]
continue
}
if {!$escaped} {
if {$c eq {"}} {
if {[lindex $waiting end] eq {"}} {
set waiting [lrange $waiting 0 end-1]
} else {
lappend waiting {"}
}
} elseif {$c eq "\["} {
lappend waiting "\]"
} elseif {$c eq "\{"} {
lappend waiting "\}"
} else {
set waitingfor [lindex $waiting end]
if {$c eq "$waitingfor"} {
set waiting [lrange $waiting 0 end-1]
}
}
}
}
set incomplete [list]
foreach w $waiting {
if {$w eq {"}} {
lappend incomplete $w
} elseif {$w eq "\]"} {
lappend incomplete "\["
} elseif {$w eq "\}"} {
lappend incomplete "\{"
}
}
return $incomplete
}
#*** !doctools
#[list_end] [comment {--- end definitions namespace punk::lib::system ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide punk::lib [tcl::namespace::eval punk::lib {
variable pkg punk::lib
variable version
set version 999999.0a1.0
}]
return
#*** !doctools
#[manpage_end]