#we will accept and pass through the less common colon separator (ITU Open Document Architecture)
@ -701,13 +745,13 @@ namespace eval punk::ansi {
#review - what about CSI n : m H where row n happens to be current line?
regexp {\033\[[0-9]*(:?C|D|G)$}
}
#pure SGR reset
#pure SGR reset with no other functions
proc is_sgr_reset {code} {
#todo 8-bit csi
regexp {\033\[0*m$} $code
}
#whether this code has 0 (or equivalently empty) parameter (but may set others)
#if an SGR code as a reset in it - we don't need to carry forward any previous SGR codes
#if an SGR code has a reset in it - we don't need to carry forward any previous SGR codes
#it generally only makes sense for the reset to be the first entry - otherwise the code has ineffective portions
#However - detecting zero or empty parameter in other positions requires knowing all other codes that may allow zero or empty params.
#We will only look at initial parameter as this is the well-formed normal case.
@ -723,6 +767,9 @@ namespace eval punk::ansi {
return 0
}
}
#has_sgr_reset - rather than support this function - create an sgr normalize function that removes dead params and brings reset to front of param list?
#[para]Working with strings containing ansi in a way that preserves/understands the codes is always going to be significantly slower than working with plain strings
#[para]Just as working with other forms of markup such as HTML - you simply need to be aware of the tradeoffs and design accordingly.
#[para]Returns the length of the string without ansi codes
#[para]This will not count strings hidden inside a 'privacy message' or other ansi codes which may have content between their opening escape and their termination sequence.
#[para]This is equivalent to calling string length on the result of stripansi $string
#[para]Note that this returns the number of characters in the payload, and is not always the same as the width of the string as rendered on a terminal.
#[para]To get the width, use punk::ansi::printing_length instead, which is also ansi aware.
string length [stripansi $string]
}
proc trimleft {string args} {
set intext 0
set out ""
#for split_codes only first or last pt can be empty string
foreach {pt ansiblock} [split_codes $string] {
if {!$intext} {
if {$pt eq "" || [regexp {^\s+$} $pt]} {
append out $ansiblock
} else {
append out [string trimleft $pt]$ansiblock
set intext 1
}
} else {
append out $pt$ansiblock
}
}
return $out
}
proc trimright {string} {
if {$string eq ""} {return ""} ;#excludes the case where split_codes would return nothing
set rtrimmed_list [lreverse [_splits_trimleft [lreverse [split_codes $string]]]]
return [join $rtrimmed_list ""]
}
proc trim {string} {
#make sure we do our ansi-scanning split only once - so use list-based trim operations
#order of left vs right probably makes zero difference - as any reduction the first operation can do is only in terms of characters at other end of list - not in total list length
#we save a single function call by calling both here rather than _splits_trim
#[para]Takes a string that possibly contains ansi codes such as colour,underline etc (SGR codes)
#[para]Returns the character (with applied ansi effect) at position index
#[para]The string could contain non SGR ansi codes - and these will (mostly) be ignored, so shouldn't affect the output.
#[para]Some terminals don't hide 'privacy message' and other strings within an ESC X ESC ^ or ESC _ sequence (terminated by ST)
#[para]It's arguable some of these are application specific - but this function takes the view that they are probably non-displaying - so index won't see them.
#[para]todo: SGR codes within ST-terminated strings not yet ignored properly
#[para]If the caller wants just the character - they should use a normal string index after calling stripansi, or call stripansi afterwards.
#[para]As any operation using end-+<int> will need to strip ansi to precalculate the length anyway; the caller should probably just use stripansi and standard string index if the ansi coded output isn't required and they are using and end-based index.
#[para]In fact, any operation where the ansi info isn't required in the output would probably be slightly more efficiently obtained by using stripansi and normal string operations on that.
#[para]The returned character will (possibly) have a leading ansi escape sequence but no trailing escape sequence - even if the string was taken from a position immediately before a reset or other SGR ansi code
#[para]The ansi-code prefix in the returned string is built up by concatenating previous SGR ansi codes seen - but it is optimised to re-start the process if any full SGR reset is encountered.
#[para]The code sequence doesn't detect individual properties being turned on and then off again, only full resets; so in some cases the ansi-prefix may not be as short as it could be.
#[para]This shouldn't make any difference to the visual output - but a possible future enhancement is something to produce the shortest ansi sequence possible
#[para]Notes:
#[para]This function has to split the whole string into plaintext & ansi codes even for a very low index
#[para]Some sort of generator that parses more of the string as required might be more efficient for large chunks.
#[para]For end-x operations we have to pre-calculate the content-length by stripping the ansi - which is also potentially sub-optimal
set splits [split_codes_single $string]; #we get empty pt(plaintext) between each ansi code that is in a run
#todo - end-x +/-x+/-x etc
set original_index $index
set index [string map [list _ ""] $index]
#short-circuit some trivial cases
if {[string is integer -strict $index]} {
if {$index < 0} {return ""}
#this only short-circuits an index greater than length including ansi-chars
#we don't want to spend cycles stripping ansi for this test so code below will still have to handle index just larger than content-length but still less than entire length
if {$index > [string length $string]} {return ""}
} else {
if {[string match end* $index]} {
#for end- we will probably have to blow a few cycles stripping first and calculate the length
if {$index ne "end"} {
set op [string index $index 3]
set offset [string range $index 4 end]
if {$op ni {+ -} || ![string is integer -strict $offset]} {error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"}
if {$op eq "+" && $offset != 0} {
return ""
}
} else {
set offset 0
}
#by now, if op = + then offset = 0 so we only need to handle the minus case
set payload_len [punk::ansi::ansistring::length $string] ;#a little bit wasteful - but hopefully no big deal
if {$offset == 0} {
set index [expr {$payload_len-1}]
} else {
set index [expr {($payload_len-1) - $offset}]
}
if {$index < 0} {
#don't waste time splitting and looping the string
return ""
}
} else {
#we are trying to avoid evaluating unbraced expr of potentially insecure origin
regexp {^([+-]{0,1})(.*)} $index _match sign tail ;#should always match - even empty string
if {[string is integer -strict $tail]} {
#plain +-<int>
if {$op eq "-"} {
#return nothing for negative indices as per Tcl's lindex etc
return ""
}
set index $tail
} else {
if {[regexp {(.*)([+-])(.*)} $index _match a op b]} {
if {[string is integer -strict $a] && [string is integer -strict $b]} {
if {$op eq "-"} {
set index [expr {$a - $b}]
} else {
set index [expr {$a + $b}]
}
} else {
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
}
} else {
error "bad index '$index': must be integer?\[+-\]integer? or end?\[+-\]integer?"
}
}
}
}
#any pt could be empty if using split_codes_single (or just first and last pt if split_codes)
set low -1
set high -1
set pt_index -2
set pt_found -1
set char ""
set codes_in_effect ""
#we can't only apply leading sequence from previous code - as there may be codes in effect from earlier, so we have to track as we go
#(this would apply even if we used split_codes - but then we would need to do further splitting of each codeset anyway)
foreach {pt code} $splits {
incr pt_index 2
if {$pt ne ""} {
set low [expr {$high + 1}] ;#last high
incr high [string length $pt]
}
if {$pt ne "" && ($index >= $low && $index <= $high)} {
set pt_found $pt_index
set char [string index $pt $index-$low]
break
}
if {[punk::ansi::codetype::is_sgr_reset $code]} {
#we can throw away previous codes_in_effect
set codes_in_effect ""
} else {
#may have partial resets - but we don't want to track individual states of SGR features
#A possible feature would be some function to optimise an ansi code sequence - which we could then apply at the end.
#we don't apply non SGR codes to our output. This is probably what is wanted - but should be reviewed.
#Review - consider if any other types of code make sense to retain in the output in this context.
#[para]This simply joines the elements of the list with -joinchar
#[para]It is mainly intended for use in pipelines where the primary argument comes at the end - but it can also be used as a general replacement for join $lines <le>
#[para]The sister function lines_as_list takes a block of text and splits it into lines - but with more options related to trimming the block and/or each line.
if {[set eop [lsearch $args --]] == [llength $args]-2} {
#end-of-opts not really necessary - except for consistency with lines_as_list
#eat or own dogfood version - shows the implementation is simpler - but unfortunately not suitable for a simple function like this which should be as fast as possible
return [join [dict get $values 0] [dict get $opts -joinchar]]
}
proc lines_as_list {args} {
#The underlying function linelist has the validation code which gives nice usage errors.
#we can't use a dict merge here without either duplicating the underlying validation somewhat, or risking a default message from dict merge error
#..because we don't know what to say if there are odd numbers of args
#we can guess that it's ok to insert our default if no -block found in $args - but as a general principle this mightn't always work
#e.g if -block is also a valid value for the textblock itself. Which in this case it is - although unlikely, and our -block {} default is irrelevant in that case anyway
if {[lsearch $args "--"] == [llength $args]-2} {
set opts [lrange $args 0 end-2]
} else {
set opts [lrange $args 0 end-1]
}
#set opts [dict merge {-block {}} $opts]
set bposn [lsearch $opts -block]
if {$bposn < 0} {
set opts {-block {}}
}
set text [lindex $args end]
tailcall linelist {*}$opts $text
}
#this demonstrates the ease of using an args processor - but as lines_as_list is heavily used in terminal output - we can't afford the extra microseconds
proc lines_as_list2 {args} {
#pass -anyopts 1 so we can let the next function decide what arguments are valid - but still pass our defaults
#-anyopts 1 avoids having to know what to say if odd numbers of options passed etc
#we don't have to decide what is an opt vs a value
#even if the caller provides the argument -block without a value the next function's validation will report a reasonable error because there is now nothing in $values (consumed by -block)
# -block trimming only trims completely empty lines. use -line trimming to remove whitespace e.g -line trimright will clear empty lines without affecting leading whitespace on other lines that aren't pure whitespace
proc linelist {args} {
#puts "---->linelist '$args'"
set usage "linelist ?-line trimline|trimleft|trimright? ?-block trimhead|trimtail|triminner|trimall|trimhead1|trimtail1|collateempty? -commandprefix <cmdlist> text"
set revlinelist [lreverse $linelist][set linelist {}]
set i 0
foreach ln $revlinelist {
if {$ln ne ""} {
set linelist [lreverse [lrange $revlinelist $i end]]
break
}
incr i
}
}
# --- ---
set start 0
set end "end"
if {"trimhead1" in $opt_block} {
if {[lindex $linelist 0] eq ""} {
set start 1
}
}
if {"trimtail1" in $opt_block} {
if {[lindex $linelist end] eq ""} {
set end "end-1"
}
}
set linelist [lrange $linelist $start $end]
}
if {[llength $opt_commandprefix]} {
set transformed [list]
foreach ln $linelist {
lappend transformed [{*}$opt_commandprefix $ln]
}
set linelist $transformed
}
return $linelist
}
#maintenance - take over from punk::args - or put back in punk::args once fixed to support pipeline argument order
#possible improvements - after the 1st call, replace the callsite in the calling proc with an inline script to process and validate the arguments as specified in optionspecs
#This would require a tcl parser .. and probably lots of other work
#It would also probably only be practical if there are no dynamic entries in the optionspecs. An option for opts_values to indicate the caller wants this optimisation would probably be best.
#generally we expect values to contain leading dashes only if -- specified. Otherwise no reliable way determine difference between bad flags and values
#If no eopts (--) specified we stop looking for opts at the first nondash encountered in a position we'd expect a dash - so without eopt, values could contain dashes - but not in first position after flags.
#only supports -flag val pairs, not solo options
#If an option is supplied multiple times - only the last value is used.
#[para]Parse rawargs as a sequence of zero or more option-value pairs followed by zero or more values
#[para]Returns a dict of the form: opts <options_dict> values <values_dict>
#[para]ARGUMENTS:
#[list_begin arguments]
#[arg_def multiline-string optionspecs]
#[para] This a block of text with records delimited by newlines (lf or crlf) - but with multiline values allowed if properly quoted/braced
#[para]'info complete' is used to determine if a record spans multiple lines due to multiline values
#[para]Each optionspec line must be of the form:
#[para]-optionname -key val -key2 val2...
#[para]where the valid keys for each option specification are: -default -type -range -choices -optional
#[arg_def list rawargs]
#[para] This is a list of the arguments to parse. Usually it will be the \$args value from the containing proc
#[list_end]
#[para]
#consider line-processing example below for we need info complete to determine record boundaries
#punk::lib::opt_values {
# -opt1 -default {}
# -opt2 -default {
# etc
# } -multiple 1
#} $args
#-- cannot be used to allow opts_values itself to accept rawargs as separate values - so it doesn't serve much purpose other than as an indicator of intention
#For consistency we support it anyway.
#we have to be careful with end-of-options flag --
#It may legitimately be the only value in the rawargs list (which is a bit odd - but possible) or it may occur immediately before optionspecs and rawargs
#if there is more than one entry in rawargs - we won't find it anyway - so that's ok
set eopts_posn [lsearch $args --]
if {$eopts_posn == ([llength $args]-1)} {
#sole argument in rawargs - not the one we're looking for
#first process dashed and non-dashed argspecs without regard to whether non-dashed are at the beginning or end
set value_names [list]
set records [list]
set linebuild ""
foreach rawline [split $optionspecs \n] {
set recordsofar [string cat $linebuild $rawline]
if {![info complete $recordsofar]} {
append linebuild [string trimleft $rawline] \n
} else {
lappend records [string cat $linebuild $rawline]
set linebuild ""
}
}
foreach ln $records {
set trimln [string trim $ln]
if {$trimln eq "" || [string index $trimln 0] eq "#"} {
continue
}
set argname [lindex $trimln 0]
set argspecs [lrange $trimln 1 end]
if {[llength $argspecs] %2 != 0} {
error "punk::lib::opts_values - bad optionspecs line for argument '$argname' Remaining items on line must be in paired option-value format - received '$argspecs'"
}
if {[string match -* $argname]} {
dict set argspecs -ARGTYPE option
set is_opt 1
} else {
dict set argspecs -ARGTYPE value
lappend value_names $argname
set is_opt 0
}
dict for {spec specval} $argspecs {
if {$spec ni $known_argspecs} {
error "punk::lib::opts_values - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs"
}
}
set argspecs [dict merge $optspec_defaults $argspecs]
dict set arg_info $argname $argspecs
if {![dict get $argspecs -optional]} {
if {$is_opt} {
lappend required_opts $argname
} else {
lappend required_vals $argname
}
}
if {[dict exists $arg_info $argname -default]} {
if {$is_opt} {
dict set defaults_dict_opts $argname [dict get $arg_info $argname -default]
} else {
dict set defaults_dict_values $argname [dict get $arg_info $argname -default]
}
}
}
#puts "--> [info frame -2] <--"
set cmdinfo [dict get [info frame -2] cmd]
#we can't treat cmdinfo as a list - it may be something like {command {*}$args} in which case lindex $cmdinfo 0 won't work
#hopefully first word is a plain proc name if this function was called in the normal manner - directly from a proc
#we will break at first space and assume the lhs of that will give enough info to be reasonable - (alternatively we could use entire cmdinfo - but it might be big and ugly)
set caller [regexp -inline {\S+} $cmdinfo]
#if called from commandline or some other contexts such as outside of a proc in a namespace - caller may just be "namespace"
if {$caller eq "namespace"} {
set caller "punk::lib::opts_values called from namespace"
}
# ------------------------------
if {$caller ne "punk::lib::opts_values"} {
#1) check our caller's call to us - recursive version - perhaps more elegant to eat our own dogfood - but maybe too much overhead for a script-based args processor which is already quite heavy :/
# error "punk::lib::opts_values expected: a multiline text block of option-specifications, a list of args and at most three option pairs -minvalues <int>, -maxvalues <int>, -anyopts true|false - got extra arguments: '$ownvalues'"
#}
#set opt_minvalues [dict get $ownopts -minvalues]
#set opt_maxvalues [dict get $ownopts -maxvalues]
#set opt_anyopts [dict get $ownopts -anyopts]
#2) Quick and dirty - but we don't need much validation
set defaults [dict create\
-minvalues 0\
-maxvalues -1\
-anyopts 0\
]
dict for {k v} $ov_opts {
if {$k ni {-minvalues -maxvalues -anyopts}} {
error "punk::lib::opts_values unrecognised option $k. Known values [dict keys $defaults]"
}
if {![string is integer -strict $v]} {
error "punk::lib::opts_values argument $k must be of type integer"
}
}
set ov_opts [dict merge $defaults $ov_opts]
set opt_minvalues [dict get $ov_opts -minvalues]
set opt_maxvalues [dict get $ov_opts -maxvalues]
set opt_anyopts [dict get $ov_opts -anyopts]
} else {
#don't recurse ie don't check our own args if we called ourself
set opt_minvalues 2
set opt_maxvalues 2
set opt_anyopts 0
}
# ------------------------------
if {[set eopts [lsearch $rawargs "--"]] >= 0} {
set values [lrange $rawargs $eopts+1 end]
set arglist [lrange $rawargs 0 $eopts-1]
} else {
if {[lsearch $rawargs -*] >= 0} {
#to support option values with leading dash e.g -offset -1 , we can't just take the last flagindex
set i 0
foreach {k v} $rawargs {
if {![string match -* $k]} {
break
}
if {$i+1 >= [llength $rawargs]} {
#no value for last flag
error "bad options for $caller. No value supplied for last option $k"
}
incr i 2
}
set arglist [lrange $rawargs 0 $i-1]
set values [lrange $rawargs $i end]
} else {
set arglist [list]
set values $rawargs ;#no -flags detected
}
}
#confirm any valnames before last don't have -multiple key
foreach valname [lrange $value_names 0 end-1] {
if {[dict exists $arg_info $valname -multiple ]} {
error "bad key -multiple on argument spec for '$valname'. Only the last value argument specification can be marked -multiple"
}
}
set values_dict [dict create]
set validx 0
set in_multiple ""
foreach valname $value_names val $values {
if {$validx+1 > [llength $values]} {
break
}
if {$valname ne ""} {
if {[dict exists $arg_info $valname -multiple] && [dict get $arg_info $valname -multiple]} {
dict lappend values_dict $valname $val
set in_multiple $valname
} else {
dict set values_dict $valname $val
}
} else {
if {$in_multiple ne ""} {
dict lappend values_dict $in_multiple $val
} else {
dict set values_dict $validx $val
}
}
incr validx
}
if {$opt_maxvalues == -1} {
#only check min
if {[llength $values] < $opt_minvalues} {
error "bad number of trailing values for $caller. Got [llength $values] values. Expected at least $opt_minvalues"
error "bad number of trailing values for $caller. Got [llength $values] values. Expected exactly $opt_minvalues"
} else {
error "bad number of trailing values for $caller. Got [llength $values] values. Expected between $opt_minvalues and $opt_maxvalues inclusive"
}
}
}
#opts explicitly marked as -optional 0 must be present - regardless of -anyopts (which allows us to ignore additional opts to pass on to next call)
#however - if -anyopts is true, there is a risk we will treat a shortened option name as matching our default - when it was intended for the next call
#We will always require exact matches for all required opts to avoid this risk, even though an ultimately-called function may not require the full-length option-name REVIEW
#The aim is to allow a wrapper function to specify a default value for an option (and/or other changes/restrictions to a particular option or two) - without having to re-specify all the options for the underlying function.
#without full respecification in the wrapper - we can't know that a supplied prefix is unambiguous at the next level
#For this reason we need to pass on full-length opts for any defined options in the wrapper even if anyopts is true
set argnamespresent [dict keys $arglist]
foreach r $required_opts {
if {$r ni $argspresent} {
error "Required option missing for $caller. '$r' is marked with -optional false - so must be present in its full-length form"
}
}
set valuenamespresent [dict keys $values_dict]
foreach r $required_vals {
if {$r ni $valuenamespresent} {
error "Required value missing for $caller. '$r' is marked with -optional false - so must be present"
}
}
if {!$opt_anyopts} {
set checked_args [dict create]
for {set i 0} {$i < [llength $arglist]} {incr i} {
#allow this to error out with message indicating expected flags
set val [lindex $arglist $i+1]
set fullopt [tcl::prefix match -message "options for $caller. Unexpected option" [dict keys $arg_info] [lindex $arglist $i]]
if {[dict exists $arg_info $fullopt -multiple] && [dict get $arg_info $fullopt -multiple]} {
dict lappend checked_args $fullopt $val
} else {
dict set checked_args $fullopt $val
}
incr i ;#skip val
}
} else {
#still need to use tcl::prefix match to normalize - but don't raise an error
set checked_args [dict create]
dict for {k v} $arglist {
if {![catch {tcl::prefix::match [dict keys $arg_info] $k} fullopt]} {
if {[dict exists $arg_info $fullopt -multiple] && [dict get $arg_info $fullopt -multiple]} {
dict lappend checked_args $fullopt $v
} else {
dict set checked_args $fullopt $v
}
} else {
#opt was unspecified
dict set checked_args $k $v
}
}
}
set opts [dict merge $defaults_dict_opts $checked_args]
#assert - checked_args keys are full-length option names if -anyopts was false or if the supplied option as a shortname matched one of our defined options
set values [dict merge $defaults_dict_values $values_dict]
#todo - allow defaults outside of choices/ranges
#check types,ranges,choices
set opts_and_values [concat $opts $values]
set combined_defaults [concat $defaults_dict_values $defaults_dict_opts] ;#can be no key collisions - we don't allow a value key beginning with dash - opt names must begin with dash
dict for {o v} $opts_and_values {
if {[dict exists $arg_info $o -multiple] && [dict get $arg_info $o -multiple]} {
set vlist $v
} else {
set vlist [list $v]
}
if {[dict exists $arg_info $o -validate_without_ansi] && [dict get $arg_info $o -validate_without_ansi]} {
set validate_without_ansi 1
package require punk::ansi
} else {
set validate_without_ansi 0
}
if {[dict exists $arg_info $o -allow_ansi] && [dict get $arg_info $o -allow_ansi]} {
set allow_ansi 1
} else {
#ironically - we need punk::ansi to detect and disallow - but we don't need it if ansi is allowed
package require punk::ansi
set allow_ansi 0
}
if {!$allow_ansi} {
foreach e $vlist {
if {[punk::ansi::ta::detect $e]} {
error "Option $o for $caller contains ansi - but -allow_ansi is false. Received: '$e'"
}
}
}
set vlist_check [list]
foreach e $vlist {
if {$validate_without_ansi} {
lappend vlist_check [punk::ansi::stripansi $e]
} else {
lappend vlist_check $e
}
}
set is_default 0
foreach e $vlist e_check $vlist_check {
if {[dict exists $combined_defaults $o] && ($e_check eq [dict get $combined_defaults $o])} {
incr is_default
}
}
if {$is_default eq [llength $vlist]} {
set is_default true
}
#we want defaults to pass through - even if they don't pass the checks that would be required for a specified value
#If the caller manually specified a value that happens to match the default - we don't detect that as any different from an unspecified value - Review.
if {!$is_default} {
if {[dict exists $arg_info $o -type]} {
set type [dict get $arg_info $o -type]
if {[string tolower $type] in {int integer double}} {
if {[string tolower $type] in {int integer}} {
foreach e $vlist e_check $vlist_check {
if {![string is integer -strict $e_check]} {
error "Option $o for $caller requires type 'integer'. Received: '$e'"
}
}
} elseif {[string tolower $type] in {double}} {
foreach e $vlist e_check $vlist_check {
if {![string is double -strict $e_check]} {
error "Option $o for $caller requires type 'double'. Received: '$e'"
}
}
}
#todo - small-value double comparisons with error-margin? review
if {[dict exists $arg_info $o -range]} {
lassign [dict get $arg_info $o -range] low high
foreach e $vlist e_check $vlist_check {
if {$e_check < $low || $e_check > $high} {
error "Option $o for $caller must be between $low and $high. Received: '$e'"
}
}
}
} elseif {[string tolower $type] in {bool boolean}} {
foreach e $vlist e_check $vlist_check {
if {![string is boolean -strict $e_check]} {
error "Option $o for $caller requires type 'boolean'. Received: '$e'"
}
}
} elseif {[string tolower $type] in {alnum alpha ascii control digit graph lower print punct space upper wordchar xdigit}} {
foreach e $vlist e_check $vlist_check {
if {![string is [string tolower $type] $e_check]} {
error "Option $o for $caller requires type '[string tolower $type]'. Received: '$e'"
#nscommands returns exactly one line per entry + a trailing newline. If there is an empty line other than at the end - that is because there is a command named as the empty string.
# By default 'linelist' trims 1st and last empty line. Turn off all block trimming with -block {}