package provide punk [namespace eval punk { #FUNCTL variable version set version 0.1 }] #Punk - where radical modification is a craft and anti-patterns are another exploratory tool for the Pattern Punk. #Built on Tcl of course - because it's the most powerful piece of under-appreciated and alternate-thinking engineering you can plug into. #repltelemetry cooperation with other packages such as shellrun #Maintenance warning: shellrun expects repltelemetry_emmitters to exist if punk namespace exists namespace eval punk { variable repltelemetry_emmitters #don't stomp.. even if something created this namespace in advance and is 'cooperating' a bit early if {![info exists repltelemetry_emitters]} { set repltelemetry_emmitters [list] } } namespace eval punk::pipecmds { #where to install proc/compilation artifacts for pieplines namespace export * } #globals... some minimal global var pollution #punk's official silly test dictionary set punk_testd [dict create \ a0 a0val \ b0 [dict create \ a1 b0a1val \ b1 b0b1val \ c1 b0c1val \ d1 b0d1val \ ] \ c0 [dict create] \ d0 [dict create \ a1 [dict create \ a2 d0a1a2val \ b2 d0a1b2val \ c2 d0a1c2val \ ] \ b1 [dict create \ a2 [dict create \ a3 d0b1a2a3val \ b3 d0b1a2b3val \ ] \ b2 [dict create \ a3 d0b1b2a3val \ bananas "in pyjamas" \ c3 [dict create \ po "in { }" \ b4 ""\ c4 "can go boom" \ ] \ d3 [dict create \ a4 "-paper -cuts" \ ] \ e3 [dict create] \ ] \ ] \ ] \ ] #impolitely cooperative withe punk repl - todo - tone it down. namespace eval ::repl { variable running 0 } package require punk::config namespace eval punk { interp alias {} purelist {} lreplace x 0 0 ;#required by pipe system package require pattern package require shellfilter package require punkapp package require funcl package require control control::control assert enabled 1 namespace import ::control::assert package require struct::list package require fileutil #package require punk::lib #NOTE - always call debug.xxx with braced message instead of double-quoted (unless specifically intending to do double-subtition) #(or $ within values will be substituted, causing an extra error message if the var doesn't exist - which it quite possibly doesn't) package require debug debug define punk.unknown debug define punk.pipe debug define punk.pipe.var debug define punk.pipe.args debug define punk.pipe.rep ;#string/list representation with tcl::unsupported::representation debug define punk.pipe.compile ;#info about when we compile pipeline components into procs etc #----------------------------------- # todo - load initial debug state from config debug off punk.unknown debug level punk.unknown 1 debug off punk.pipe debug level punk.pipe 4 debug off punk.pipe.var debug level punk.pipe.var 4 debug off punk.pipe.args debug level punk.pipe.args 3 debug off punk.pipe.rep 2 debug on punk.pipe.compile debug level punk.pipe.compile 4 debug header "dbg> " variable last_run_display [list] variable colour_disabled 0 variable ns_current "::" #variable re_headvar1 {([a-zA-Z:@.(),]+?)(?![^(]*\))(,.*)*$} proc ::punk::K {x y} { return $x} proc ::punk::var {varname {= {}} args} { upvar $varname the_var if {${=} == "="} { if {[llength $args] > 1} { set the_var [uplevel 1 $args] } else { set the_var [lindex $args 0] } } else { set the_var } } #https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ # #we can't provide a float comparison suitable for every situation, #but we pick something reasonable, keep it stable, and document it. proc float_almost_equal {a b} { package require math::constants set diff [expr {abs($a - $b)}] if {$diff <= $math::constants::eps} { return 1 } set A [expr {abs($a)}] set B [expr {abs($b)}] set largest [expr {($B > $A) ? $B : $A}] return [expr {$diff <= $largest * $math::constants::eps}] } #boolean could be tr, true, y, ye, yes,Yes, 1 , 0 etc. proc boolean_equal {a b} { #equivalenttly xnor: expr {!(($a && 1) ^ ($b && 1))} ;# less clear and no discernable timing benefit. expr {($a && 1) == ($b && 1)} } #debatable whether boolean_almost_equal is likely to be surprising or helpful. #values from a calculation that are extremely close to zero but aren't false could also be surprising - especially if they compare equal numerically #perhaps a fuzzy-boolean is a step too far for a default - but it's inline with float-comparison for pattern-matching. use an even more complex classifier? (^&~) ? proc boolean_almost_equal {a b} { if {[string is double -strict $a]} { if {[float_almost_equal $a 0]} { set a 0 } } if {[string is double -strict $b]} { if {[float_almost_equal $b 0]} { set b 0 } } #must handle true,no etc. expr {($a && 1) == ($b && 1)} } proc varinfo {vname {flag ""}} { upvar $vname v if {[array exists $vname]} { error "can't read \"$vname\": variable is array" } if {[catch {set v} err]} { error "can't read \"$vname\": no such variable" } set inf [shellfilter::list_element_info [list $v]] set inf [dict get $inf 0] if {$flag eq "-v"} { return $inf } set output [dict create] dict set output wouldbrace [dict get $inf wouldbrace] dict set output wouldescape [dict get $inf wouldescape] dict set output head_tail_names [dict get $inf head_tail_names] dict set output len [dict get $inf len] return $output } namespace eval ensemble { #wiki.tcl-lang.org/page/ensemble+extend # extend an ensemble-like routine with the routines in some namespace proc extend {routine extension} { if {![string match ::* $routine]} { set resolved [uplevel 1 [list ::namespace which $routine]] if {$resolved eq {}} { error [list {no such routine} $routine] } set routine $resolved } set routinens [namespace qualifiers $routine] if {$routinens eq {::}} { set routinens {} } set routinetail [namespace tail $routine] if {![string match ::* $extension]} { set extension [uplevel 1 [ list [namespace which namespace] current]]::$extension } if {![namespace exists $extension]} { error [list {no such namespace} $extension] } set extension [namespace eval $extension [ list [namespace which namespace] current]] namespace eval $extension [ list [namespace which namespace] export *] while 1 { set renamed ${routinens}::${routinetail}_[info cmdcount] if {[namespace which $renamed] eq {}} break } rename $routine $renamed namespace eval $extension [ list namespace ensemble create -command $routine -unknown [ list apply {{renamed ensemble routine args} { list $renamed $routine }} $renamed ] ] return $routine } } #review - extending core commands could be a bit intrusive...although it can make sense in a pipeline. #e.g contrived pipeline example to only allow setting existing keys ## .= @head.= list {a aaa b bbb c ccc} |d,dkeys@keys> |> &true.= {is_list_all_in_list $nkeys $dkeys} |> {dict modify d {*}$new} |> &true.= {is_list_all_ni_list $nkeys $dkeys} |> {dict modify d {*}$new} "] ;# (> required for insertionspecs at rhs of = & .= ) #except when prefixed directly by pin classifier ^ set protect_terminals [list "^"] ;# e.g sequence ^# #also - an atom usually doesn't need the / as a terminal - because it can't match a missing element unless it's empty string #ie the one usecase is '/n to match either empty string or missing item at position n. For this one usecase - we miss the capability to atom match paths/urls .. '/usr/local/et' set in_brackets 0 set in_atom 0 #set varspecs [string trimleft $varspecs ,] set token "" #if {[string first "," $varspecs] <0} { # return $varspecs #} set first_term -1 set token_index 0 ;#index of terminal char within each token set prevc "" set char_index 0 foreach c [split $varspecs ""] { if {$in_atom} { append token $c #set nextc [lindex $chars $char_index+1] if {$c eq "'"} { set in_atom 0 } } elseif {$in_brackets} { append token $c if {$c eq ")"} { set in_brackets 0 } } else { if {$c eq ","} { #lappend varlist [splitstrposn $token $first_term] set var $token set spec "" if {$first_term > 0} { #tcl scan with %s will not handle whitespace as desired. Be explicit using string range instead. #lassign [scan $token %${first_term}s%s] var spec set var [string range $token 0 $first_term-1] set spec [string range $token $first_term end] ;#key section includes the terminal char which ended the var and starts the spec } else { if {$first_term == 0} { set var "" set spec $token } } lappend varlist [list [string trim $var] [string trim $spec]] set token "" set token_index -1 ;#reduce by 1 because , not included in next token set first_term -1 } else { append token $c if {$first_term == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { set first_term $token_index } elseif {$c eq "'"} { set in_atom 1 } elseif {$c eq "("} { set in_brackets 1 } } } set prevc $c incr token_index incr char_index } if {[string length $token]} { #lappend varlist [splitstrposn $token $first_term] set var $token set spec "" if {$first_term > 0} { #lassign [scan $token %${first_term}s%s] var spec set var [string range $token 0 $first_term-1] set spec [string range $token $first_term end] ;#key section includes the terminal char which ended the var and starts the spec } else { if {$first_term == 0} { set var "" set spec $token } } lappend varlist [list [string trim $var] [string trim $spec]] } proc $cmdname {} [list return $varlist] debug.punk.pipe.compile {proc $cmdname} 4 return $varlist } proc _split_var_key_at_unbracketed_comma {varspecs} { set varlist [list] set var_terminals [list "@" "/" "#"] #except when prefixed directly by pin classifier ^ set protect_terminals [list "^"] ;# e.g sequence ^# #also - an atom usually doesn't need the / as a terminal - because it can't match a missing element unless it's empty string #ie the one usecase is '/n to match either empty string or missing item at position n. For this one usecase - we miss the capability to atom match paths/urls .. '/usr/local/et' set in_brackets 0 #set varspecs [string trimleft $varspecs ,] set token "" #if {[string first "," $varspecs] <0} { # return $varspecs #} set first_term -1 set token_index 0 ;#index of terminal char within each token set prevc "" foreach c [split $varspecs ""] { if {$in_brackets} { append token $c if {$c eq ")"} { set in_brackets 0 } } else { if {$c eq ","} { #lappend varlist [splitstrposn $token $first_term] set var $token set spec "" if {$first_term > 0} { lassign [scan $token %${first_term}s%s] var spec } else { if {$first_term == 0} { set var "" set spec $token } } lappend varlist [list $var $spec] set token "" set token_index -1 ;#reduce by 1 because , not included in next token set first_term -1 } else { append token $c if {$first_term == -1 && (($c in $var_terminals) && ($prevc ni $protect_terminals))} { set first_term $token_index } elseif {$c eq "("} { set in_brackets 1 } } } set prevc $c incr token_index } if {[string length $token]} { #lappend varlist [splitstrposn $token $first_term] set var $token set spec "" if {$first_term > 0} { lassign [scan $token %${first_term}s%s] var spec } else { if {$first_term == 0} { set var "" set spec $token } } lappend varlist [list $var $spec] } return $varlist } proc _split_var_key_at_unbracketed_comma1 {varspecs} { set varlist [list] set var_terminals [list "@" "/" "#"] set in_brackets 0 #set varspecs [string trimleft $varspecs ,] set token "" #if {[string first "," $varspecs] <0} { # return $varspecs #} set first_term -1 set token_index 0 ;#index of terminal char within each token foreach c [split $varspecs ""] { if {$in_brackets} { if {$c eq ")"} { set in_brackets 0 } append token $c } else { if {$c eq ","} { if {$first_term > -1} { set v [string range $token 0 $first_term-1] set k [string range $token $first_term end] ;#key section includes the terminal char lappend varlist [list $v $k] } else { lappend varlist [list $token ""] } set token "" set token_index -1 ;#reduce by 1 because , not included in next token set first_term -1 } else { if {$first_term == -1} { if {$c in $var_terminals} { set first_term $token_index } } append token $c if {$c eq "("} { set in_brackets 1 } } } incr token_index } if {[string length $token]} { if {$first_term > -1} { set v [string range $token 0 $first_term-1] set k [string range $token $first_term end] ;#key section includes the terminal char lappend varlist [list $v $k] } else { lappend varlist [list $token ""] } } return $varlist } proc fp_restructure {selector data} { if {$selector eq ""} { fun=.= {val $input} and always break set lhs "" set rhs "" #todo - check performance impact of catches around list and dict operations - consider single catch around destructure and less specific match error info? foreach index $subindices { set subpath [join [lrange $subindices 0 $i_keyindex] /] set lhs $subpath set assigned "" set get_not 0 set already_assigned 0 set do_bounds_check 0 ;#modified by leading single @ for list operations - doesn't apply to certain items like 'head','tail' which have specifically defined bounds-checks implicit in their normal meaning. #thse have anyhead and anytail for explicit allowance to be used on lists with insufficient items to produce values. if {$index eq "#"} { set active_key_type "list" if {![catch {llength $leveldata} assigned]} { set already_assigned 1 } else { set action ?mismatch-not-a-list break } } elseif {$index eq "##"} { set active_key_type "dict" if {![catch {dict size $leveldata} assigned]} { set already_assigned 1 } else { set action ?mismatch-not-a-dict break } } elseif {$index eq "#?"} { set assigned [string length $leveldata] set already_assigned 1 } elseif {$index eq "@"} { upvar v_list_idx v_list_idx ;#positional tracker for /@ - list position set active_key_type "list" #e.g @1/1/@/1 the lone @ is a positional spec for this specific subkey #no normalization done - ie @2/@ will not be considered same subkey as @end/@ or @end-0/@ even if llength = 3 #while x@,y@.= is reasonably handy - especially for args e.g $len} { set action ?mismatch-list-index-out-of-range break } set assigned [lindex $leveldata $index] set already_assigned 1 } else { if {$index in [list "@@" "@?@" "@??@"]} { set active_key_type "dict" #NOTE: it may at first seem pointless to use @@/key, since we have to know the key - but this can be used to match 'key' only at the first position in .= list key {x y} key2 etc #x@@ = a {x y} #x@@/@0 = a #x@@/@1 = x y #x@@/a = a {x y} # but.. as the @@ is stateful - it generally isn't very useful for multiple operations on the same pair within the pattern group. # (note that ?@ forms a different subpath - so can be used to test match prior to @@ without affecting the index) # It is analogous to v1@,v2@ for lists. # @pairs is more useful for repeated operations # #set subpath [join [lrange $subindices 0 $i_keyindex] /] if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict break } set next_this_level [incr v_dict_idx($subpath)] set keyindex [expr {$next_this_level -1}] if {($keyindex + 1) <= $dsize} { set k [lindex [dict keys $leveldata] $keyindex] if {$index eq "@?@"} { set assigned [dict get $leveldata $k] } else { set assigned [list $k [dict get $leveldata $k]] } } else { if {$index eq "@@"} { set action ?mismatch-dict-index-out-of-range break } else { set assigned [list] } } set already_assigned 1 } elseif {[string match @@* $index]} { set active_key_type "dict" set key [string range $index 2 end] #dict exists test is safe - no need for catch if {[dict exists $leveldata $key]} { set assigned [dict get $leveldata $key] } else { set action ?mismatch-dict-key-not-found break } set already_assigned 1 } elseif {[string match {@\?@*} $index]} { set active_key_type "dict" set key [string range $index 3 end] #dict exists test is safe - no need for catch if {[dict exists $leveldata $key]} { set assigned [dict get $leveldata $key] } else { set assigned [list] } set already_assigned 1 } elseif {[string match {@\?\?@*} $index]} { set active_key_type "dict" set key [string range $index 4 end] #dict exists test is safe - no need for catch if {[dict exists $leveldata $key]} { set assigned [list $key [dict get $leveldata $key]] } else { set assigned [list] } set already_assigned 1 } elseif {[string match @* $index]} { set active_key_type "list" set do_bounds_check 1 set index [string trimleft $index @] } else { # } if {!$already_assigned} { if {[string match "not-*" $index] && $active_key_type in [list "" "list"]} { #e.g not-0-end-1 not-end-4-end-2 set get_not 1 #cherry-pick some easy cases, and either assign, or re-map to corresponding index if {$index eq "not-tail"} { set active_key_type "list" set assigned [lindex $leveldata 0]; set already_assigned 1 } elseif {$index in [list "not-head" "not-0"]} { set active_key_type "list" #set selector "tail"; set get_not 0 set assigned [lrange $leveldata 1 end]; set already_assigned 1 } elseif {$index eq "not-end"} { set active_key_type "list" set assigned [lrange $leveldata 0 end-1]; set already_assigned 1 } else { #trim off the not- and let the remaining index handle based on get_not being 1 set index [string range $index 4 end] } } } } if {!$already_assigned} { #keyword 'pipesyntax' at beginning of error message set listmsg "pipesyntax Unable to interpret subindex $index\n" append listmsg "selector: '$selector'\n" append listmsg "@ must be followed by a selector (possibly compound separated by forward slashes) suitable for lindex or lrange commands, or a not-x expression\n" append listmsg "Additional accepted keywords include: head tail\n" append listmsg "Use var@@key to treat value as a dict and retrieve element at key" #we can't just set 'assigned' for a position spec for in/ni (not-in) because we don't have the value here to test against #need to set a corresponding action if {$active_key_type in [list "" "list"]} { set active_key_type "list" #for pattern matching purposes - head/tail not valid on empty lists (similar to elixir) if {$index eq "0"} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } set assigned [lindex $leveldata 0] } elseif {$index eq "head"} { #NOTE: /@head and /head both do bounds check. This is intentional if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } if {$len == 0} { set action ?mismatch-list-index-out-of-range-empty break } #alias for 0 - for h@head,t@tail= similar to erlang/elixir hd() tl() or [head | tail] = list syntax set assigned [lindex $leveldata 0] } elseif {$index eq "end"} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } if {$do_bounds_check && $len < 1} { set action ?mismatch-list-index-out-of-range } set assigned [lindex $leveldata end] } elseif {$index eq "tail"} { #NOTE: /@tail and /tail both do bounds check. This is intentional. if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } #tail is a little different in that we allow tail on a single element list - returning an empty result - but it can't be called on an empty list #arguably tail could be considered as an index-out-of-range for less than 2 elements - but this would be less useful, and surprising to those coming from other pattern-matching systems. #In this way tail is different to @1-end if {$len == 0} { set action ?mismatch-list-index-out-of-range break } set assigned [lrange $leveldata 1 end] ;#return zero or more elements - but only if there is something (a head) at position zero. } elseif {$index eq "anyhead"} { #allow returning of head or nothing if empty list if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } set assigned [lindex $leveldata 0] } elseif {$index eq "anytail"} { #allow returning of tail or nothing if empty list #anytail will return empty both for empty list, or single element list - but potentially useful in combination with anyhead. if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } set assigned [lrange $leveldata 1 end] } elseif {$index eq "init"} { #all but last element - same as haskell 'init' if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } set assigned [lrange $leveldata 0 end-1] } elseif {$index eq "list"} { #allow returning of entire list even if empty if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } set assigned $leveldata } elseif {$index eq "raw"} { #no list checking.. set assigned $leveldata } elseif {$index eq "keys"} { #need active_key_type of 'list' for 'keys' and 'values' keywords which act on either dict or a list with even number of elements if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict break } set assigned [dict keys $leveldata] } elseif {$index eq "values"} { #need active_key_type of 'list' for 'keys' and 'values' keywords which act on either dict or a list with even number of elements if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict break } set assigned [dict values $leveldata] } elseif {$index eq "pairs"} { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict break } #set assigned [dict values $leveldata] set pairs [list] dict for {k v} $leveldata {lappend pairs [list $k $v]} set assigned [lindex [list $pairs [unset pairs]] 0] } elseif {[string is integer -strict $index]} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } # only check if @ was directly in original index section if {$do_bounds_check && ($index+1 > $len || $index < 0)} { set action ?mismatch-list-index-out-of-range break } if {$get_not} { #already handled not-0 set assigned [lreplace $leveldata $index $index] } else { set assigned [lindex $leveldata $index] } } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } #leave the - from the end- as part of the offset set offset [expr $endspec] ;#don't brace! if {$do_bounds_check && ($offset > 0 || abs($offset) >= $len)} { set action ?mismatch-list-index-out-of-range break } if {$get_not} { set assigned [lreplace $leveldata $index $index] } else { set assigned [lindex $leveldata $index] } } elseif {[regexp {^([0-9]+|end|end[-+]{1,2}[0-9]+)-([0-9]+|end|end[-+]{1,2}([0-9]+))$} $index _ start end]} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } if {$do_bounds_check && [string is integer -strict $start]} { if {$start+1 > $len || $start < 0} { set action ?mismatch-list-index-out-of-range break } } elseif {$start eq "end"} { #ok } elseif {$do_bounds_check} { set startoffset [string range $start 3 end] ;#include the - from end- set startoffset [expr $startoffset] ;#don't brace! if {$startoffset > 0 || abs($startoffset) >= $len} { set action ?mismatch-list-index-out-of-range break } } if {$do_bounds_check && [string is integer -strict $end]} { if {$end+1 > $len || $end < 0} { set action ?mismatch-list-index-out-of-range break } } elseif {$end eq "end"} { #ok } elseif {$do_bounds_check} { set endoffset [string range $end 3 end] ;#include the - from end- set endoffset [expr $endoffset] ;#don't brace! if {$endoffset > 0 || abs($endoffset) >= $len} { set action ?mismatch-list-index-out-of-range break } } if {$get_not} { set assigned [lreplace $leveldata $start $end] } else { set assigned [lrange $leveldata $start $end] } } else { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } elseif {[string first - $index] > 0} { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list break } #handle pure int-int ranges separately set testindex [string map [list - "" + ""] $index] if {[string is digit -strict $testindex]} { #don't worry about leading - negative value for indices not valid anyway set parts [split $index -] if {[llength $parts] != 2} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } lassign $parts start end if {$start+1 > $len || $end+1 > $len} { set action ?mismatch-not-a-list break } if {$get_not} { set assigned [lreplace $leveldata $start $end] } else { set assigned [lrange $leveldata $start $end] } } else { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } else { #keyword 'pipesyntax' at beginning of error message error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } else { #treat as dict key set active_key_type "dict" if {[dict exists $leveldata $index]} { set assigned [dict get $leveldata $index] } else { set action ?mismatch-dict-key-not-found break } } } set leveldata $assigned set rhs $leveldata #don't break on empty data - operations such as # and ## can return 0 #if {![llength $leveldata]} { # break #} incr i_keyindex } #puts stdout "----> destructure rep leveldata: [rep $leveldata]" #puts stdout ">> destructure returning: [dict create -assigned $leveldata -action $action -lhs $lhs -rhs $rhs]" #maintain key order - caller unpacks using lassign return [dict create -assigned $leveldata -action $action -lhs $lhs -rhs $rhs] } #todo - fp_destructure - return a function-pipeline that can then be transformed to a funcl and finally a tcl script proc destructure_func {selector data} { #puts stderr ".d." set selector [string trim $selector /] #upvar v_list_idx v_list_idx ;#positional tracker for /@ - list position #upvar v_dict_idx v_dict_idx ;#positional tracker for /@@ - dict position set leveldata $data set cmdname ::punk::pipecmds::destructure_$selector if {$cmdname in [info commands $cmdname]} { tailcall $cmdname $data } set script "proc $cmdname {leveldata} {" append script \n [string map [list $selector] {set selector ""}] ;# script should only need for error msgs set subindices [split $selector /] append script \n [string map [list [list $subindices] ] {set subindices }] set action ?match ;#default assumption. Alternatively set to ?mismatch or ?mismatch- and always break append script \n {set action ?match} #append script \n {set assigned ""} ;#review set active_key_type "" append script \n {# set activey_key_type ""} set lhs $selector append script \n [string map [list $selector ] {set lhs ""}] set rhs "" append script \n {set rhs ""} set selector_script_complete 0 if {![string length $selector]} { append script \n { set assigned $leveldata set rhs $leveldata set leveldata $assigned } set selector_script_complete 1 } elseif {[string is digit -strict [join $subindices ""]]} { #puts stderr ">>>>>>>>>>>>>>>> data: $leveldata selector: $selector subindices: $subindices" #pure numeric keylist - put straight to lindex # #NOTE: this direct access e.g v/0/1/2 doesn't check out of bounds which is at odds with list access containing @ #We will leave this as a syntax for different (more performant) behaviour #- it's potentially a little confusing - but it would be a shame not to have the possibility to take advantage of the lindex deep indexing capability in pattern matching. #TODO - review and/or document # #Todo - add a handler for v/n/n/n/n/# to allow unchecked counting at depth too. #set assigned [lindex $leveldata {*}$subindices] append script \n [string map [list $subindices] { set assigned [lindex $leveldata ] set rhs $leveldata set leveldata $assigned }] set selector_script_complete 1 } elseif {([scan $selector %d-%d a b] == 2) && $selector eq "${a}-${b}"} { #single-level pure digit range a-b - no bounds checking append script \n [string map [list $a $b] { set assigned [lrange $leveldata ] set rhs $leveldata set leveldata $assigned }] #lset var_actions $i 1 ?set #lset var_actions $i 2 $assigned set selector_script_complete 1 } elseif {$selector eq "0"} { #review - can we get here? append script \n { if {[catch {lindex $leveldata 0} hd]} { set action ?mismatch-not-a-list } else { set assigned $hd set rhs $leveldata set leveldata $assigned } } set selector_script_complete 1 } elseif {$selector eq "head"} { #head is never allowed to match empty list - (vs anyhead to allow) append script \n { if {[catch {lindex $leveldata 0} hd]} { set action ?mismatch-not-a-list } else { if {[llength $leveldata] == 0} { set action ?mismatch-list-index-out-of-range-empty } else { set assigned $hd set rhs $leveldata set leveldata $assigned } } } set selector_script_complete 1 } elseif {$selector eq "#"} { # always present as /# - / required to separate from @@# maining dict key "#" - also leading # would be a comment. append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned $len set rhs $leveldata set leveldata $assigned } } set selector_script_complete 1 } elseif {$selector eq "##"} { # /## append script \n { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict } else { set assigned $dsize set rhs $leveldata set leveldata $assigned } } set selector_script_complete 1 } elseif {$selector eq "#?"} { append script \n { set assigned [string length $leveldata] set rhs $leveldata set leveldata $assigned } set selector_script_complete 1 } elseif {[string match "@@*" $selector]} { #part following a double @ is dict key possibly with forward-slash separators for subpath access e.g @@key/subkey/etc set rawkeylist [split $selector /] ;#first key retains @@ - may be just '@@' set keypath [string range $selector 2 end] set keylist [split $keypath /] if {([lindex $rawkeylist 0] ne "@@") && ([lsearch $keylist @*] == -1) && ([lsearch $keylist #*] == -1)} { #pure keylist for dict - process in one go #dict exists will return 0 if not a valid dict. # is equivalent to {*}keylist when substituted append script \n [string map [list $keylist] { if {[dict exists $leveldata ]} { set assigned [dict get $leveldata ] set rhs $leveldata set leveldata $assigned } else { set action ?mismatch-dict-key-not-found } }] set selector_script_complete 1 } else { #compound keylist e.g x@@data/@0/1 or x@@/a (combined dict/list access) #process level by level set selector_script_complete 0 } } else { set selector_script_complete 0 } if {!$selector_script_complete} { set i_keyindex 0 append script \n {set i_keyindex 0} #todo - check performance impact of catches around list and dict operations - consider single catch around destructure and less specific match error info? foreach index $subindices { set level_script_complete 0 ;#instead of break - as we can't use data to determine break when building script set subpath [join [lrange $subindices 0 $i_keyindex] /] append script \n "# ------- START index $index ------" append script \n "set subpath $subpath" set lhs $subpath append script \n "set lhs $subpath" set assigned "" append script \n {set assigned ""} #got_not shouldn't need to be in script set get_not 0 # do_bounds_check shouldn't need to be in script set do_bounds_check 0 ;#modified by leading single @ for list operations - doesn't apply to certain items like 'head','tail' which have specifically defined bounds-checks implicit in their normal meaning. #thse have anyhead and anytail for explicit allowance to be used on lists with insufficient items to produce values. #append script \n {set do_boundscheck 0} if {$index eq "#"} { set active_key_type "list" append script \n {# set active_key_type "list"} append script \n { if {[catch {llength $leveldata} assigned]} { set action ?mismatch-not-a-list } } set level_script_complete 1 } elseif {$index eq "##"} { set active_key_type "dict" append script \n {# set active_key_type "dict"} append script \n { if {[catch {dict size $leveldata} assigned]} { set action ?mismatch-not-a-dict } } set level_script_complete 1 } elseif {$index eq "#?"} { #set assigned [string length $leveldata] append script \n {set assigned [string length $levedata]} set level_script_complete 1 } elseif {$index eq "@"} { append script \n {upvar v_list_idx v_list_idx} set active_key_type "list" append script \n {# set active_key_type "list"} #e.g @1/1/@/1 the lone @ is a positional spec for this specific subkey #no normalization done - ie @2/@ will not be considered same subkey as @end/@ or @end-0/@ even if llength = 3 #while x@,y@.= is reasonably handy - especially for args e.g $len} { # set action ?mismatch-list-index-out-of-range # break #} append script \n {set index [expr {[incr v_list_idx($subpath)]-1}]} append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } elseif {$index+1 > $len} { set action ?mismatch-list-index-out-of-range } else { set assigned [lindex $leveldata $index] } } #set assigned [lindex $leveldata $index] set level_script_complete 1 } else { if {$index in [list "@@" "@?@" "@??@"]} { set active_key_type "dict" append script \n {# set active_key_type "dict"} append script \n {upvar v_dict_idx v_dict_idx} #NOTE: it may at first seem pointless to use @@/key, since we have to know the key - but this can be used to match 'key' only at the first position in .= list key {x y} key2 etc #x@@ = a {x y} #x@@/@0 = a #x@@/@1 = x y #x@@/a = a {x y} # but.. as the @@ is stateful - it generally isn't very useful for multiple operations on the same pair within the pattern group. # (note that ?@ forms a different subpath - so can be used to test match prior to @@ without affecting the index) # It is analogous to v1@,v2@ for lists. # @pairs is more useful for repeated operations # #if {[catch {dict size $leveldata} dsize]} { # set action ?mismatch-not-a-dict # break #} else { # set next_this_level [incr v_dict_idx($subpath)] # set keyindex [expr {$next_this_level -1}] # if {($keyindex + 1) <= $dsize} { # set k [lindex [dict keys $leveldata] $keyindex] # if {$index eq "@?@"} { # set assigned [dict get $leveldata $k] # } else { # set assigned [list $k [dict get $leveldata $k]] # } # } else { # if {$index eq "@@"} { # set action ?mismatch-dict-index-out-of-range # break # } else { # set assigned [list] # } # } #} set subscript { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict } else { set next_this_level [incr v_dict_idx($subpath)] set keyindex [expr {$next_this_level -1}] } } set indent " " if {$index eq "@?@"} { set body [string map [list \r\n "\r\n$indent" \n "\n$indent" ] { if {($keyindex + 1) <= $dsize} { set k [lindex [dict keys $leveldata] $keyindex] set assigned [dict get $leveldata $k] } else { set assigned [list] } }] } elseif {$index eq "@@"} { set body [string map [list \r\n "\r\n$indent" \n "\n$indent" ] { if {($keyindex + 1) <= $dsize} { set k [lindex [dict keys $leveldata] $keyindex] set assigned [list $k [dict get $leveldata $k]] } else { set action ?mismatch-dict-index-out-of-range } }] } else { set body [string map [list \r\n "\r\n$indent" \n "\n$indent" ] { if {($keyindex + 1) <= $dsize} { set k [lindex [dict keys $leveldata] $keyindex] set assigned [list $k [dict get $leveldata $k]] } else { set assigned [list] } }] } append script \n [string map [list $body] $subscript] set level_script_complete 1 } elseif {[string match @@* $index]} { set active_key_type "dict" set key [string range $index 2 end] #dict exists test is safe - no need for catch append script \n [string map [list $key] { # set active_key_type "dict" if {[dict exists $leveldata ]} { set assigned [dict get $leveldata ] } else { set action ?mismatch-dict-key-not-found } }] set level_script_complete 1 } elseif {[string match {@\?@*} $index]} { set active_key_type "dict" set key [string range $index 3 end] #dict exists test is safe - no need for catch append script \n [string map [list $key] { # set active_key_type "dict" if {[dict exists $leveldata ]} { set assigned [dict get $leveldata ] } else { set assigned [list] } }] set level_script_complete 1 } elseif {[string match {@\?\?@*} $index]} { set active_key_type "dict" set key [string range $index 4 end] #dict exists test is safe - no need for catch append script \n [string map [list $key] { # set active_key_type "dict" if {[dict exists $leveldata ]} { set assigned [list [dict get $leveldata ]] } else { set assigned [list] } }] set level_script_complete 1 } elseif {[string match @* $index]} { set active_key_type "list" set do_bounds_check 1 set index [string trimleft $index @] append script \n [string map [list $index] { # set active_key_type "list" set index }] } else { # } if {[string match "not-*" $index] && $active_key_type in [list "" "list"]} { append script \n {#e.g not-0-end-1 not-end-4-end-2} set get_not 1 #cherry-pick some easy cases, and either assign, or re-map to corresponding index if {$index eq "not-tail"} { append script \n {# set active_key_type "list"} append script \n {set assigned [lindex $leveldata 0]} set level_script_complete 1 } elseif {$index in [list "not-head" "not-0"]} { append script \n {# set active_key_type "list"} append script \n {set assigned [lrange $leveldata 1 end]} set level_script_complete 1 } elseif {$index eq "not-end"} { append script \n {# set active_key_type "list"} append script \n {set assigned [lrange $leveldata 0 end-1]} set level_script_complete 1 } else { #trim off the not- and let the remaining index handle based on get_not being 1 set index [string range $index 4 end] append script \n "set index $index" } } } if {!$level_script_complete} { append script \n {if {$action eq "?match"}} " {" #keyword 'pipesyntax' at beginning of error message set listmsg "pipesyntax Unable to interpret subindex $index\n" append listmsg "selector: '$selector'\n" append listmsg "@ must be followed by a selector (possibly compound separated by forward slashes) suitable for lindex or lrange commands, or a not-x expression\n" append listmsg "Additional accepted keywords include: head tail\n" append listmsg "Use var@@key to treat value as a dict and retrieve element at key" #append script \n [string map [list $listmsg] {set listmsg ""}] #we can't just set 'assigned' for a position spec for in/ni (not-in) because we don't have the value here to test against #need to set a corresponding action if {$active_key_type in [list "" "list"]} { set active_key_type "list" append script \n {# set active_key_type "list"} #for pattern matching purposes - head/tail not valid on empty lists (similar to elixir) if {$index eq "0"} { #if {[catch {llength $leveldata} len]} { # set action ?mismatch-not-a-list # break #} #set assigned [lindex $leveldata 0] append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned [lindex $leveldata 0] } } } elseif {$index eq "head"} { #NOTE: /@head and /head both do bounds check. This is intentional append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } elseif {$len == 0} { set action ?mismatch-list-index-out-of-range-empty } else { #alias for 0 - for h@head,t@tail= similar to erlang/elixir hd() tl() or [head | tail] = list syntax set assigned [lindex $leveldata 0] } } } elseif {$index eq "end"} { if {$do_bounds_check} { append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } elseif {$len < 1} { set action ?mismatch-list-index-out-of-range } else { set assigned [lindex $leveldata end] } } } else { append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned [lindex $leveldata end] } } } } elseif {$index eq "tail"} { #NOTE: /@tail and /tail both do bounds check. This is intentional. # #tail is a little different in that we allow tail on a single element list - returning an empty result - but it can't be called on an empty list #arguably tail could be considered as an index-out-of-range for less than 2 elements - but this would be less useful, and surprising to those coming from other pattern-matching systems. #In this way tail is different to @1-end append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } elseif {$len == 0} { set action ?mismatch-list-index-out-of-range } else { set assigned [lrange $leveldata 1 end] ;#return zero or more elements - but only if there is something (a head) at position zero. } } } elseif {$index eq "anyhead"} { #allow returning of head or nothing if empty list append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned [lindex $leveldata 0] } } } elseif {$index eq "anytail"} { #allow returning of tail or nothing if empty list #anytail will return empty both for empty list, or single element list - but potentially useful in combination with anyhead. append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned [lrange $leveldata 1 end] } } } elseif {$index eq "init"} { #all but last element - same as haskell 'init' append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned [lrange $leveldata 0 end-1] } } } elseif {$index eq "list"} { #allow returning of entire list even if empty append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set assigned $leveldata } } } elseif {$index eq "raw"} { #no list checking.. append script \n {set assigned $leveldata} } elseif {$index eq "keys"} { #need active_key_type of 'list' for 'keys' and 'values' keywords which act on either dict or a list with even number of elements append script \n { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict } else { set assigned [dict keys $leveldata] } } } elseif {$index eq "values"} { #need active_key_type of 'list' for 'keys' and 'values' keywords which act on either dict or a list with even number of elements append script \n { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict } else { set assigned [dict values $leveldata] } } } elseif {$index eq "pairs"} { append script \n { if {[catch {dict size $leveldata} dsize]} { set action ?mismatch-not-a-dict } else { set pairs [list] dict for {k v} $leveldata {lappend pairs [list $k $v]} set assigned [lindex [list $pairs [unset pairs]] 0] } } } elseif {[string is integer -strict $index]} { if {$get_not} { set assign_script [string map [list $index] { #not- was specified (already handled not-0) set assigned [lreplace $leveldata ] }] } else { set assign_script [string map [list $index] {set assigned [lindex $leveldata ]}] } if {$do_bounds_check} { if {$index < 0} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector index_lessthanzero_out_of_bounds_for_all_data_while_bounds_check_on] } set max [expr {$index + 1}] append script \n [string map [list $max $assign_script] { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { set max # bounds_check due to @ directly specified in original index section if {$max > $len} { set action ?mismatch-list-index-out-of-range } else { } } }] } else { append script \n [string map [list $assign_script] { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { } }] } } elseif {[string first "end" $index] >=0} { if {[regexp {^end([-+]{1,2}[0-9]+)$} $index _match endspec]} { if {$get_not} { set assign_script [string map [list $index] { #not- was specified (already handled not-0) set assigned [lreplace $leveldata ] }] } else { set assign_script [string map [list $index ] {set assigned [lindex $leveldata ]}] } if {$do_bounds_check} { append script \n [string map [list $assign_script $endspec] { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { #bounds-check is true #leave the - from the end- as part of the offset set offset [expr ] ;#don't brace! if {($offset > 0 || abs($offset) >= $len)} { set action ?mismatch-list-index-out-of-range } else { } } }] } else { append script \n [string map [list $assign_script] { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } else { } }] } } elseif {[regexp {^([0-9]+|end|end[-+]{1,2}[0-9]+)-([0-9]+|end|end[-+]{1,2}([0-9]+))$} $index _ start end]} { if {$get_not} { set assign_script [string map [list $start $end ] { #not- was specified (already handled not-0) set assigned [lreplace $leveldata ] }] } else { set assign_script [string map [list $start $end] {set assigned [lrange $leveldata ]}] } append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } } if {$do_bounds_check} { if {[string is integer -strict $start]} { if {$start < 0} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector start_lessthanzero_out_of_bounds_for_all_data_while_bounds_check_on] } append script \n [string map [list $start] { set start if {$start+1 > $len} { set action ?mismatch-list-index-out-of-range } }] } elseif {$start eq "end"} { #noop } else { set startoffset [string range $start 3 end] ;#include the - from end- set startoffset [expr $startoffset] ;#don't brace! if {$startoffset > 0} { #e.g end+1 error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector end+x_out_of_bounds_for_all_data_while_bounds_check_on] } append script \n [string map [list $startoffset] { set startoffset if {abs($startoffset) >= $len} { set action ?mismatch-list-index-out-of-range } }] } if {[string is integer -strict $end]} { if {$end < 0} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector end_lessthanzero_out_of_bounds_for_all_data_while_bounds_check_on] } append script \n [string map [list $end] { set end if {$end+1 > $len} { set action ?mismatch-list-index-out-of-range } }] } elseif {$end eq "end"} { #noop } else { set endoffset [string range $end 3 end] ;#include the - from end- set endoffset [expr $endoffset] ;#don't brace! if {$endoffset > 0} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector end+x_out_of_bounds_for_all_data_while_bounds_check_on] } append script \n [string map [list $endoffset] { set endoffset if {abs($endoffset) >= $len} { set action ?mismatch-list-index-out-of-range } }] } } append script \n [string map [list $assign_script] { if {![string match ?mismatch-* $action]} { } }] } else { #fail now - no need for script error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } elseif {[string first - $index] > 0} { if {$get_not} { set assign_script [string map [list $index] { #not- was specified (already handled not-0) set assigned [lreplace $leveldata ] }] } else { set assign_script [string map [list $index] {set assigned [lindex $leveldata ]}] } append script \n { if {[catch {llength $leveldata} len]} { set action ?mismatch-not-a-list } } #handle pure int-int ranges separately set testindex [string map [list - "" + ""] $index] if {[string is digit -strict $testindex]} { #don't worry about leading - negative value for indices not valid anyway set parts [split $index -] if {[llength $parts] != 2} { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } lassign $parts start end append script [string map [list $start $end] { set start set end if {$start+1 > $len || $end+1 > $len} { set action ?mismatch-not-a-list } }] } else { error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } append script \n [string map [list $assign_script] { if {![string match ?mismatch-* $action]} { } }] } else { #keyword 'pipesyntax' at beginning of error message #pipesyntax error - no need to even build script - can fail now error $listmsg "destructure $selector" [list pipesyntax destructure selector $selector] } } else { #treat as dict key append script \n [string map [list $index] { # set active_key_type "dict" if {[dict exists $leveldata ]} { set assigned [dict get $leveldata ] } else { set action ?mismatch-dict-key-not-found } }] } append script \n "}" ;# if $action eq ?match } ;# end if $level_script_complete append script \n { if {$action eq "?match"} { set rhs $leveldata set leveldata $assigned } } incr i_keyindex append script \n "# ------- END index $index ------" } ;# end foreach } ;# end if !$selector_script_complete #puts stdout "----> destructure rep leveldata: [rep $leveldata]" #puts stdout ">> destructure returning: [dict create -assigned $leveldata -action $action -lhs $lhs -rhs $rhs]" #maintain key order - caller unpacks using lassign # # append script \n {dict create -assigned $leveldata -action $action -lhs $lhs -rhs $rhs} append script \n "}" \n eval $script debug.punk.pipe.compile {proc $cmdname} 4 #return [dict create -assigned $leveldata -action $action -lhs $lhs -rhs $rhs] tailcall $cmdname $data } proc _var_classify {multivar} { set cmdname ::punk::pipecmds::var_classify_$multivar if {$cmdname in [info commands $cmdname]} { return [$cmdname] } #comma seems a natural choice to split varspecs, #but also for list and dict subelement access #/ normally indicates some sort of hierarchical separation - (e.g in filesytems) #so / will indicate subelements e.g @0/1 for lindex $list 0 1 #set valsource_key_list [_split_var_key_at_unbracketed_comma $multivar] set valsource_key_list [_split_patterns $multivar] #mutually exclusive - atom/pin #set map [list "" ' ^ &] ;#0 = default/var/not-yet-determined 1 = atom 2 = pin #set var_class [lmap var $valsource_key_list {expr {([set m [lsearch $map [string index [lindex $var 0] 0]]] >= 0) ? [list $var $m] : [list $var 0]}}] #0 - novar #1 - atom ' #2 - pin ^ #3 - boolean & #4 - integer #5 - double #6 - var #7 - glob (no classifier and contains * or ?) #8 - numeric #9 - > (+) #10 - < (-) set var_names [list] set var_class [list] set varspecs_trimmed [list] ;#raw varspecs without pin/atom modifiers - or empty string for glob set leading_classifiers [list "'" "&" "^" ] set trailing_classifiers [list + -] set possible_number_start [list - + . 0 1 2 3 4 5 6 7 8 9 > <] foreach v_key $valsource_key_list { lassign $v_key v key set vname $v ;#default set classes [list] if {$v eq ""} { lappend var_class [list $v_key 0] lappend varspecs_trimmed $v_key } else { set firstchar [string index $v 0] set lastchar [string index $v end] if {$lastchar eq "+"} { lappend classes 9 set vname [string range $v 0 end-1] } if {$lastchar eq "-"} { lappend classes 10 set vname [string range $v 0 end-1] } if {$firstchar in $leading_classifiers} { if {$firstchar eq "'"} { lappend var_class [list $v_key 1] #set vname [string range $v 1 end] lappend varspecs_trimmed [list $vname $key] } elseif {$firstchar eq "^"} { lappend classes [list 2] #use vname - may already have trailing +/- stripped set vname [string range $vname 1 end] set secondclassifier [string index $v 1] if {$secondclassifier eq "&"} { #pinned boolean lappend classes 3 set vname [string range $v 2 end] } elseif {$secondclassifier eq "#"} { #pinned numeric comparison instead of string comparison lappend classes 8 set vname [string range $vname 1 end] } elseif {$secondclassifier eq "*"} { #pinned glob lappend classes 7 set vname [string range $v 2 end] } #todo - check for second tag - & for pinned boolean? #consider requiring ^# for numeric comparisons. currently no way to do a strictly string comparison on pinned variables.... default ^var really shouldn't be doing any magic. #while we're at it.. pinned glob would be nice. ^* #maybe even pinned scan ^% ? regex? ^/ or ^? these would be hard to have corresponding literals in the pattern mini-lang. #These all limit the range of varnames permissible - which is no big deal. lappend var_class [list $v_key $classes] lappend varspecs_trimmed [list $vname $key] } elseif {$firstchar eq "&"} { #we require boolean literals to be single-quoted so we can use cross-binding on boolean vars. #ie &true is the variable true whereas &'true' or &'1' &'t' etc are literal booleans #allow exception of &1 &0 to be literal booleans - because we disallow 0 & 1 as varnames in other contexts anyway - so it would be more consistent not to treat as varnames here. lappend var_class [list $v_key 3] set vname [string range $v 1 end] lappend varspecs_trimmed [list $vname $key] } } else { if {([string first ? $v]) >=0 || ([string first * $v] >=0)} { lappend var_class [list $v_key 7] ;#glob #leave vname as the full glob lappend varspecs_trimmed [list "" $key] } else { #scan vname not v - will either be same as v - or possibly stripped of trailing +/- set numtestv [join [scan $vname %lld%s] ""] ;# handles octals (leading zeros), ok for use with bignums, decimal points and sci notation - fails to handle leading dot e.g .5 #leading . still need to test directly for double if {[string is double -strict $vname] || [string is double -strict $numtestv]} { if {[string is integer -strict $numtestv]} { #this will pick up boolean 1 or 0 - but that's ok - they require "&" marker if boolean comparison desired #integer test before double.. #note there is also string is wide (string is wideinteger) for larger ints.. lappend classes 4 lappend var_class [list $v_key $classes] lappend varspecs_trimmed $v_key } else { #double #sci notation 1e123 etc #also large numbers like 1000000000 - even without decimal point - (tcl bignum) lappend classes 5 lappend var_class [list $v_key $classes] lappend varspecs_trimmed $v_key } } else { lappend var_class [list $v_key 6] ;#var lappend varspecs_trimmed $v_key } } } } lappend var_names $vname } set result [list var_names $var_names var_class $var_class varspecs_trimmed $varspecs_trimmed] proc $cmdname {} [list return $result] debug.punk.pipe.compile {proc $cmdname} return $result } #called from match_assign/know_dot_assign for lhs of assignment - uplevel 2 to caller's level #called from match_assign/know_dot_assign for rhs pipelined vars - uplevel 1 to write vars only in 'apply' scope #return a dict with keys result, setvars, unsetvars #TODO - implement cross-binding (as opposed to overwrite/reassignment) when a var appears multiple times in a pattern/multivar #e.g x@0,x@1 will only match if value at positions 0 & 1 is the same (a form of auto-pinning?) #e.g x,x@0 will only match a single element list #todo blocking or - p1|p2 if p1 matches - return p1 and continue pipeline - immediately return p2 if p1 didn't match. (ie p2 not forwarded in pipeline) # non-blocking or - p1||p2 if p1 matches - return p1 and continue pipeline - else match p2 and continue pipeline proc _multi_bind_result {multivar data args} { #puts stdout "---- _multi_bind_result $multivar $data $args" #'ismatch' must always be first element of dict - as we dispatch on ismatch 0 or ismatch 1 if {![string length $multivar]} { #treat the absence of a pattern as a match to anything return [dict create ismatch 1 result $data setvars {} script {}] } set returndict [dict create ismatch 0 result "" setvars {}] set script "" set defaults [list -unset 0 -levelup 2 -mismatchinfo 1 -script 0] set opts [dict merge $defaults $args] set unset [dict get $opts -unset] set lvlup [dict get $opts -levelup] set get_mismatchinfo [dict get $opts -mismatchinfo] #first classify into var_returntype of either "pipeline" or "segment" #segment returntype is indicated by leading % set varinfo [_var_classify $multivar] set var_names [dict get $varinfo var_names] set var_class [dict get $varinfo var_class] set varspecs_trimmed [dict get $varinfo varspecs_trimmed] set var_actions [list] set expected_values [list] #e.g {a = abc} {b set ""} foreach classinfo $var_class vname $var_names { lassign [lindex $classinfo 0] v lappend var_actions [list $v "" ""] ;#varactions keeps original lhs - not trimmed version lappend expected_values [list var $vname spec $v info - lhs - rhs -] ;#code looks for 'info -' to see if changed from default } #puts stdout "var_actions: $var_actions" #puts stdout "expected_values: $expected_values" #puts stdout "\n var_class: $var_class\n" # e.g {{x {}} 0} {{y @0} 0} {{'ok @0} 1} {{^v @@key} 2} #set varspecs_trimmed [lmap varinfo $var_class {expr {([lindex $varinfo 1] > 0) ? [list [string range [lindex $varinfo 0 0] 1 end] [lindex $varinfo 0 1]] : [lindex $varinfo 0]}}] #puts stdout "\n varspecs_trimmed: $varspecs_trimmed\n" #var names (possibly empty portion to the left of ) #debug.punk.pipe.var "varnames: $var_names" 4 set v_list_idx(@) 0 ;#for spec with single @ only set v_dict_idx(@@) 0 ;#for spec with @@ only #jn #member lists of returndict which will be appended to in the initial value-retrieving loop set returndict_setvars [dict get $returndict setvars] set assigned_values [list] #varname action value - where value is value to be set if action is set #actions: # "" unconfigured - assert none remain unconfigured at end # noop no-change # matchvar-set name is a var to be matched # matchatom-set names is an atom to be matched # matchglob-set # set # question mark versions are temporary - awaiting a check of action vs var_class # e.g ?set may be changed to matchvar or matchatom or set debug.punk.pipe.var {initial map expected_values: $expected_values} 5 set returnval "" set i 0 #assert i incremented at each continue and at each end of loop - at end i == list length + 1 #always use 'assigned' var in each loop # (for consistency and to assist with returnval) # ^var means a pinned variable - compare value of $var to rhs - don't assign # # In this loop we don't set variables - but assign an action entry in var_actions - all with leading question mark. # as well as adding the data values to the var_actions list # # TODO! we may (commonly) encounter same vkey in the pattern - no need to reparse and re-fetch from data! set vkeys_seen [list] foreach v_and_key $varspecs_trimmed { set vspec [join $v_and_key ""] lassign $v_and_key v vkey set assigned "" #The binding spec begins at first @ or # or / #set firstq [string first "'" $vspec] #set v [lindex $var_names $i] #if v contains any * and/or ? - then it is a glob match - not a varname lassign [destructure_func $vkey $data] _assigned assigned _action matchaction _lhs lhs _rhs rhs if {$matchaction eq "?match"} { set matchaction "?set" } lset var_actions $i 1 $matchaction lset var_actions $i 2 $assigned #update the setvars/unsetvars elements if {[string length $v]} { dict set returndict_setvars $v $assigned } lappend assigned_values $assigned incr i } #todo - fix! this isn't the actual tclvars that were set! dict set returndict setvars $returndict_setvars #assigned_values is the ordered list of source elements in the data (rhs) as extracted by each position-spec #For booleans the final val may later be normalised to 0 or 1 #assert all var_actions were set with leading question mark #perform assignments only if matched ok debug.punk.pipe.var {VAR_CLASS: $var_class} 5 debug.punk.pipe.var {VARACTIONS: $var_actions} 5 debug.punk.pipe.var {VARSPECS_TRIMMED: $varspecs_trimmed} 5 #0 - novar #1 - atom ' #2 - pin ^ #3 - boolean & #4 - integer #5 - double #6 - var #7 - glob (no classifier and contains * or ?) if 0 { debug.punk.pipe.var {atoms: [lsearch -all -inline -index 1 $var_class 1]} 5 debug.punk.pipe.var {pins: [lsearch -all -inline -index 1 $var_class 2]} 5 debug.punk.pipe.var {bools: [lsearch -all -inline -index 1 $var_class 3]} 5 debug.punk.pipe.var {ints: [lsearch -all -inline -index 1 $var_class 4]} 5 debug.punk.pipe.var {doubles: [lsearch -all -inline -index 1 $var_class 5]} 5 debug.punk.pipe.var {vars: [lsearch -all -inline -index 1 $var_class 6]} 5 debug.punk.pipe.var {globs: [lsearch -all -inline -index 1 $var_class 7]} 5 } set match_state [lrepeat [llength $var_names] ?] unset -nocomplain v unset -nocomplain nm set mismatched [list] set i 0 #todo - stop at first mismatch - for pattern matching (especially pipecase - we don't want to waste time reading vars if we already have a mismatch earlier in the pattern) foreach va $var_actions { lassign $va lhsspec act val ;#lhsspec is the full value source for LHS ie the full atom/number/varspec e.g for pattern ^var@@key/@0 it is "^var" set varname [lindex $var_names $i] if {[string match "?mismatch*" $act]} { #already determined a mismatch - e.g list or dict key not present lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch lhs ? rhs $val] break } set class_key [lindex $var_class $i 1] set isatom [expr {$class_key == 1}] set ispin [expr {2 in $class_key}] set isbool [expr {3 in $class_key}] set isint [expr {4 in $class_key}] set isdouble [expr {5 in $class_key}] set isvar [expr {$class_key == 6}] set isglob [expr {7 in $class_key}] set isnumeric [expr {8 in $class_key}] ;#force numeric comparison (only if # classifier present) #marking numbers with pin ^ has undetermined meaning. Perhaps force expr matching only? set isgreaterthan [expr {9 in $class_key}] set islessthan [expr {10 in $class_key}] if {$isatom} { #puts stdout "==>isatom $lhsspec" set lhs [string range $lhsspec 1 end] if {[string index $lhs end] eq "'"} { set lhs [string range $lhs 0 end-1] } lset var_actions $i 1 matchatom-set if {$lhs eq $val} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info match lhs $lhs rhs $val] incr i continue } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info strings-not-equal lhs $lhs rhs $val] break } } # - should set expected_values in each branch where match_state is not set to 1 # - setting expected_values when match_state is set to 0 is ok except for performance #todo - pinned booleans? we would need to disambiguate from a direct value match.. ie double tag as something like: ^&var or #ispin may reclassify as isint,isdouble based on contained value (as they don't have their own classifier char and are unambiguous and require special handling) if {$ispin} { #puts stdout "==>ispin $lhsspec" if {$act in [list "?set" "?matchvar-set"]} { lset var_actions $i 1 matchvar-set #attempt to read upvar $lvlup $varname the_var #if {![catch {uplevel $lvlup [list set $varname]} existingval]} {} if {![catch {set the_var} existingval]} { if {$isbool} { #isbool due to 2nd classifier i.e ^& lset expected_values $i [list var $varname spec $lhsspec info test-lhs-bool lhs $existingval rhs $val] #normalise to LHS! lset assigned_values $i $existingval } elseif {$isglob} { #isglob due to 2nd classifier ^* lset expected_values $i [list var $varname spec $lhsspec info test-lhs-glob lhs $existingval rhs $val] } elseif {$isnumeric} { #flagged as numeric by user using ^# classifiers set testexistingval [join [scan $existingval %lld%s] ""] ;# handles octals (leading zeros), ok for use with bignums, internal decimal points and sci notation (but not leading .) if {[string is integer -strict $testexistingval]} { set isint 1 lset assigned_values $i $existingval lset expected_values $i [list var $varname spec $lhsspec info test-lhs-int lhs $existingval rhs $val] } elseif {[string is double $existingval] || [string is double -strict $testexistingval]} { #test existingval in case something like .5 (which scan will have missed - producing empty testexistingval) set isdouble 1 #doubles comparisons use float_almost_equal - so lhs can differ from rhs - for pins we always want to return the normalised lhs ie exactly what is in the var lset assigned_values $i $existingval lset expected_values $i [list var $varname spec $lhsspec info test-lhs-double lhs $existingval rhs $val] } else { #user's variable doesn't seem to have a numeric value lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch-lhs-not-numeric lhs $existingval rhs $val] break } } else { #standard pin - single classifier ^var lset match_state $i [expr {$existingval eq $val}] if {![lindex $match_state $i]} { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "string-compare-not-equal" lhs $existingval rhs $val] break } else { lset expected_values $i [list var $varname spec $lhsspec info "string-compare-equal" lhs $existingval rhs $val] } } } else { #puts stdout "pinned var $varname result:$result vs val:$val" #failure is *probably* because var is unset - but could be a read-only var due to read-trace or it could be nonexistant namespace lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info failread-$varname lhs ? rhs $val] break } } } if {$isint} { #note - we can have classified (above) a value such as 08 on lhs as integer - even though expr and string is integer don't do so. #expected_values $i [list var $varname spec $lhsspec info match-lhs-int lhs $existingval rhs $val] if {$ispin} { set existing_expected [lindex $expected_values $i] set lhs [dict get $existing_expected lhs] } else { set lhs $lhsspec ;#literal integer in the pattern } if {$isgreaterthan || $islessthan} { set lhs [string range $lhsspec 0 end-1] set testlhs $lhs } if {[string index $lhs 0] eq "."} { set testlhs $lhs } else { set testlhs [join [scan $lhs %lld%s] ""] } if {[string index $val 0] eq "."} { set testval $val } else { set testval [join [scan $val %lld%s] ""] ;# handles octals (leading zeros) and bignums (not leading .) } if {[string is integer -strict $testval]} { if {$isgreaterthan} { #puts "lhsspec: $lhsspec testlhs: $testlhs testval: $testval" if {$testlhs <= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-greater-than-int-int" lhs $lhs rhs $val] break } } elseif {$islessthan} { if {$testlhs >= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-less-than-int-int" lhs $lhs rhs $val] break } } else { if {$testlhs == $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-mismatch-int-int" lhs $lhs rhs $val] break } } } elseif {[string is double -strict $testval]} { #dragons. (and shimmering) if {[string first "e" $val] != -1} { #scientific notation - let expr compare if {$isgreaterhthan} { if {$testlhs <= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-greater-than-int-sci" lhs $lhs rhs $val] break } } elseif {$islessthan} { if {$testlhs >= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-less-than-int-sci" lhs $lhs rhs $val] break } } else { if {$testlhs == $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-mismatch-int-sci" lhs $lhs rhs $val] break } } } elseif {[string is digit -strict [string trim $val -]] } { #probably a wideint or bignum with no decimal point #It seems odd that bignums which just look like large integers should ever compare equal if you do a +1 to one side . #if we use float_almost_equal they may compare equal. on the other hand expr also does apparently inconsistent thins with comparing integer-like bignums vs similar sized nums with .x at the end. #2 values further apart can compare equal while int-like ones closer together can compare different. #The rule seems to be for bignums that if it *looks* like a whole int the comparison is exact - but otherwise the float behaviours kick in. #This is basically what we're doing here but with an arguably better (for some purposes!) float comparison. #string comparison can presumably always be used as an alternative. # #let expr compare if {$isgreaterthan} { if {$testlhs <= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-greater-than-int-puredigits" lhs $lhs rhs $val] break } } elseif {$islessthan} { if {$testlhs >= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-less-than-int-puredigits" lhs $lhs rhs $val] break } } else { if {$testlhs == $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-mismatch-int-puredigits" lhs $lhs rhs $val] break } } } else { if {[punk::float_almost_equal $testlhs $testval]} { lset match_state $i 1 } else { if {$isgreaterthan} { if {$testlhs <= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-greater-than-int-float" lhs $lhs rhs $val] break } } elseif {$islessthan} { if {$testlhs >= $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-not-less-than-int-float" lhs $lhs rhs $val] break } } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "float_almost_equal-mismatch-int-float" lhs $lhs rhs $val] break } } } } else { #e.g rhs not a number.. if {$testlhs == $testval} { lset match_state $i 1 } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "expr-mismatch-unknown-rhstestval-$testval" lhs $lhs rhs $val] break } } } elseif {$isdouble} { #dragons (and shimmering) # # if {$ispin} { set existing_expected [lindex $expected_values $i] set lhs [dict get $existing_expected lhs] } else { set lhs $lhsspec ;#literal integer in the pattern } if {$isgreaterthan || $islessthan} { error "+/- not yet supported for lhs float" set lhs [string range $lhsspec 0 end-1] set testlhs $lhs } if {[string index $val 0] eq "."} { set testval $val ;#not something with some number of leading zeros } else { set testval [join [scan $val %lld%s] ""] ;# handles octals (leading zeros), ok for use with bignums, internal decimal points and sci notation (but not leading .) } #expr handles leading 08.1 0009.1 etc without triggering octal #so we don't need to scan lhs if {[string first "e" $lhs] >= 0 || [string first "e" $testval] >= 0} { if {$lhs == $testval} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info match-expr-sci lhs $lhs rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch-expr-sci lhs $lhs rhs $val] break } } elseif {[string is digit -strict [string trim $lhs -]] && [string is digit -strict [string trim $val -]]} { #both look like big whole numbers.. let expr compare using it's bignum capability if {$lhs == $testval} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info match-expr-pure-digits lhs $lhs rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch-expr-pure-digits lhs $lhs rhs $val] break } } else { #float_almost_equal will disagree with expr based on scale.. just enough to allow for example [expr 0.2 + 0.1] to equal 0.3 - whereas expr will declare a mismatch if {[punk::float_almost_equal $lhs $testval]} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info match-float-almost-equal lhs $lhs rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch-float-almost-equal lhs $lhs rhs $val] break } } } elseif {$isbool} { #Note - cross binding of booleans deliberately doesn't compare actual underlying values - only that truthiness or falsiness matches. #e.g &x/0,&x/1,&x/2= {1 2 yes} # all resolve to true so the cross-binding is ok. # Also - the setting of the variable x is normalized to 1 or 0 only. (true & false would perhaps be nicer - but 1 & 0 are theoretically more efficient for later comparisons as they can have a pure int rep?.) # todo - consider the booleanString rep. Can/should we return true & false instead and maintain efficiency w.r.t shimmering? # #punk::boolean_equal $a $b set extra_match_info "" ;# possible crossbind indication set is_literal_boolean 0 if {$ispin} { #for a pinned boolean - the most useful return is the value in the pinned var rather than the rhs. This is not entirely consistent .. e.g pinned numbers will return rhs !review! #As an additional pattern can always retrieve the raw value - pinned vars returning themselves (normalisation use-case ) seems the most consistent overall, and the most useful set existing_expected [lindex $expected_values $i] set lhs [dict get $existing_expected lhs] } else { set lhs [string range $lhsspec 1 end] ;# - strip off & classifier prefix if {![string length $lhs]} { #empty varname - ok if {[string is boolean -strict $val] || [string is double -strict $val]} { lset match_state $i 1 lset var_actions $i 1 "return-normalised-value" lset assigned_values $i [expr {bool($val)}] lset expected_values $i [list var $varname spec $lhsspec info "return-boolean-rhs-normalised" lhs - rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "mismatch-boolean-rhs" lhs - rhs $val] break } } elseif {$lhs in [list 0 1]} { #0 & 1 are the only literal numbers that satisfy Tcl's 'string is boolean' test. set is_literal_boolean 1 } elseif {[string index $lhs 0] eq "'" && [string index $lhs end] eq "'"} { #literal boolean (&'yes',&'false',&'1',&'0' etc) in the pattern #we won't waste any cycles doing an extra validity test here - it will fail in the comparison below if not a string understood by Tcl to represent a boolean. set is_literal_boolean 1 set lhs [string range $lhs 1 end-1] ;#strip off squotes } else { #todo - a standard variable name checking function for consistency.. for now we'll rule out numbers here to help avoid mistakes. set tclvar $lhs if {[string is double $tclvar]} { error "pipesyntax invalid variable name '$tclvar' for boolean in pattern. (subset of legal tcl vars allowed in pattern context)" "_multi_bind_result $multivar $data $args" [list pipesyntax patternvariable invalid_boolean $tclvar] #proc _multi_bind_result {multivar data args} } #treat as variable - need to check cross-binding within this pattern group set first_bound [lsearch -index 0 $var_actions $lhsspec] if {$first_bound == $i} { #test only rhs (val) for boolean-ness - but boolean-ness as boolean_almost_equal understands it. (e.g floats allowed) if {[string is boolean -strict $val] || [string is double -strict $val]} { lset match_state $i 1 lset var_actions $i 1 [string range $act 1 end] ;# should now be the value "set". We only need this on the first_bound #review - consider what happens if boolean is leftmost pattern - underlying value vs normalised value to continue in pipeline #Passing underlying value is inconsistent with what goes in the tclvar - so we need to update the returnval #puts stderr "==========[lindex $assigned_values $i]" lset var_actions $i 2 [expr {bool($val)}] ;#normalise to 1 or 0 lset assigned_values $i [lindex $var_actions $i 2] #puts stderr "==========[lindex $assigned_values $i]" lset expected_values $i [list var $varname spec $lhsspec info "match-boolean-rhs-any-lhs" lhs - rhs $val] ;#retain underlying val in expected_values for diagnostics. } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "mismatch-boolean-rhs-any-lhs" lhs - rhs $val] break } } else { set expectedinfo [lindex $expected_values $first_bound] set expected_earlier [dict get $expectedinfo rhs] set extra_match_info "-crossbind-first" set lhs $expected_earlier } } } #may have already matched above..(for variable) if {[lindex $match_state $i] != 1} { if {![catch {punk::boolean_almost_equal $lhs $val} ismatch]} { if {$ismatch} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info match-boolean-almost-equal$extra_match_info lhs $lhs rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info mismatch-boolean-almost-equal$extra_match_info lhs $lhs rhs $val] break } } else { #we should only error from boolean_equal if passed something Tcl doesn't recognise as a boolean lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info badvalue-boolean$extra_match_info lhs $lhs rhs $val] break } } } elseif {$isglob} { if {$ispin} { set existing_expected [lindex $expected_values $i] set lhs [dict get $existing_expected lhs] } else { set lhs $lhsspec ;#literal glob in the pattern - no classifier prefix } if {[string match $lhs $val]} { lset match_state $i 1 lset expected_values $i [list var $varname spec $lhsspec info "match-glob" lhs $lhs rhs $val] } else { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "mismatch-glob" lhs $lhs rhs $val] break } } elseif {$ispin} { #handled above.. leave case in place so we don't run else for pins } else { #puts stdout "==> $lhsspec" #NOTE - pinned var of same name is independent! #ie ^x shouldn't look at earlier x bindings in same pattern #unpinned non-atoms #cross-binding. Within this multivar pattern group only (use pin ^ for binding to result from a previous pattern) # if {$varname eq ""} { #don't attempt cross-bind on empty-varname lset match_state $i 1 #don't change var_action $i 1 to set lset expected_values $i [list var $varname spec $lhsspec info "match-no-lhs-var" lhs - rhs $val] } elseif {$varname eq "_"} { #don't cross-bind on the special 'don't-care' varname lset match_state $i 1 lset var_actions $i 1 [string range $act 1 end] ;# ?set -> set lset expected_values $i [list var $varname spec $lhsspec info "match-any-lhs-dontcare-var" lhs - rhs $val] } else { set first_bound [lsearch -index 0 $var_actions $varname] #assert first_bound >=0, we will always find something - usually self if {$first_bound == $i} { lset match_state $i 1 lset var_actions $i 1 [string range $act 1 end] ;# ?set -> set lset expected_values $i [list var $varname spec $lhsspec info "match-any-lhs" lhs - rhs $val] } else { #assert - first_bound < $i set expectedinfo [lindex $expected_values $first_bound] set expected_earlier [dict get $expectedinfo rhs] if {$expected_earlier ne $val} { lset match_state $i 0 lset expected_values $i [list var $varname spec $lhsspec info "mismatch-crossbind-first" lhs $expected_earlier rhs $val] break } else { lset match_state $i 1 #don't convert ?set to set - or var setter will write for each crossbound instance. Possibly no big deal for performance - but could trigger unnecessary write traces for example #lset var_actions $i 1 [string range $act 1 end] lset expected_values $i [list var $varname spec $lhsspec info "match-crossbind-first" lhs $expected_earlier rhs $val] } } } } incr i } set returnval [lindex $assigned_values 0] #puts stdout "----> > rep returnval: [rep $returnval]" #-------------------------------------------------------------------------- #Variable assignments (set) should only occur down here, and only if we have a match #-------------------------------------------------------------------------- set match_count_needed [llength $var_actions] #set match_count [expr [join $match_state +]] ;#expr must be unbraced here set matches [lsearch -all -inline $match_state 1] ;#default value for each match_state entry is "?" set match_count [llength $matches] debug.punk.pipe.var {MATCH_STATE: $match_state count_needed: $match_count_needed vs match_count: $match_count} 4 debug.punk.pipe.var {VARACTIONS2: $var_actions} 5 debug.punk.pipe.var {EXPECTED : $expected_values} 4 #set match_count [>f . foldl 0 [>f . sum .] $match_state] ;#ok method.. but slow compared to expr with join if {$match_count == $match_count_needed} { #do assignments set i 0 foreach va $var_actions { #set isvar [expr {[lindex $var_class $i 1] == 6}] if {([lindex $var_class $i 1] in [list 6 3]) && ([string length [set varname [lindex $var_names $i]]])} { #isvar lassign $va lhsspec act val upvar $lvlup $varname the_var if {[lindex $var_actions $i 1] eq "set"} { set the_var $val } } incr i } } else { #todo - some way to restrict mismatch info to simple "mismatch" and avoid overhead of verbose message #e.g for within pipeswitch block where mismatches are expected and the reasons are less important than moving on quickly set vidx 0 set mismatches [lmap m $match_state v $var_names {expr {$m == 0} ? {[list mismatch $v]} : {[list match $v]}}] set var_display_names [list] foreach v $var_names { if {$v eq ""} { lappend var_display_names {{}} } else { lappend var_display_names $v } } set mismatches_display [lmap m $match_state v $var_display_names {expr {$m == 0} ? {$v} : {[expr {$m eq "?"} ? {"?[string repeat { } [expr [string length $v] -1]]"} : {[string repeat " " [string length $v]]} ]}}] set msg "\n" append msg "Unmatched\n" append msg "Cannot match right hand side to pattern $multivar\n" append msg "vars/atoms/etc: $var_names\n" append msg "mismatches: [join $mismatches_display { } ]\n" set i 0 #0 - novar #1 - atom ' #2 - pin ^ #3 - boolean & #4 - integer #5 - double #6 - var #7 - glob (no classifier and contains * or ?) foreach mismatchinfo $mismatches { lassign $mismatchinfo status varname if {$status eq "mismatch"} { # varname can be empty string set varclass [lindex $var_class $i 1] set val [lindex $var_actions $i 2] set e [dict get [lindex $expected_values $i] lhs] set type "" if {2 in $varclass} { append type "pinned " } if {$varclass == 1} { set type "atom" } elseif {$varclass == 2} { set type "pinned var" } elseif {3 in $varclass} { append type "boolean" } elseif {4 in $varclass} { append type "int" } elseif {5 in $varclass} { append type "double" } elseif {$varclass == 6} { set type "var" } elseif {7 in $varclass} { append type "glob" } elseif {8 in $varclass} { append type "numeric" } if {$type eq ""} { set type "" } set lhs_tag "- [dict get [lindex $expected_values $i] info]" set mmaction [lindex $var_actions $i 1] ;#e.g ?mismatch-dict-index-out-of-range set tag "?mismatch-" if {[string match $tag* $mmaction]} { set mismatch_reason [string range $mmaction [string length $tag] end] } else { set mismatch_reason $mmaction } append msg " $type: '$varname' $mismatch_reason $lhs_tag LHS: '$e' vs RHS: '$val'\n" } incr i } #error $msg dict unset returndict result #structured error return - used by pipeswitch/pipecase - matching on "binding mismatch*" dict set returndict mismatch [dict create binding mismatch varnames $var_names matchinfo $mismatches display $msg data $data] return $returndict } if {![llength $var_names]} { #var_name entries can be blank - but it will still be a list dict set returndict result $data } else { #punk::assert {$i == [llength $var_names]} dict set returndict result $returnval } return $returndict } ######################################################## # dragons. # using an error as out-of-band way to signal mismatch is the easiest. # It comes at some cost (2us 2023?) to trigger catches. (which is small relative to uncompiled pipeline costs in initial version - but per pattern mismatch will add up) # The alternative of attempting to tailcall return the mismatch as data - is *hard* if not impossible to get right. # We need to be able to match on things like {error {mismatch etc}} - without it then itself being interpreted as a mismatch! # A proper solution may involve a callback? tailcall some_mismatch_func? # There may be a monad-like boxing we could do.. to keep it in data e.g {internalresult match } {internalresult mismatch } and be careful to not let boxed data escape ?? # make sure there is good test coverage before experimenting with this proc _handle_bind_result {d} { #set match_caller [info level 2] #debug.punk.pipe {_handle_bind_result match_caller: $match_caller} 9 if {![dict exists $d result]} { #uplevel 1 [list error [dict get $d mismatch]] #error [dict get $d mismatch] return -code error -errorcode [list binding mismatch varnames [dict get $d mismatch varnames]] [dict get $d mismatch] } else { return [dict get $d result] } } # initially promising - but the approach runs into impossible disambiguation of mismatch as data vs an actual mismatch proc _handle_bind_result_experimental1 {d} { #set match_caller [info level 2] #debug.punk.pipe {_handle_bind_result match_caller: $match_caller} 9 if {![dict exists $d result]} { tailcall return [dict get $d mismatch] } else { return [dict get $d result] } } ######################################################## #timings very similar. listset3 closest in performance to pipeset. review - test on different tcl versions. #Unfortunately all these variations seem around 10x slower than 'set list {a b c}' or 'set list [list a b c]' #there seems to be no builtin for list setting with args syntax. lappend is close but we would need to catch unset the var first. #proc listset1 {listvarname args} { # tailcall set $listvarname $args #} #interp alias {} listset2 {} apply {{vname args} {tailcall set $vname $args}} #interp alias {} listset3 {} apply {{vname args} {upvar $vname v; set v $args}} proc pipeset {pipevarname args} { upvar $pipevarname the_pipe set the_pipe $args } proc pipealias {targetcmd args} { tailcall interp alias {} $targetcmd {} {*}$args } #same as used in unknown func for initial launch #variable re_assign {^([^\r\n=\{]*)=(.*)} #variable re_assign {^[\{]{0,1}([^ \t\r\n=]*)=(.*)} variable re_assign {^([^ \t\r\n=\{]*)=(.*)} variable re_dot_assign {^([^ \t\r\n=\{]*)\.=(.*)} #match_assign is tailcalled from unknown - uplevel 1 gets to caller level proc match_assign {scopepattern equalsrhs args} { #review - :: is legal in atoms! if {[string match "*::*" $scopepattern]} { error "match_assign scopepattern '$scopepattern' contains namespace separator '::' - invalid." } puts stderr ">> match_assign '$scopepattern=$equalsrhs' $args" set fulltail $args set homens ::punk::pipecmds set pipecmd ${homens}::$scopepattern=$equalsrhs #pipecmd could have glob chars - test $pipcmd in the list - not just that info commands returns results. if {$pipecmd in [info commands $pipecmd]} { #puts "==nscaller: '[uplevel 1 [list namespace current]]'" uplevel 1 [list namespace import $pipecmd] tailcall $pipecmd {*}$args } #NOTE: #we need to ensure for case: #= x=y #that the second arg is treated as a raw value - never a pipeline command #equalsrhs is set if there is a segment-insertion-pattern *directly* after the = #debug.punk.pipe {match_assign '$multivar' '$equalsrhs' '$fulltail'} 4 #can match pattern on lhs with a value where pattern is a minilang that can refer to atoms (simple non-whitespace strings), numbers, or varnames (possibly pinned) as well as a trailing spec for position within the data. # allow x=insertionpattern to begin a pipeline e.g x= |> string tolower ? or x=1 a b c <| X to produce a X b c # #to assign an entire pipeline to a var - use pipeset varname instead. # in our script's handling of args: #avoid use of regexp match on each element - or we will unnecessarily force string reps on lists #same with lsearch with a string pattern - #wouldn't matter for small lists - but we need to be able to handle large ones efficiently without unneccessary string reps set script [string map [list $scopepattern $equalsrhs] { if {[llength $args]} { #scan for existence of any pipe operator (|*> or <*|) only - we don't need position #all pipe operators must be a single element foreach a $args { if {![catch {llength $a} sublen]} { #don't enforce sublen == 1. Legal to have whitespace including newlines {| x >} if {[string match |*> $a] || [string match <*| $a]} { tailcall punk::pipeline = "" "" {*}$args } } } if {[llength $args] == 1} { set segmenttail [lindex $args 0] } else { error "pipedata = must take a single argument. Got [llength $args] args: '$args'" "match_assign $args" [list pipedata segment too_many_elements segment_type =] } } else { #set segmenttail [purelist] set segmenttail [lreplace x 0 0] } }] if {[string length $equalsrhs]} { # as we aren't in a pipleine - there is no data to insert - we proably still need to run _split_equalsrhs to verify the syntax. # review - consider way to turn it off as optimisation for non-pipelined assignment - but generally standard Tcl set could be used for that purpose. # We are probably only here if testing in the repl - in which case the error messages are important. set var_index_position_list [_split_equalsrhs $equalsrhs] #we may have an insertion-spec that inserts a literal atom e.g to wrap in "ok" # x='ok'/0 data # we won't examine for vars as there is no pipeline - ignore # also ignore trailing * (indicator for variable data to be expanded or not - ie {*}) # we will differentiate between / and @ in the same way that general pattern matching works. # /x will simply call linsert without reference to length of list # @x will check for out of bounds # # !TODO - sort by position lowest to highest? or just require user to order the pattern correctly? foreach v_pos $var_index_position_list { lassign $v_pos v indexspec positionspec #e.g =v1/1>0 A pattern predator system) # #todo - review # # #for now - the script only needs to handle the case of a single segment pipeline (no |> <|) #temp - needs_insertion #we can safely output no script for variable insertions for now - because if there was data available, #we would have to be in a pipeline - in which case the script above would have delegated all our operations anyway. #tag: positionspechandler if {([string index $v 0] eq "'" && [string index $v end] eq "'") || [string is integer -strict $v]} { #(for now)don't allow indexspec on a literal value baked into the pipeline - it doesn't really make sense #- unless the pipeline construction has been parameterised somehow e.g "=${something}/0" #review if {[string length $indexspec]} { error "pipesyntax literal value $v - index specification not allowed (match_assign)1" "match_assign $scopepattern $equalsrhs $args" [list pipesyntax index_on_literal] } if {[string index $v 0] eq "'" && [string index $v end] eq "'"} { set datasource [string range $v 1 end-1] } elseif {[string is integer -strict $v]} { set datasource $v } append script [string map [list $datasource] { set insertion_data }] set needs_insertion 1 } elseif {$v eq ""} { #default variable is 'data' set needs_insertion 0 } else { append script [string map [list $v] { #uplevel? #set insertion_data [set ] }] set needs_insertion 0 } if {$needs_insertion} { set script2 [punk::list_insertion_script $positionspec segmenttail ] set script2 [string map [list "\$insertion_data" ] $script2] append script $script2 } } } if {![string length $scopepattern]} { append script { return $segmenttail } } else { append script [string map [list $scopepattern] { #we still need to bind whether list is empty or not to allow any patternmatch to succeed/fail set d [punk::_multi_bind_result "" $segmenttail] #return [punk::_handle_bind_result $d] #maintenance: inlined if {![dict exists $d result]} { #uplevel 1 [list error [dict get $d mismatch]] #error [dict get $d mismatch] return -code error -level 1 -errorcode [list binding mismatch] [dict get $d mismatch] } else { return [dict get $d result] } }] } debug.punk.pipe.compile {match_assign creating proc $pipecmd} 2 uplevel 1 [list proc $pipecmd args $script] uplevel 1 [list namespace import $pipecmd] tailcall $pipecmd {*}$args } #return a script for inserting data into listvar proc list_insertion_script {keyspec listvar {data }} { set positionspec [string trimright $keyspec "*"] set do_expand [expr {[string index $keyspec end] eq "*"}] if {$do_expand} { set exp {{*}} } else { set exp "" } #NOTE: linsert and lreplace can take multiple values at tail ie expanded data set ptype [string index $positionspec 0] if {$ptype in [list @ /]} { set index [string range $positionspec 1 end] } else { #the / is optional (default) at first position - and we have already discarded the ">" set ptype "/" set index $positionspec } #puts stderr ">> >> $index" set script "" set isint [string is integer -strict $index] if {$index eq "."} { #do nothing - this char signifies no insertion } elseif {$isint || [regexp {^(end|end[-+]{1,2}[0-9]+)$} $index]} { if {$ptype eq "@"} { #compare position to *possibly updated* list - note use of $index > $datalen rather than $index+1 > $datalen - (we allow 'insertion' at end of list by numeric index) if {$isint} { append script [string map [list $listvar $index] { if {( > [llength $])} { #not a pipesyntax error error "pipedata insertionpattern index out of bounds. index: vs len: [llength $] use /x instead of @x to avoid check (list_insertion_script)" "list_insertion_script $keyspec" [list pipedata insertionpattern index_out_f_bounds] } }] } #todo check end-x bounds? } if {$isint} { append script [string map [list $listvar $index $exp $data] { set [linsert [lindex [list $ [unset ]] 0] ] }] } else { append script [string map [list $listvar $index $exp $data] { #use inline K to make sure the list is unshared (optimize for larger lists) set [linsert [lindex [list $ [unset ]] 0] ] }] } } elseif {[string first / $index] < 0 && [string first - $index] > 0} { if {[regexp {^([0-9]+|end|end[-+]{1,2}[0-9]+)-([0-9]+|end|end[-+]{1,2}([0-9]+))$} $index _ start end]} { #also - range checks for @ which must go into script !!! append script [string map [list $listvar $start $end $exp $data] { set [lreplace [lindex [list $ [unset ]] 0] ] }] } else { error "pipesyntax error in segment insertionpattern - positionspec:'$keyspec' unable to interpret position spec (list_insertion_script)2" "list_insertion_script $keyspec" [list pipedata insertionpattern_invalid] } } elseif {[string first / $index] >= 0} { #nested insertion e.g /0/1/2 /0/1-1 set parts [split $index /] set last [lindex $parts end] if {[string first - $last] >=0} { lassign [split $last -] a b if {![regexp {^([0-9]+|end|end[-+]{1,2}[0-9]+)-([0-9]+|end|end[-+]{1,2}([0-9]+))$} $last _ a b]} { error "pipesyntax error in segment insertionpattern - positionspec:'$keyspec' unable to interpret position spec (list_insertion_script)3" "list_insertion_script $keyspec" [list pipesyntax insertionpattern_invalid] } if {$a eq $b} { if {!$do_expand} { #we can do an lset set lsetkeys [list {*}[lrange $parts 0 end-1] $a] append script [string map [list $listvar $lsetkeys $data] { lset }] } else { #we need to lreplace the containing item append script [string map [list $listvar [lrange $parts 0 end-1] $a $data] { set target [lindex $ ] lset target {*} lset $target }] } } else { #we need to lreplace a range at the target level append script [string map [list $listvar [lrange $parts 0 end-1] $a $b $exp $data] { set target [lindex $ ] set target [lreplace $target ] lset $target }] } } else { #last element has no -, so we are inserting at the final position - not replacing append script [string map [list $listvar [lrange $parts 0 end-1] $last $exp $data] { set target [lindex $ ] set target [linsert $target ] lset $target }] } } else { error "pipesyntax error in segment - positionspec:'$keyspec' unable to interpret position spec (list_insertion_script)4" "list_insertion_script $keyspec" [list pipesyntax insertionpattern_invalid] } return $script } #todo - consider whether we can use < for insertion/iteration combinations # =a<,b< iterate once through # =a><,b>< cartesian product # =a<>,b<> ??? zip ? # # ie = {a b c} |> .=< inspect # would call inspect 3 times, once for each argument # .= list {a b c} {x y z} |a/0,b/1> .=a><,b>< list # would produce list of cartesian pairs? # proc _split_equalsrhs {insertionpattern} { set cmdname ::punk::pipecmds::split_rhs_$insertionpattern if {$cmdname in [info commands $cmdname]} { return [$cmdname] } set lst_var_indexposition [punk::_split_patterns $insertionpattern] set i 0 set return_triples [list] foreach v_pos $lst_var_indexposition { lassign $v_pos v index_and_position #e.g varname@@data/ok>0 varname/1/0>end #ensure only one ">" is detected if {![string length $index_and_position]} { set indexspec "" set positionspec "" } else { set chars [split $index_and_position ""] set posns [lsearch -all $chars ">"] if {[llength $posns] > 1} { error "pipesyntax error in segment insertion pattern '$insertionpattern' -v '$v' multiple '>' characters. Pattern not understood." "_split_equalsrhs $insertionpattern" [list pipesyntax insertionpattern_invalid] } if {![llength $posns]} { set indexspec $index_and_position set positionspec "" } else { set splitposn [lindex $posns 0] set indexspec [string range $index_and_position 0 $splitposn-1] set positionspec [string range $index_and_position $splitposn+1 end] } } #review - if {($positionspec in [list "*" "/*" "@*" "/" "@"]) || ($v eq "*" && $positionspec eq "")} { set star "" if {$v eq "*"} { set v "" set star "*" } if {[string index $positionspec end] eq "*"} { set star "*" } #it is always possible to insert at end of list regardless of current length - so /end* and @end* are equivalent #as are /end and @end #lset lst_var_indexposition $i [list $v "/end$star"] set triple [list $v $indexspec "/end$star"] } else { if {$positionspec eq ""} { #e.g just =varname #lset lst_var_indexposition $i [list $v "/end"] set triple [list $v $indexspec "/end"] #error "pipesyntax error in segment insertionpattern '$insertionpattern' - v '$v' missing position spec e.g /0" } else { if {[string index $indexspec 0] ni [list "" "/" "@"]} { error "pipesyntax error in segment insertionpattern '$insertionpattern' - v '$v' bad index spec '$indexspec'" "_split_equalsrhs $insertionpattern" [list pipesyntax insertionpattern_invalid] } set triple [list $v $indexspec $positionspec] } } lappend return_triples $triple incr i } proc $cmdname {} [list return $return_triples] return $return_triples } proc _is_math_func_prefix {e1} { #also catch starting brackets.. e.g "(min(4,$x) " if {[regexp {^[^[:alnum:]]*([[:alnum:]]*).*} $e1 _ word]} { #possible math func if {$word in [info functions]} { return true } } return false } #todo - option to disable these traces which provide clarifying errors (performance hit?) proc pipeline_args_read_trace_error {args} { error "The pipelined data doesn't appear to be a valid Tcl list\nModify input, or use \$data or another variable name instead of \$args." "pipeline_args_read_trace_error $args" [list pipedata args_unavailable_data_not_a_list] } #NOTE: the whole idea of scanning for %x% is a lot of work(performance penalty) #consider single placeholder e.g "_" as only positional indicator - for $data only - and require braced script with $var for more complex requirements #This would simplify code a lot - but also quite possible to collide with user data. #Perhaps not a big deal as unbraced segments between |> are mainly(?) a convenience for readability/repl etc. # (but importantly (at pipeline start anyway) unbraced segments are a mechanism to inject data from calling scope or from pipeline args <|) # #detect and retrieve %xxx% elements from item without affecting list/string rep #commas, @, ', ^ and whitespace not part of a valid tag (allows some substitution within varspecs) #%% is not a valid tag #(as opposed to using regexp matching which causes string reps) proc get_tags {item} { set chars [split $item {}] set terminal_chars [list , @ ' ^ " " \t \n \r] #note % is both terminal and initiating - so for the algorithm we don't include it in the list of terminal_chars set nonterminal [lmap v $chars {expr {$v ni $terminal_chars}}] set percents [lmap v $chars {expr {$v eq "%"}}] #useful for test/debug #puts "CHARS : $chars" #puts "NONTERMINAL: $nonterminal" #puts "PERCENTS : $percents" set sequences [list] set in_sequence 0 set start -1 set end -1 set i 0 #todo - some more functional way of zipping/comparing these lists? set s_length 0 ;#sequence length including % symbols - minimum for tag therefore 2 foreach n $nonterminal p $percents { if {!$in_sequence} { if {$n & $p} { set s_length 1 set in_sequence 1 set start $i set end $i } else { set s_length 0 } } else { if {$n ^ $p} { incr s_length incr end } else { if {$n & $p} { if {$s_length == 1} { # % followed dirctly by % - false start #start again from second % set s_length 1 set in_sequence 1 set start $i set end $i } else { incr end lappend sequences [list $start $end] set in_sequence 0 set s_length 0 set start -1; set end -1 } } else { #terminated - not a tag set in_sequence 0 set s_length 0 set start -1; set end -1 } } } incr i } set tags [list] foreach s $sequences { lassign $s start end set parts [lrange $chars $start $end] lappend tags [join $parts ""] } return $tags } #show underlying rep of list and first level proc rep_listname {lname} { upvar $lname l set output "$lname list rep: [rep $l]\n" foreach item $l { append output "-rep $item\n" append output " [rep $item]\n" } return $output } # # # relatively slow on even small sized scripts proc arg_is_script_shaped2 {arg} { set re {^(\s|;|\n)$} set chars [split $arg ""] if {[lsearch -regex $chars $re] >=0} { return 1 } else { return 0 } } proc arg_is_script_shaped {arg} { if {[string first " " $arg] >= 0} { return 1 } elseif {[string first \n $arg] >= 0} { return 1 } elseif {[string first ";" $arg] >= 0} { return 1 } elseif {[string first \t $arg] >= 0} { return 1 } else { return 0 } } proc pipeline {segment_op initial_returnvarspec equalsrhs args} { set fulltail $args #unset args ;#leave args in place for error diagnostics debug.punk.pipe {call pipeline: op:'$segment_op' '$initial_returnvarspec' '$equalsrhs' '$fulltail'} 4 #debug.punk.pipe.rep {[rep_listname fulltail]} 6 #--------------------------------------------------------------------- # test if we have an initial x.=y.= or x.= y.= #nextail is tail for possible recursion based on first argument in the segment set nexttail [lassign $fulltail next1] ;#tail head if {$next1 eq "pipematch"} { set results [uplevel 1 [list pipematch {*}$nexttail]] debug.punk.pipe {>>> pipematch results: $results} 1 set d [_multi_bind_result $initial_returnvarspec $results] return [_handle_bind_result $d] } elseif {$next1 eq "pipecase"} { set msg "pipesyntax\n" append msg "pipecase does not return a value directly in the normal way\n" append msg "It will return a casemismatch dict on mismatch\n" append msg "But on a successful match - it will use an 'error' mechanism to return {ok result {something}} in the caller's scope -\n" append msg "This will appear as an error in the repl, or disrupt pipeline result propagation if not in an appropriate wrapper\n" append msg "Call pipecase from within a pipeline script block or wrapper such as pipeswitch or apply." error $msg } #temp - this is related to a script for the entire pipeline (functional composition) - not the script for the segment-based x=y or x.=y proc. set ::_pipescript "" #NOTE: #important that for assignment: #= x=y .. #The second element is always treated as a raw value - not a pipeline instruction. #whereas... for execution: #.= x=y the second element is a pipeline-significant symbol based on the '=' even if it was passed in as an argument. #Usually an execution segment (.= cmd etc..) will have args inserted at the tail anyway - #- but if the pipeline is designed to put an argument in the zero position - then presumably it is intended as a pipeline-significant element anyway #This gives a *slight* incompatibility with external commands containing '=' - in that they may not work properly in pipelines # if {$segment_op ne "="} { #handle for example: #var1.= var2= "etc" |> string toupper # #var1 will contain ETC, var2 will contain etc # if {([string first = $next1] >= 0) && (![arg_is_script_shaped $next1]) } { #*SUB* pipeline recursion. #puts "======> recurse based on next1:$next1 " #set re_dot_assign {^([^ \t\r\n=\{]*)\.=(.*)} if {[regexp {^([^ \t\r\n=\{]*)\.=(.*)} $next1 _ nextreturnvarspec nextrhs]} { #non pipelined call to self - return result #debug.punk.pipe {nextreturnvarspec: $nextreturnvarspec nextrhs:$nextrhs tail:$nexttail} 4 #set results [uplevel 1 [list ::punk::pipeline .= $nextreturnvarspec $nextrhs {*}$nexttail]] set results [uplevel 1 [list $next1 {*}$nexttail]] #debug.punk.pipe.rep {==> rep recursive results: [rep $results]} 5 #debug.punk.pipe {>>> results: $results} 1 return [_handle_bind_result [_multi_bind_result $initial_returnvarspec $results]] } #puts "======> recurse asssign based on next1:$next1 " if {[regexp {^([^ \t\r\n=\{]*)=(.*)} $next1 _ nextreturnvarspec nextrhs]} { #non pipelined call to plain = assignment - return result #debug.punk.pipe {nextreturnvarspec: $nextreturnvarspec nextrhs:$nextrhs tail:$nexttail} 4 #set results [uplevel 1 [list ::punk::pipeline = $nextreturnvarspec $nextrhs {*}$nexttail]] set results [uplevel 1 [list $next1 {*}$nexttail]] #debug.punk.pipe {>>> results: $results} 1 set d [_multi_bind_result $initial_returnvarspec $results] return [_handle_bind_result $d] } } } set procname $initial_returnvarspec.=$equalsrhs #--------------------------------------------------------------------- #todo add 'op' argument and handle both .= and = # #|> data piper symbol #<| args piper symbol (arguments supplied at end of pipeline e.g from commandline or from calling and/or currying the command) # set more_pipe_segments 1 ;#first loop #this contains the main %data% and %datalist% values going forward in the pipeline #as well as any extra pipeline vars defined in each |> #It also contains any 'args' with names supplied in <| set dict_tagval [dict create] ;#cumulative %x% tag dict which operates on the whole length of the pipeline #determine if there are input args at the end of the pipeline indicated by reverse <| symbol possibly with argspecs e.g transform x y z =0} { set tailremaining [lrange $fulltail 0 $firstargpipe_posn-1] set argslist [lrange $fulltail $firstargpipe_posn+1 end] ;#Note that this could be a whole other pipeline with |> and/or <| elements. set argpipe [lindex $fulltail $firstargpipe_posn] set argpipespec [string range $argpipe 1 end-1] ;# strip off < & | from " b1 b2 b3 |outpipespec> c1 c2 c3 # for a1 a2 a3 - the pipe to the right is actually an outpipespec and for c1 c2 c3 the pipe to the left is an inpipespec #our initial command list always has *something* before we see any pipespec |> #Therefore we initially have a blank inpipespec (although in effect, it comes from the argpipespec <|) set inpipespec $argpipespec set outpipespec "" #avoiding regexp on each arg to maintain list reps #set tailmap [lmap v $tailremaining {lreplace [split $v {}] 1 end-1}] ## set tailmap [lmap v $tailremaining {if {[regexp {^\|(.*)>$} $v _ outpipespec] && !$pipeseen} {set pipeseen 1;set outpipespec} {if {$pipeseen} {set v} 0}}] #e.g for: a b c |> e f g |> h #set firstpipe_posn [lsearch $tailmap {| >}] set firstpipe_posn [lsearch $tailremaining "|*>"] if {$firstpipe_posn >=0} { set outpipespec [string range [lindex $tailremaining $firstpipe_posn] 1 end-1] set segment_members [lrange $tailremaining 0 $firstpipe_posn-1] #set tailremaining [lrange $tailremaining $firstpipe_posn+1 end] set tailremaining [lreplace $tailremaining 0 $firstpipe_posn] ;#generally too short for any K combinator benefit? what about lists with scripts? is it dependent on list length or also element content size? } else { set segment_members $tailremaining set tailremaining [list] } set script_like_first_word 0 set rhs $equalsrhs set segment_first_is_script 0 ;#default assumption until tested set segment_first_word [lindex $segment_members 0] if {$segment_op ne "="} { if {[arg_is_script_shaped $segment_first_word]} { set segment_first_is_script 1 } } else { if {[llength $segment_members] > 1} { error "pipedata = can only accept a single argument (got: '$segment_members')" "pipeline $segment_op $initial_returnvarspec $equalsrhs $fulltail" [list pipedata too_many_elements] #proc pipeline {segment_op initial_returnvarspec equalsrhs args} } set segment_members $segment_first_word } #tailremaining includes x=y during the loop. set returnvarspec $initial_returnvarspec if {![llength $argslist]} { unset -nocomplain previous_result ;# we want it unset for first iteration - differentiate from empty string } else { set previous_result $argslist } set segment_result_list [list] set i 0 ;#segment id set j 1 ;#next segment id set pipespec(args) $argpipespec ;# from trailing <| set pipespec(0,in) $inpipespec set pipespec(0,out) $outpipespec set max_iterations 100 ;# configurable? -1 for no limit ? This is primarily here to aid debugging of runaway loops in initial development .. should probably set to no-limit in final version. while {$more_pipe_segments == 1} { #--------------------------------- debug.punk.pipe {[a+ yellow bold]i$i SEGMENT MEMBERS([llength $segment_members]): $segment_members[a+]} 4 debug.punk.pipe {[a+ yellow bold]i$i TAIL REMAINING([llength $tailremaining]): $tailremaining[a+]} 4 debug.punk.pipe {[a+] inpipespec(prev [a+ yellow bold]|$pipespec($i,in)[a+]>) outpipespec(next [a+ yellow bold]|$pipespec($i,out)>[a+])} 4 debug.punk.pipe {[a+ cyan bold] segment_first_is_script:$segment_first_is_script} 4 if {$segment_first_is_script} { debug.punk.pipe {[a+ cyan bold] script segment: [lindex $segment_members 0][a+]} 4 } #examine inpipespec early to give faster chance for mismatch. ie before scanning segment for argument position set segment_result "" if {[info exists previous_result]} { set prevr $previous_result } else { set prevr "" } set pipedvars [dict create] if {[string length $pipespec($i,in)]} { #check the varspecs within the input piper # - data and/or args may have been manipulated set d [apply {{mv res} { punk::_multi_bind_result $mv $res -levelup 1 }} $pipespec($i,in) $prevr] #temp debug #if {[dict exists $d result]} { #set jjj [dict get $d result] #puts "!!!!! [rep $jjj]" #} set inpipespec_result [_handle_bind_result $d] set pipedvars [dict get $d setvars] set prevr $inpipespec_result ;# leftmost spec in |> needs to affect pipeline flow of 'data' #puts stdout "inpipespec:|$pipespec($i,in)> prevr:$prevr setvars: $pipedvars" } debug.punk.pipe {[a+] previous_iteration_result: $prevr[a+]} 6 debug.punk.pipe.rep {rep previous_iteration_result [rep $prevr]} if {$i == $max_iterations} { puts stderr "ABORTING. Reached max_iterations $max_iterations (todo: make configurable)" set more_pipe_segments 0 } set insertion_patterns [_split_equalsrhs $rhs] ;#raises error if rhs of positionspec not like /* or @* #puts stdout ">>> insertion_patterns $insertion_patterns" set segment_has_insertions [expr {[llength $insertion_patterns] > 0}] debug.punk.pipe.var {segment_has_insertions: $insertion_patterns} 5 debug.punk.pipe.rep {[rep_listname segment_members]} 4 #whether the segment has insertion_patterns or not - apply any modification from the piper argspecs (script will use modified args/data) #pipedvars comes from either previous segment |>, or <| args if {[dict exists $pipedvars "data"]} { #dict set dict_tagval %data% [list [dict get $pipedvars "data"]] dict set dict_tagval data [dict get $pipedvars "data"] } else { if {[info exists previous_result]} { dict set dict_tagval data $prevr } } foreach {vname val} $pipedvars { #add additionally specified vars and allow overriding of %args% and %data% by not setting them here if {$vname eq "data"} { #already potentially overridden continue } dict set dict_tagval $vname $val } #todo! #segment_script - not in use yet. #will require non-iterative pipeline processor to use ... recursive.. or coroutine based set script "" if {!$segment_has_insertions} { #debug.punk.pipe.var {[a+ cyan]SEGMENT has no tags[a+]} 7 #add previous_result as data in end position by default, only if *no* insertions specified (data is just list-wrapped previous_result) #set segment_members_filled [concat $segment_members [dict get $dict_tagval %data%]] ;# data flows through by default as single element - not args - because some strings are not valid lists #insertion-specs with a trailing * can be used to insert data in args format set segment_members_filled $segment_members if {[dict exists $dict_tagval data]} { lappend segment_members_filled [dict get $dict_tagval data] } } else { debug.punk.pipe.var {processing insertion_pattern dict_tagval: $dict_tagval} 4 set segment_members_filled [list] set segmenttail $segment_members ;# todo - change to segment_members here to match punk::match_assign set cmdname "::punk::pipecmds::insertion_$rhs" #commandname can contain glob chars - must search for exact membership in 'info commands' result. if {$cmdname ni [info commands $cmdname]} { set insertion_script "proc $cmdname {dict_tagval segmenttail} {\n" foreach v_pos $insertion_patterns { #puts stdout "v_pos '$v_pos'" lassign $v_pos v indexspec positionspec ;#v may be atom, or varname (in pipeline scope) #puts stdout "v:'$v' indexspec:'$indexspec' positionspec:'$positionspec'" #julz append insertion_script \n [string map [list $v_pos] { lassign [list ] v indexspec positionspec }] if {([string index $v 0] eq "'") && ([string index $v end] eq "'")} { set v [string range $v 1 end-1] ;#assume trailing ' is present! if {[string length $indexspec]} { error "pipesyntax - index not supported on atom" "pipeline $segment_op $initial_returnvarspec $equalsrhs $args" [list pipesyntax index_on_literal] } append insertion_script \n "set insertion_data $v" ;#sub in shortened $v now -i.e use atom value itself (string within single quotes) } elseif {[string is double -strict $v]} { #don't treat numbers as variables if {[string length $indexspec]} { error "pipesyntax - index not supported on number" "pipeline $segment_op $initial_returnvarspec $equalsrhs $args" [list pipesyntax index_on_literal] } append insertion_script \n {set insertion_data $v} } else { append insertion_script \n [string map [list $cmdname] { #puts ">>> v: $v dict_tagval:'$dict_tagval'" if {$v eq ""} { set v "data" } if {[dict exists $dict_tagval $v]} { set insertion_data [dict get $dict_tagval $v] #todo - use destructure_func set d [punk::_multi_bind_result $indexspec $insertion_data] set insertion_data [punk::_handle_bind_result $d] } else { #review - skip error if varname is 'data' ? #e.g we shouldn't really fail for: #.=>* list a b c <| #we need to be careful not to insert empty-list as an argument by default error "pipevariable - varname $v not present in pipeline context. pipecontext_vars: [dict keys $dict_tagval] (2)" " pipecontext_vars: [dict keys $dict_tagval]" [list pipevariable variable_not_in_pipeline_scope] } }] } #append script [string map [list $getv]{ # #}] #maintenance - index logic should be similar identical? to to match_assign - which only needs to process atoms because it (for now?) delegates all pipeline ops here, so no vars available (single segment assign) #tag: positionspechandler #puts stdout "=== list_insertion_script '$positionspec' segmenttail " set script2 [punk::list_insertion_script $positionspec segmenttail ] set script2 [string map [list "\$insertion_data" ] $script2] append insertion_script \n $script2 } append insertion_script \n {set segmenttail} append insertion_script \n "}" #puts stderr "$insertion_script" debug.punk.pipe.compile {creating proc ::punk::pipecmds::insertion_$rhs } 4 eval $insertion_script } set segment_members_filled [::punk::pipecmds::insertion_$rhs $dict_tagval [lindex [list $segmenttail [unset segmenttail]] 0] ] #set segment_members_filled $segmenttail #note - length of segment_members_filled may now differ from length of original segment_members! (if do_expand i.e trailing * in any insertion_patterns) } set rhs [string map $dict_tagval $rhs] debug.punk.pipe.rep {segment_members_filled rep: [rep $segment_members_filled]} 4 # script index could have changed!!! todo fix! #we require re_dot_assign before re_assign (or fix regexes so order doesn't matter!) if {(!$segment_first_is_script ) && $segment_op eq ".="} { #no scriptiness detected #debug.punk.pipe.rep {[a+ yellow bold][rep_listname segment_members_filled][a+]} 4 set cmdlist_result [uplevel 1 $segment_members_filled] #debug.punk.pipe {[a+ green bold]forward_result: $forward_result[a+]} 4 #debug.punk.pipe.rep {[a+ yellow bold]forward_result REP: [rep $forward_result][a+]} 4 #set d [_multi_bind_result $returnvarspec [punk::K $cmdlist_result [unset cmdlist_result]]] set d [_multi_bind_result $returnvarspec [lindex [list $cmdlist_result [unset cmdlist_result ]] 0]] set segment_result [_handle_bind_result $d] #puts stderr ">>forward_result: $forward_result segment_result $segment_result" } elseif {$segment_op eq "="} { #slightly different semantics for assigment! #We index into the DATA - not the position within the segment! #(an = segment must take a single argument, as opposed to a .= segment) #(This was a deliberate design choice for consistency with set, and to reduce errors.) #(we could have allowed multiple args to = e.g to form a list, but it was tried, and the edge-cases were unintuitive and prone to user error) #(The choice to restrict to single argument, but allow insertion and appending via insertion-specs is more explicit and reliable even though the insertion-specs operate differently to those of .=) # #we have to ensure that for an empty segment - we don't append to the empty list, thus listifying the data #v= {a b c} |> = # must return: {a b c} not a b c # if {!$segment_has_insertions} { set segment_members_filled $segment_members if {[dict exists $dict_tagval data]} { if {![llength $segment_members_filled]} { set segment_members_filled [dict get $dict_tagval data] } else { lappend segment_members_filled [dict get $dict_tagval data] } } } set d [_multi_bind_result $returnvarspec [lindex [list $segment_members_filled [unset segment_members_filled ]] 0]] set segment_result [_handle_bind_result $d] } elseif {$segment_first_is_script || $segment_op eq "script"} { #script debug.punk.pipe {[a+ cyan bold].. evaluating as script[a+]} 2 set script [lindex $segment_members 0] #build argument lists for 'apply' set segmentargnames [list] set segmentargvals [list] foreach {k val} $dict_tagval { if {$k eq "args"} { #skip args - it is manually added at the end of the apply list if it's a valid tcl list continue } lappend segmentargnames $k lappend segmentargvals $val } set argsdatalist $prevr ;#default is raw result as a list. May be overridden by an argspec within |> e.g |args@@key> or stripped if not a tcl list #puts "------> rep prevr argsdatalist: [rep $argsdatalist]" set add_argsdata 0 if {[dict exists $dict_tagval "args"]} { set argsdatalist [dict get $dict_tagval "args"] #see if the raw result can be treated as a list if {[catch {lindex $argsdatalist 0}]} { #we cannot supply 'args' set pre_script "" #todo - only add trace if verbose warnings enabled? append pre_script "trace add variable args read punk::pipeline_args_read_trace_error\n" set script $pre_script append script $segment_first_word set add_argsdata 0 } else { set add_argsdata 1 } } debug.punk.pipe.rep {>> [rep_listname segmentargvals]} 4 set ns [uplevel 1 {::namespace current}] if {!$add_argsdata} { debug.punk.pipe {APPLY1: (args not set; not a list) segment vars:$segmentargnames} 4 #puts stderr " script: $script" #puts stderr " vals: $segmentargvals" set evaluation [uplevel 1 [list ::apply [::list $segmentargnames $script $ns] {*}$segmentargvals]] } else { debug.punk.pipe {APPLY2: (args is set)segment vars:$segmentargnames} 4 #puts stderr " script: $script" #puts stderr " vals: $segmentargvals $argsdatalist" #pipeline script context should be one below calling context - so upvar v v will work set evaluation [uplevel 1 [list ::apply [::list [::concat $segmentargnames args] $script $ns] {*}$segmentargvals {*}$argsdatalist]] } debug.punk.pipe.rep {script result, evaluation: [rep_listname evaluation]} 4 #puts "---> rep script evaluation result: [rep $evaluation]" #set d [_multi_bind_result $returnvarspec [punk::K $evaluation [unset evaluation]]] #trailing segment_members are *pipedata* scripts - as opposed to ordinary pipeline scripts! set tail_scripts [lrange $segment_members 1 end] if {[llength $tail_scripts]} { set r [pipedata $evaluation {*}$tail_scripts] } else { set r $evaluation } set d [_multi_bind_result $returnvarspec [lindex [list $r [unset r]] 0]] set segment_result [_handle_bind_result $d] } else { #tags ? #debug.punk.pipe {>>raw commandline: [concat $rhs $segment_members_filled]} 5 if 0 { #set s [list uplevel 1 [concat $rhs $segment_members_filled]] if {![info exists pscript]} { upvar ::_pipescript pscript } if {![info exists pscript]} { #set pscript $s set pscript [funcl::o_of_n 1 $segment_members] } else { #set pscript [string map [list

$pscript] {uplevel 1 [concat $rhs $segment_members_filled [

]]}] #set snew "set pipe_$i \[uplevel 1 \[list $rhs $segment_members_filled " #append snew "set pipe_[expr $i -1]" #append pscript $snew set pscript [funcl::o_of_n 1 $segment_members $pscript] } } set cmdlist_result [uplevel 1 $segment_members_filled] #set d [_multi_bind_result $returnvarspec [punk::K $segment_members_filled [unset segment_members_filled]]] set d [_multi_bind_result $returnvarspec [lindex [list $cmdlist_result [unset cmdlist_result]] 0 ]] #multi_bind_result needs to return a funcl for rhs of: #lindex [list [set syncvar [main pipeline.. ]] [rhs binding funcl...] 1 ] #which uses syncvar # #The lhs of 'list' runs first so now syncvar can be the root level of the rhs function list and bind the necessary vars. #NOTE: unintuitively, we are returning the value of rhs to the main pipleline! (leftmost binding) this is because the leftmost binding determines what goes back to the pipeline result set segment_result [_handle_bind_result $d] } #the subresult doesn't need to go backwards - as the final assignment can emit the result into a variable #It makes more sense and is ultimately more useful (and more easy to reason about) for the result of each assignment to be related only to the pre-pipe section #It may however make a good debug point #puts stderr "segment $i segment_result:$segment_result" debug.punk.pipe.rep {[rep_listname segment_result]} 3 #examine tailremaining. # either x x x |?> y y y ... # or just y y y #we want the x side for next loop #set up the conditions for the next loop #|> x=y args # inpipespec - contents of previous piper |xxx> # outpipespec - empty or content of subsequent piper |xxx> # previous_result # assignment (x=y) set pipespec($j,in) $pipespec($i,out) set outpipespec "" set tailmap "" set next_pipe_posn -1 if {[llength $tailremaining]} { #set tailmap [lmap v $tailremaining {lreplace [split $v {}] 1 end-1}] ##e.g for: a b c |> e f g |> h #set next_pipe_posn [lsearch $tailmap {| >}] set next_pipe_posn [lsearch $tailremaining "|*>"] set outpipespec [string range [lindex $tailremaining $next_pipe_posn] 1 end-1] } set pipespec($j,out) $outpipespec set script_like_first_word 0 if {[llength $tailremaining] || $next_pipe_posn >= 0} { if {$next_pipe_posn >=0} { set next_all_members [lrange $tailremaining 0 $next_pipe_posn-1] ;#exclude only piper |xxx> for set tailremaining [lrange $tailremaining $next_pipe_posn+1 end] } else { set next_all_members $tailremaining set tailremaining [list] } #assignment is the arg immediately following |> operator e.g x.=blah or x=etc (or a normal commandlist or script!) set segment_first_word "" set returnvarspec "" ;# the lhs of x=y set segment_op "" set rhs "" set segment_first_is_script 0 if {[llength $next_all_members]} { if {[arg_is_script_shaped [lindex $next_all_members 0]]} { set segment_first_word [lindex $next_all_members 0] set segment_first_is_script 1 set segment_op "" set segment_members $next_all_members } else { set possible_assignment [lindex $next_all_members 0] #set re_dot_assign {^([^ \t\r\n=\{]*)\.=(.*)} if {[regexp {^([^ \t\r\n=\{]*)\.=(.*)} $possible_assignment _ returnvarspec rhs]} { set segment_op ".=" set segment_first_word [lindex $next_all_members 1] set script_like_first_word [arg_is_script_shaped $segment_first_word] if {$script_like_first_word} { set segment_first_is_script 1 ;#relative to segment_members which no longer includes the .= } set segment_members [lrange $next_all_members 1 end] } elseif {[regexp {^([^ \t\r\n=]*)=(.*)} $possible_assignment _ returnvarspec rhs]} { set segment_op "=" #never scripts #must be at most a single element after the = ! if {[llength $next_all_members] > 2} { #raise this as pipesyntax as opposed to pipedata? error "pipesyntax - at most one element can follow = (got [lrange $next_all_members 1 end])" "pipeline $segment_op $returnvarspec $rhs [lrange $next_all_members 1 end]" [list pipesyntax too_many_elements] } set segment_first_word [lindex $next_all_members 1] if {[catch {llength $segment_first_word}]} { set segment_is_list 0 ;#only used for segment_op = } else { set segment_is_list 1 ;#only used for segment_op = } set segment_members $segment_first_word } else { #no assignment operator and not script shaped set segment_op "" set returnvarspec "" set segment_first_word [lindex $next_all_members 0] set segment_first_word [lindex $next_all_members 1] set segment_members $next_all_members #puts stderr ">>3 no-operator segment_first_word: '$segment_first_word'" } } } else { #?? two pipes in a row ? debug.punk.pipe {[a+ yellow bold]WARNING: no segment members found[a+]} 0 set segment_members return set segment_first_word return } #set forward_result $segment_result set previous_result $segment_result } else { debug.punk.pipe {[a+ cyan bold]End of pipe segments ($i)[a+]} 4 #output pipe spec at tail of pipeline set pipedvars [dict create] if {[string length $pipespec($i,out)]} { set d [apply {{mv res} { punk::_multi_bind_result $mv $res -levelup 1 }} $pipespec($i,out) $segment_result] set segment_result [_handle_bind_result $d] set pipedvars [dict get $d setvars] } set more_pipe_segments 0 } #the segment_result is based on the leftmost var on the lhs of the .= #whereas forward_result is always the entire output of the segment lappend segment_result_list $segment_result incr i incr j } ;# end while return [lindex $segment_result_list end] #return $forward_result } #just an experiment #what advantage/difference versus [llength [lrange $data $start $end]] ??? proc data_range_length {data start end} { set datalen [llength $data] #normalize to s and e if {$start eq "end"} { set s [expr {$datalen - 1}] } elseif {[string match end-* $start]} { set stail [string range $start 4 end] set posn [expr {$datalen - $stail -1}] if {$posn < 0} { return 0 } set s $posn } else { #int if {($start < 0) || ($start > ($datalen -1))} { return 0 } set s $start } if {$end eq "end"} { set e [expr {$datalen - 1}] } elseif {[string match end-* $end]} { set etail [string range $end 4 end] set posn [expr {$datalen - $etail -1}] if {$posn < 0} { return 0 } set e $posn } else { #int if {($end < 0)} { return 0 } set e $end } if {$s > ($datalen -1)} { return 0 } if {$e > ($datalen -1)} { set e [expr {$datalen -1}] } if {$e < $s} { return 0 } return [expr {$e - $s + 1}] } proc know {cond body} { set existing [info body ::unknown] #assuming we can't test on cond being present in existing unknown script - because it may be fairly simple and prone to false positives (?) ##This means we can't have 2 different conds with same body if we test for body in unknown. ##if {$body ni $existing} { package require base64 set scr [base64::encode -maxlen 0 $cond] ;#will only be decoded if the debug is triggered #tcllib has some double-substitution going on.. base64 seems easiest and will not impact the speed of normal execution when debug off. proc ::unknown {args} [string map [list @c@ $cond @b@ $body @scr@ $scr] { #--------------------------------------- if {![catch {expr {@c@}} res] && $res} { debug.punk.unknown {HANDLED BY: punk unknown_handler ([llength $args]) args:'$args' "cond_script:'[punk::decodescript @scr@]'" } 4 return [eval {@b@}] } else { debug.punk.unknown {skipped: punk unknown_handler ([llength $args]) args:'$args' "cond_script:'[punk::decodescript @scr@]'" } 4 } #--------------------------------------- }]$existing #} } proc know? {{len 2000}} { puts [string range [info body ::unknown] 0 $len] } proc decodescript {b64} { if {[ catch { package require base64 base64::decode $b64 } scr]} { return "" } else { return "($scr)" } } proc configure_unknown {} { #----------------------------- #these are critical e.g core behaviour or important for repl displaying output correctly #---------------- #for var="val {a b c}" #proc ::punk::val {{v {}}} {tailcall lindex $v} #proc ::punk::val {{v {}}} {return $v} ;#2023 - approx 2x faster than the tailcall lindex version proc ::punk::val [list [list v [purelist]]] {return $v} #---------------- #can't use know - because we don't want to return before original unknown body is called. proc ::unknown {args} [string map [list] { package require base64 set ::punk::last_run_display [list] set ::repl::last_unknown [lindex $args 0] ;#jn }][info body ::unknown] #handle process return dict of form {exitcode num etc blah} #ie when the return result as a whole is treated as a command #exitcode must be the first key know {[lindex $args 0 0] eq "exitcode"} { uplevel 1 [list exitcode {*}[lrange [lindex $args 0] 1 end]] } #----------------------------- # # potentially can be disabled by config(?) - but then scripts not able to use all repl features.. #todo - repl output info that it was evaluated as an expression #know {[expr $args] || 1} {expr $args} know {[expr $args] || 1} {tailcall expr $args} #it is significantly faster to call a proc like this than to inline it in the unknown proc proc ::punk::range {from to args} { set count [expr {($to -$from) + 1}] incr from -1 return [lmap v [lrepeat $count 0] {incr from}] } know {[regexp {^([0-9]+)\.\.([0-9]+)$} [lindex $args 0 0] -> from to]} {punk::range $from $to} #NOTE: #we don't allow setting namespace qualified vars in the lhs assignment pattern. #The principle is that we shouldn't be setting vars outside of the immediate calling scope. #(It would also be difficult and error-prone and generally make the pipelines less re-usable and reliable) #Therefore ::nswhatever::blah= x is the pipeline: blah= x - where the corresponding command, if any is first resolved in ::nswhatever #We will require that the namespace already exists - which is consistent with if the command were to be run without unknown proc ::punk::_unknown_assign_dispatch {matchedon pattern equalsrhs args} { set tail [lassign $args hd] #puts "-> _unknown_assign_dispatch '$partzerozero' pattern:'$pattern' equalsrhs:'$equalsrhs' args:'$args' argshd:'$hd' argstail:'$tail'" if {$hd ne $matchedon} { if {[llength $tail]} { error "unknown_assign_dispatch: pipeline with args unexpanded. Try {*}\$pipeline $tail" } #regexp $punk::re_assign $hd _ pattern equalsrhs #we assume the whole pipeline has been provided as the head regexp {^([^\t\r\n=]*)\=([^ \t\r\n]*)(.*)} $hd _ pattern equalsrhs tail } #NOTE: - it doesn't make sense to call 'namespace' qualifiers or 'namespace tail' on a compound hd such as v,::etc= blah # we only look at leftmost namespace-like thing and need to take account of the pattern syntax # e.g for ::etc,'::x'= # the ns is :: and the tail is etc,'::x'= # (Tcl's namespace qualifiers/tail won't help here) if {[string match ::* $hd]} { set patterns [punk::_split_patterns $hd] #get a pair-list something like: {::x /0} {etc {}} set ns [namespace qualifiers [lindex $patterns 0 0]] set nslen [string length $ns] set patterntail [string range $ns $nslen end] } else { set ns "" set patterntail $pattern } if {[string length $ns] && ![namespace exists $ns]} { error "unknown_assign_dispatch: namespace '$ns' not found. (Note that pipeline lhs variables cannot be namespaced)" } else { set nscaller [uplevel 1 [list ::namespace current]] set commands [uplevel 1 [list ::info commands $pattern=$equalsrhs]] ;#uplevel - or else we are checking from perspective of this namespace ::punk #we must check for exact match of the command in the list - because command could have glob chars. if {"$pattern=$equalsrhs" in $commands} { puts stderr "unknown_assign_dispatch>> '$pattern=$equalsrhs' $commands nscaller: '$nscaller'" #we call the namespaced function - we don't evaluate it *in* the namespace. #REVIEW #warn for now...? #tailcall $pattern=$equalsrhs {*}$args tailcall $pattern=$equalsrhs {*}$tail } } #puts "--->nscurrent [uplevel 1 [list namespace current]]" #ignore the namespace.. #We could interpret the fact that the nonexistant pipe was called with a namespace to indicate that's where the pipecommand should be created.. #But.. we would need to ensure 1st (compiling) invocation runs the same way as subsequent invocations. #namespace evaling match_assign here probably wouldn't accomplish that and may create surprises with regards to where lhs vars(if any) are created tailcall ::punk::match_assign $patterntail $equalsrhs {*}$tail #return [uplevel 1 [list ::punk::match_assign $varspecs $rhs $tail]] } #variable re_assign {^([^\r\n=\{]*)=(.*)} #characters directly following = need to be assigned to the var even if they are escaped whitespace (e.g \t \r \n) #unescaped whitespace causes the remaining elements to be part of the tail -ie are appended to the var as a list #e.g x=a\nb c #x will be assigned the list {a\nb c} ie the spaces between b & c are not maintained # know {[regexp {^([^\t\r\n=]*)\=([^ \t\r\n]*)} [lindex $args 0] matchedon pattern equalsrhs]} {tailcall ::punk::_unknown_assign_dispatch $matchedon $pattern $equalsrhs {*}$args} know {[regexp {^{([^\t\r\n=]*)\=([^ \t\r\n]*)}} [lindex $args 0] matchedon pattern equalsrhs]} {tailcall ::punk::_unknown_assign_dispatch $matchedon $pattern $equalsrhs {*}$args} #variable re_assign {^([^\r\n=\{]*)=(.*)} #know {[regexp $punk::re_assign [lindex $args 0 0] partzerozero varspecs rhs]} { # set tail [lassign $args hd] # if {$hd ne $partzerozero} { # regexp $punk::re_assign $hd _ varspecs rhs # } # # tailcall so match_assign runs at same level as the unknown proc # tailcall ::punk::match_assign $varspecs $rhs $tail #} proc ::punk::_unknown_compare {val1 val2 args} { if {![string length [string trim $val2]]} { if {[llength $args] > 1} { #error "Extra args after comparison operator ==. usage e.g : \$var1==\$var2 or \$var1==\$var2 + 2" set val2 [string cat {*}[lrange $args 1 end]] return [expr {$val1 eq $val2}] } return $val1 } elseif {[llength $args] == 1} { #simple comparison if {[string is digit -strict $val1$val2]} { return [expr {$val1 == $val2}] } else { return [string equal $val1 $val2] } } elseif {![catch {expr $val2 {*}[lrange $args 1 end]} evaluated]} { if {[string is digit -strict $val1$evaluated]} { return [expr {$val1 == $evaluated}] } else { return [expr {$val1 eq $evaluated}] } } else { set evaluated [uplevel 1 [list {*}$val2 {*}[lrange $args 1 end]]] if {[string is digit -strict $val1$evaluated]} { return [expr {$val1 == $evaluated}] } else { return [expr {$val1 eq $evaluated}] } } } #ensure == is after = in know sequence #.* on left is pretty broad - todo: make it a little more specific to avoid unexpected interactions know {[regexp {(.*)==(.*)} [lindex $args 0] _ val1 val2]} {tailcall ::punk::_unknown_compare $val1 $val2 {*}$args} #.= must come after = here to ensure it comes before = in the 'unknown' proc #set punk::re_dot_assign {([^=]*)\.=(.*)} #know {[regexp $punk::re_dot_assign [lindex $args 0 0] _ varspecs rhs]} { # set tail [expr {([lindex $args 0] eq [lindex $args 0 0]) ? [lrange $args 1 end] : [concat [lrange [lindex $args 0] 1 end] [lrange $args 1 end] ] }] # tailcall ::punk::match_exec $varspecs $rhs {*}$tail # #return [uplevel 1 [list ::punk::match_exec $varspecs $rhs {*}$tail]] # } # proc ::punk::_unknown_dot_assign_dispatch {partzerozero pattern equalsrhs args} { set argstail [lassign $args hd] #this equates to auto-flattening the head.. which seems like a bad idea, the structure was there for a reason. #we should require explicit {*} expansion if the intention is for the args to be joined in at that level. #expr {($hd eq $partzerozero) ? [set tail $argstail] : [set tail [concat [lrange $hd 1 end] $argstail ]] } if {$hd ne $partzerozero} { if {[llength $argstail]} { error "unknown_dot_assign_dispatch: pipeline with args unexpanded. Try {*}\$pipeline $argstail" } #regexp $punk::re_assign $hd _ pattern equalsrhs #we assume the whole pipeline has been provided as the head #regexp {^([^ \t\r\n=\{]*)\.=([^ \t\r\n]*)(.*)} $hd _ pattern equalsrhs argstail regexp {^([^ \t\r\n=\{]*)\.=([^ \t\r\n]*)(.*)} $hd _ pattern equalsrhs argstail } #tailcall ::punk::match_assign $pattern $equalsrhs {*}$argstail return [uplevel 1 [list ::punk::pipeline .= $pattern $equalsrhs {*}$argstail]] } #variable re_dot_assign {^([^ \t\r\n=\{]*)\.=(.*)} know {[regexp {^([^ \t\r\n=\{]*)\.=(.*)} [lindex $args 0 0] partzerozero varspecs rhs]} {tailcall punk::_unknown_dot_assign_dispatch $partzerozero $varspecs $rhs {*}$args} #know {[regexp $punk::re_dot_assign [lindex $args 0 0] partzerozero varspecs rhs]} { # set argstail [lassign $args hd] # #set tail [expr {($hd eq $partzerozero) ? $argstail : [concat [lrange $hd 1 end] $argstail ] }] ;#!WRONG. expr will convert some numbers to scientific notation - this is premature/undesirable! # #avoid using the return from expr and it works: # expr {($hd eq $partzerozero) ? [set tail $argstail] : [set tail [concat [lrange $hd 1 end] $argstail ]] } # # tailcall ::punk::match_exec $varspecs $rhs {*}$tail # #return [uplevel 1 [list ::punk::match_exec $varspecs $rhs {*}$tail]] #} } configure_unknown #if client redefines 'unknown' after package require punk, they must call punk::configure_unknown afterwards. # #main Pipe initiator function - needed especially if 'unknown' not configured to interpret x.= x= etc #Should theoretically be slightly faster.. but pipelines are relatively slow until we can get pipeline compiling and optimisation. proc % {args} { set arglist [lassign $args assign] ;#tail, head if {$assign eq ".="} { tailcall {*}[list ::punk::pipeline .= "" "" {*}$arglist] } elseif {$assign eq "="} { tailcall {*}[list ::punk::pipeline = "" "" {*}$arglist] } #maint - punk::arg_is_script_shaped (inlined) if {[string first " " $assign] >= 0} { set is_script 1 } elseif {[string first \n $assign] >= 0} { set is_script 1 } elseif {[string first ";" $assign] >= 0} { set is_script 1 } elseif {[string first \t $assign] >= 0} { set is_script 1 } else { set is_script 0 } if {!$is_script && [string index $assign end] eq "="} { #set re_dotequals {^([^ \t\r\n=\{]*)\.=$} #set re_equals {^([^ \t\r\n=\{]*)=$} if {[regexp {^([^ \t\r\n=\{]*)\.=$} $assign _ returnvarspecs]} { set cmdlist [list ::punk::pipeline .= $returnvarspecs "" {*}$arglist] } elseif {[regexp {^([^ \t\r\n=\{]*)=$} $assign _ returnvarspecs]} { set cmdlist [list ::punk::pipeline = $returnvarspecs "" {*}$arglist] } else { error "pipesyntax punk::% unable to interpret pipeline '$args'" "% $args" [list pipesyntax unable_to_interpret] } } else { if {$is_script} { set cmdlist [list ::punk::pipeline "script" "" "" {*}$args] } else { set cmdlist [list ::punk::pipeline ".=" "" "" {*}$args] } } tailcall {*}$cmdlist #result-based mismatch detection can probably never work nicely.. #we need out-of-band method to detect mismatch. Otherwise we can't match on mismatch results! # set result [uplevel 1 $cmdlist] #pipeline result not guaranteed to be a proper list so we can't use list methods to directly look for 'binding mismatch' #.. but if we use certain string methods - we shimmer the case where the main result is a list #string match doesn't seem to change the rep.. though it does generate a string rep. #puts >>1>[rep $result] if {[catch {lrange $result 0 1} first2wordsorless]} { #if we can't get as a list then it definitely isn't the semi-structured 'binding mismatch' return $result } else { if {$first2wordsorless eq {binding mismatch}} { error $result } else { #puts >>2>[rep $result] return $result } } } proc ispipematch {args} { expr {[lindex [uplevel 1 [list pipematch {*}$args]] 0] eq "ok"} } #pipe initiator which will never raise an error *except for pipesyntax* , but always returns {ok {result something}} or {error {mismatch something}} or, for tcl errors {error {reason something}} proc pipematch {args} { #debug.punk.pipe {pipematch level [info level] levelinfo [info level 0]} 2 variable re_dot_assign variable re_assign set arglist [lassign $args assign] if {$assign eq ".="} { set cmdlist [list ::punk::pipeline .= "" "" {*}$arglist] } elseif {$assign eq "="} { set cmdlist [list ::punk::pipeline = "" "" {*}$arglist] } elseif {![punk::arg_is_script_shaped $assign] && [string index $assign end] eq "="} { #set re_dotequals {^([^ \t\r\n=\{]*)\.=$} #set re_equals {^([^ \t\r\n=\{]*)=$} if {[regexp {^([^ \t\r\n=]*)\.=.*} $assign _ returnvarspecs]} { set cmdlist [list ::punk::pipeline .= $returnvarspecs "" {*}$arglist] } elseif {[regexp {^([^ \t\r\n=]*)=.*} $assign _ returnvarspecs]} { set cmdlist [list $assign {*}$arglist] #set cmdlist [list ::punk::pipeline = $returnvarspecs "" {*}$arglist] } else { error "pipesyntax punk::pipematch unable to interpret pipeline '$args'" "pipematch $args" [pipesyntax unable_to_interpret] } } else { set cmdlist $args #script? #set cmdlist [list ::punk::pipeline .= "" "" {*}$args] } if {[catch {uplevel 1 $cmdlist} result erroptions]} { #puts stderr "pipematch erroptions:$erroptions" #debug.punk.pipe {pipematch error $result} 4 set ecode [dict get $erroptions -errorcode] if {[lrange $ecode 0 1] eq "binding mismatch"} { #error {reason xxx} should only be returned for underlying tcl errors. error {someotherkey xxx} for structured errors such as a binding mismatch #return [dict create error [dict create mismatch $result]] #puts stderr "pipematch converting error to {error {mismatch }}" return [list error [list mismatch $result]] } if {[lindex $ecode 0] eq "pipesyntax"} { #error $result return -options $erroptions $result } if {[lindex $ecode 0] eq "casematch"} { return $result } #return [dict create error [dict create reason $result]] return [list error [list reason $result]] } else { return [list ok [list result $result]] #debug.punk.pipe {pipematch result $result } 4 #return [dict create ok [dict create result $result]] } } proc pipenomatchvar {varname args} { if {[string first = $varname] >=0} { #first word "pipesyntax" is looked for by pipecase error "pipesyntax pipenomatch expects a simple varname as first argument" "pipenomatchvar $varname $args" [list pipesyntax expected_simple_varname] } #debug.punk.pipe {pipematch level [info level] levelinfo [info level 0]} 2 set assign [lindex $args 0] set arglist [lrange $args 1 end] if {[string first = $assign] >= 0} { variable re_dot_assign variable re_assign #what if we get passed a script block containing = ?? e.g {error x=a} if {$assign eq ".="} { set cmdlist [list ::punk::pipeline .= "" "" {*}$arglist] } elseif {$assign eq "="} { set cmdlist [list ::punk::pipeline = "" "" {*}$arglist] } elseif {[regexp $re_dot_assign $assign _ returnvarspecs rhs]} { set cmdlist [list ::punk::pipeline .= $returnvarspecs $rhs {*}$arglist] } elseif {[regexp $re_assign $assign _ returnvarspecs rhs]} { set cmdlist [list ::punk::pipeline = $returnvarspecs $rhs {*}$arglist] } else { debug.punk.pipe {[a+ yellow bold] Unexpected arg following pipenomatchvar variable [a+]} 0 set cmdlist $args #return [dict create error [dict create reason [dict create pipematch bad_first_word value $assign pipeline [list pipematch $assign {*}$args]]]] } } else { set cmdlist $args } upvar 1 $varname nomatchvar if {[catch {uplevel 1 $cmdlist} result erroptions]} { set ecode [dict get $erroptions -errorcode] debug.punk.pipe {[a+ yellow bold]pipematchnomatch error $result[a+]} 3 if {[lindex $ecode 0] eq "pipesyntax"} { set errordict [dict create error [dict create pipesyntax $result]] set nomatchvar $errordict return -options $erroptions $result } if {[lrange $ecode 0 1] eq "binding mismatch"} { #error {reason xxx} should only be returned for underlying tcl errors. error {someotherkey xxx} for structured errors such as a binding mismatch set errordict [dict create error [dict create mismatch $result]] set nomatchvar $errordict return -options $erroptions $result } set errordict [dict create error [dict create reason $result]] set nomatchvar $errordict #re-raise the error for pipeswitch to deal with return -options $erroptions $result } else { debug.punk.pipe {pipematchnomatch result $result } 4 set nomatchvar "" #uplevel 1 [list set $varname ""] #return raw result only - to pass through to pipeswitch return $result #return [dict create ok [dict create result $result]] } } #should only raise an error for pipe syntax errors - all other errors should be wrapped proc pipecase {args} { #debug.punk.pipe {pipecase level [info level] levelinfo [info level 0]} 9 set arglist [lassign $args assign] if {$assign eq ".="} { set cmdlist [list ::punk::pipeline .= "" "" {*}$arglist] } elseif {$assign eq "="} { #set cmdlist [list ::punk::pipeline = "" "" {*}$arglist] set cmdlist [list = {*}$arglist] } elseif {![punk::arg_is_script_shaped $assign] && [string first "=" $assign] >= 0} { #set re_dotequals {^([^ \t\r\n=\{]*)\.=$} #set re_equals {^([^ \t\r\n=\{]*)=$} if {[regexp {^([^ \t\r\n=]*)\.=.*} $assign _ returnvarspecs]} { set cmdlist [list ::punk::pipeline .= $returnvarspecs "" {*}$arglist] } elseif {[regexp {^([^ \t\r\n=]*)=.*} $assign _ returnvarspecs]} { set cmdlist [list $assign {*}$arglist] #set cmdlist [list ::punk::pipeline = $returnvarspecs "" {*}$arglist] } else { error "pipesyntax pipecase unable to interpret pipeline '$args'" } #todo - account for insertion-specs e.g x=* x.=/0* } else { #script? set cmdlist [list ::punk::pipeline .= "" "" {*}$args] } if {[catch {uplevel 1 [list if 1 $cmdlist]} result erroptions]} { #puts stderr "====>>> result: $result erroptions" set ecode [dict get $erroptions -errorcode] if {[lindex $ecode 0] eq "pipesyntax"} { #error $result return -options $erroptions $result } if {[lindex $ecode 0] eq "casenomatch"} { return -options $erroptions $result } if {[lrange $ecode 0 1] eq "binding mismatch"} { #error {reason xxx} should only be returned for underlying tcl errors. error {someotherkey xxx} for structured errors such as a binding mismatch #return [dict create error [dict create mismatch $result]] # #NOTE: casemismatch is part of the api for pipecase. It is a casemismatch rather than an error - because for a pipecase - a casemismatch is an expected event (many casemismatches - one match) return [dict create casemismatch $result] } #we can't always treat $result as a list - may be an error string which can't be represented as a list, and there may be no useful errorCode #todo - use errorCode instead if {[catch {lindex $result 0} word1]} { #tailcall error $result return -options $erroptions $result } else { if {$word1 in [list "switcherror" "funerror"]} { error $result "pipecase [lsearch -all -inline $args "*="]" } if {$word1 in [list "resultswitcherror" "resultfunerror"]} { #recast the error as a result without @@ok wrapping #use the tailcall return to stop processing other cases in the switch! tailcall return [dict create error $result] } if {$word1 eq "ignore"} { #suppress error, but use normal return return [dict create error [dict create suppressed $result]] } else { #normal tcl error #return [dict create error [dict create reason $result]] tailcall error $result "pipecase $args" [list caseerror] } } } else { tailcall return -errorcode [list casematch] [dict create ok [dict create result $result]] } } #note that pipeswitch deliberately runs in callers scope to have direct access to variables - it is akin to a control structure. #It also - somewhat unusually accepts args - which we provide as 'switchargs' #This is unorthodox/risky in that it will clobber any existing var of that name in callers scope. #Solve using documentation.. consider raising error if 'switchargs' already exists, which would require user to unset switchargs in some circumstances. proc pipeswitch {pipescript args} { #set nextargs $args #unset args #upvar args upargs #set upargs $nextargs upvar switchargs switchargs set switchargs $args uplevel 1 [::list ::if 1 $pipescript] } #static-closure version - because we shouldn't be writing back to calling context vars directly #Tcl doesn't (2023) have mutable closures - but for functional pipeline composition - we probably don't want that anyway! #pipeswitchc is preferable to pipeswitch in that we can access context without risk of affecting it, but is less performant. (particularly in global scope.. but that probably isn't an important usecase) proc pipeswitchc {pipescript args} { set binding {} if {[info level] == 1} { #up 1 is global set get_vars [list info vars] } else { set get_vars [list info locals] } set vars [uplevel 1 {*}$get_vars] set posn [lsearch $vars switchargs] set vars [lreplace $vars $posn $posn] foreach v $vars { upvar 1 $v var if {(![array exists var]) && [info exists var]} { lappend binding [list $v $var] ;#values captured as defaults for apply args. } } lappend binding [list switchargs $args] apply [list $binding $pipescript [uplevel 1 {::namespace current}]] } proc pipedata {data args} { #puts stderr "'$args'" set r $data for {set i 0} {$i < [llength $args]} {incr i} { set e [lindex $args $i] if {[catch {llength $e} seglen]} { #not a list - assume script and run anyway set r [apply [list {data} $e] $r] } else { if {[llength $e] == 1} { if {$e eq {>}} { #output to calling context. only pipedata return value and '> varname' should affect caller. incr i uplevel 1 [list set [lindex $args $i] $r] } elseif {$e in {% pipematch ispipematch}} { incr i set e2 [lindex $args $i] #set body [list $e {*}$e2] #append body { $data} set body [list $e {*}$e2] append body { {*}$data} set applylist [list {data} $body] #puts stderr $applylist set r [apply $applylist $r] } elseif {$e in [list pipeswitch pipeswitchc]} { #pipeswitch takes a script not a list. incr i set e2 [lindex $args $i] set body [list $e $e2] #pipeswitch takes 'args' - so expand $data when in pipedata context append body { {*}$data} #use applylist instead of uplevel when in pipedata context! #can use either switchdata/data but not vars in calling context of 'pipedata' command. #this is consistent with pipeswitch running in a % / .= pipeline which can only access vars in immediate calling context. set applylist [list {data} $body] #puts stderr $applylist set r [apply $applylist $r] } else { #puts "other single arg: [list $e $r]" append e { $data} set r [apply [list {data} $e] $r] } } elseif {[llength $e] == 0} { #do nothing - pass data through #leave r as is. } else { set r [apply [list {data} $e] $r] } } } return $r } #todo - implement colour resets like the perl module: #https://metacpan.org/pod/Text::ANSI::Util #(saves up all ansi color codes since previus color reset and replays the saved codes after our highlighting is done) proc ansi+ {args} { variable colour_disabled if {$colour_disabled == 1} { return } tailcall ::shellfilter::ansi::+ {*}$args } proc colour {{onoff {}}} { variable colour_disabled if {[string length $onoff]} { set onoff [string tolower $onoff] if {$onoff in [list 1 on true yes]} { interp alias "" a+ "" punk::ansi+ set colour_disabled 0 } elseif {$onoff in [list 0 off false no]} { interp alias "" a+ "" control::no-op set colour_disabled 1 } else { error "punk::colour expected 0|1|on|off|true|false|yes|no" } } catch {repl::reset_prompt} return [expr {!$colour_disabled}] } proc scriptlibpath {{shortname {}} args} { upvar ::punk::config::running running_config set scriptlib [dict get $running_config scriptlib] if {[string match "lib::*" $shortname]} { set relpath [string map [list "lib::" "" "::" "/"] $shortname] set relpath [string trimleft $relpath "/"] set fullpath $scriptlib/$relpath } else { set shortname [string trimleft $shortname "/"] set fullpath $scriptlib/$shortname } return $fullpath } #todo - something better - 'previous' rather than reverting to startup proc channelcolors {{onoff {}}} { upvar ::punk::config::running running_config upvar ::punk::config::startup startup_config if {![string length $onoff]} { return [list stdout [dict get $running_config color_stdout] stderr [dict get $running_config color_stderr]] } else { set lower_onoff [string tolower $onoff] if {$lower_onoff in [list true on 1]} { dict set running_config color_stdout [dict get $startup_config color_stdout] dict set running_config color_stderr [dict get $startup_config color_stderr] } elseif {$lower_onoff in [list false off 0]} { dict set running_config color_stdout "" dict set running_config color_stderr "" } else { error "channelcolors: invalid value $onoff - expected true|false|on|off|1|0" } } return [list stdout [dict get $running_config color_stdout] stderr [dict get $running_config color_stderr]] } #useful for aliases e.g treemore -> xmore tree proc xmore {args} { if {[llength $args]} { {*}$args | more } else { error "usage: punk::xmore args where args are run as {*}\$args | more" } } #review - is this intended to be useful/callable on non-windows platforms? #it should in theory be useable from another platform that wants to create a path for use on windows. proc winpath {path} { #NOTE: tcl file exists gives different answers on windows for paths like /c depending on cwd (presumably based on file pathtype of volumerelative) #This is add odds with attempting to navigate on a windows system which has cygwin, wsl etc... It also makes it difficult for functions intended to operate independent of CWD. #e.g there is potential confusion when there is a c folder on c: drive (c:/c) #I will attempt to provide a coherent operation for winpath ./ ../ etc , but it may disallow for example; change to /something or /x where these don't match a driveletter or /mnt #whereas tcl may allow cd to /something if a something folder happens to exist on the current volume based on cwd. #I think it's preferable to require an explicit driveletter /x or /mnt when using unix-like paths on windows - but practical considerations may prove me wrong.. #It's possible that this function should also ignore the current set of driveletters - and operate completely independent of whether a path actually exists #This makes it hard to use things like 'file normalize' - which also looks at things like current volume. # #Note for example the results of 'which' grep on windows can produce a path like /c/Users/somewhere/bin/grep #which tcl's file normalize may change to C:/c/Users or X:/c/Users - based on current volumen. Given that C:/c might exist - this can be problematic in a couple of ways. #The mixing of unix-like and windows commands on the same machine is a large part of the problem.. but this mix is now common # #convert /c/etc to C:/etc set re_slash_x_slash {^/([[:alpha:]]){1}/.*} set re_slash_else {^/([[:alpha:]]*)(.*)} set volumes [file volumes] #exclude things like //zipfs:/ set driveletters [list] foreach v $volumes { if {[regexp {^([[:alpha:]]){1}:/$} $v _ letter]} { lappend driveletters $letter } } #puts stderr "->$driveletters" if {[regexp $re_slash_x_slash $path _ letter]} { #upper case appears to be windows canonical form set path [string toupper $letter]:/[string range $path 3 end] } elseif {[regexp {^/mnt|MNT/([[:alpha:]]){1}/.*} $path _ letter]} { set path [string toupper $letter]:/[string range $path 7 end] } elseif {[regexp $re_slash_else $path _ firstpart remainder]} { #could be for example /c or /something/users if {[string length $firstpart] == 1} { set letter $firstpart set path [string toupper $letter]:/ } else { #attempt to use cygpath helper if {![catch { set cygpath [runout -n cygpath -w $path] ;#! set ::punk::last_run_display [list] ;#hack - review shouldn't really be necessary.. but because we call winpath from ./ - the repl looks for last_run_display } errM]} { set path [string map [list "\\" "/"] $cygpath] } else { error "Path '$path' does not appear to be in a standard form. For unix-like paths on windows such as /x, x must correspond to a drive letter. Consider installing cygwin's cygpath tool to see if that helps." } } } #puts stderr "=> $path" #things like 'which' seem to return a path minus the .exe - so we'll just test the containing folder # #By now file normalize shouldn't do too many shannanigans related to cwd.. #We want it to look at cwd for relative paths.. but we don't consider things like /c/Users to be relative even on windows if {![file exists [file dirname $path]]} { set path [file normalize $path] #may still not exist.. that's ok. } #file normalize may change backslashes to forward slashes.. including things like the special \\?\ prefix which is intended to stop windows api from parsing a name #2023 - this is ok as //?/ also seems to work.. but it is unclear if that is because Tcl is re-converting to backslashes if {[punk::winpath_illegalname_test $path]} { set path [punk::winpath_illegalname_fix $path] } return $path } proc windir {path} { return [file dirname [punk::winpath $path]] } #environment path as list # #return *appendable* pipeline - i.e no args via <| proc path_list_pipe {{glob *}} { if {$::tcl_platform(platform) eq "windows"} { set sep ";" } else { # : ok for linux/bsd ... mac? set sep ":" } set cond [string map [list $glob] {expr {[string length $item] && [string match $item]}}] #env members such as ''path' not case sensitive on windows - but are on some other platforms (at least FreeBSD) return [list .= {set ::env(PATH)} |> .=/2 string trimright $sep |> .=/1 split $sep |> list_filter_cond $cond ] } proc path_list {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe } proc path {{glob *}} { set pipe [punk::path_list_pipe $glob] {*}$pipe |> list_as_lines } #------------------------------------------------------------------- #sh 'test' equivalent - to be used with exitcode of process # #single evaluation to get exitcode proc sh_test {args} { set a1 [lindex $args 0] if {$a1 in [list -b -c -d -e -f -h -L -s -S -x -w]} { set a2 [lindex $args 1] set attrinfo [file attributes $a2] if {[dict exists $attrinfo -vfs] && [dict get $attrinfo -vfs] == 1} { puts stderr "WARNING: external 'test' being called on vfs path. External command will probably not have access to the vfs. Use 'TEST' for Tcl view of vfs mounted filesystems." } } tailcall run test {*}$args } #whether v is an integer from perspective of unix test command. #can be be bigger than a tcl int or wide ie bignum - but must be whole number #test doesn't handle 1.0 - so we shouldn't auto-convert proc is_sh_test_integer {v} { if {[string first . $v] >=0 || [string first e $v] >= 0} { return false } #if it is double but not sci notation and has no dots - then we can treat as a large integer for 'test' if {[string is double -strict $v]} { return true } else { return false } } #can use double-evaluation to get true/false #faster tcl equivalents where possible to accuratley provide, and fallthrough to sh for compatibility of unimplemented #The problem with fallthrough is that sh/bash etc have a different view of existant files #e.g unix files such as /dev/null vs windows devices such as CON,PRN #e.g COM1 is mapped as /dev/ttyS1 in wsl (?) #Note also - tcl can have vfs mounted file which will appear as a directory to Tcl - but a file to external commands! #We will stick with the Tcl view of the file system. #User can use their own direct calls to external utils if #Note we can't support $? directly in Tcl - script would have to test ${?} or use [set ?] proc sh_TEST {args} { upvar ? lasterr set lasterr 0 set a1 [lindex $args 0] set a2 [lindex $args 1] set a3 [lindex $args 2] set fileops [list -b -c -d -e -f -h -L -s -S -x -w] if {[llength $args] == 1} { #equivalent of -n STRING set boolresult [expr {[string length $a1] != 0}] } elseif {[llength $args] == 2} { if {$a1 in $fileops} { if {$::tcl_platform(platform) eq "windows"} { #e.g trailing dot or trailing space if {[punk::winpath_illegalname_test $a2]} { #protect with \\?\ to stop windows api from parsing #will do nothing if already prefixed with \\?\ set a2 [punk::winpath_illegalname_fix $a2] } } } switch -- $a1 { -b { #dubious utility on FreeBSD, windows? #FreeBSD has dropped support for block devices - stating 'No serious applications rely on block devices' #Linux apparently uses them though if{[file exists $a2]} { set boolresult [expr {[file type $a2] eq "blockSpecial"}] } else { set boolresult false } } -c { #e.g on windows CON,NUL if {[file exists $a2]} { set boolresult [expr {[file type $a2] eq "characterSpecial"}] } else { set boolresult false } } -d { set boolresult [file isdirectory $a2] } -e { set boolresult [file exists $a2] } -f { #e.g on windows CON,NUL if {[file exists $a2]} { set boolresult [expr {[file type $a2] eq "file"}] } else { set boolresult false } } -h - -L { set boolresult [expr {[file type $a2] eq "link"}] } -s { set boolresult [expr {[file exists $a2] && ([file size $a2] > 0 )}] } -S { if {[file exists $a2]} { set boolresult [expr {[file type $a2] eq "socket"}] } else { set boolresult false } } -x { set boolresult [expr {[file exists $a2] && [file executable $a2]}] } -w { set boolresult [expr {[file exists $a2] && [file writable $a2]}] } -z { set boolresult [expr {[string length $a2] == 0}] } -n { set boolresult [expr {[string length $a2] != 0}] } default { puts stderr "sh_TEST: delegating 'test $args' to external 'test' command" #set boolresult [apply {arglist {uplevel #0 [runx test {*}$arglist]} ::} $args] set callinfo [runx test {*}$args] set errinfo [dict get $callinfo stderr] set exitcode [dict get $callinfo exitcode] if {[string length $errinfo]} { puts stderr "sh_TEST error in external call to 'test $args': $errinfo" set lasterr $exitcode } if {$exitcode == 0} { set boolresult true } else { set boolresult false } } } } elseif {[llength $args] == 3} { switch -- $a2 { "=" { #test does string comparisons set boolresult [string equal $a1 $a3] } "!=" { #string comparison set boolresult [expr {$a1 ne $a3}] } "-eq" { #test expects a possibly-large integer-like thing #shell scripts will if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 == $a3}] } "-ge" { if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 >= $a3}] } "-gt" { if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 > $a3}] } "-le" { if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 <= $a3}] } "-lt" { if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 < $a3}] } "-ne" { if {![is_sh_test_integer $a1]} { puts stderr "sh_TEST: invalid integer '$a1'" set lasterr 2 return false } if {![is_sh_test_integer $a3]} { puts stderr "sh_TEST: invalid integer '$a3'" set lasterr 2 return false } set boolresult [expr {$a1 != $a3}] } default { puts stderr "sh_TEST: delegating 'test $args' to external 'test' command" #set boolresult [apply {arglist {uplevel #0 [runx test {*}$arglist]} ::} $args] set callinfo [runx test {*}$args] set errinfo [dict get $callinfo stderr] set exitcode [dict get $callinfo exitcode] if {[string length $errinfo]} { puts stderr "sh_TEST error in external call to 'test $args': $errinfo" set lasterr $exitcode } if {$exitcode == 0} { set boolresult true } else { set boolresult false } } } } else { puts stderr "sh_TEST: delegating 'test $args' to external 'test' command" #set boolresult [apply {arglist {uplevel #0 [runx test {*}$arglist]} ::} $args] set callinfo [runx test {*}$args] set errinfo [dict get $callinfo stderr] set exitcode [dict get $callinfo exitcode] if {[string length $errinfo]} { puts stderr "sh_TEST error in external call to 'test $args': $errinfo" set lasterr $exitcode } if {$exitcode == 0} { set boolresult true } else { set boolresult false } } #normalize 1,0 etc to true,false #we want to make it obvious we are not just reporting exitcode 0 for example - which represents true in tcl. if {$boolresult} { return true } else { if {$lasterr == 0} { set lasterr 1 } return false } } proc sh_echo {args} { tailcall run echo {*}$args } proc sh_ECHO {args} { tailcall apply {arglist {uplevel #0 [run echo {*}$arglist]} ::} $args } #sh style true/false for process exitcode. 0 is true - everything else false proc exitcode {args} { set c [lindex $args 0] if {[string is integer -strict $c]} { #return [expr {$c == 0}] #return true/false to make it clearer we are outputting tcl-boolean inverse mapping from the shell style 0=true if {$c == 0} { return true } else { return false } } else { return false } } #------------------------------------------------------------------- namespace export help aliases alias nsjoin nsprefix cdwin cdwindir dirfiles dirfiles_dict exitcode winpath windir % pipedata pipecase pipeline pipematch pipeswitch pipeswitchc pipecase linelist linesort inspect list_as_lines is_list_all_in_list is_list_all_ni_list val treemore namespace ensemble create proc hasglobs {str} { expr {[string first * $str]>=0 || [string first ? $str]>=0} } #todo - package up as navns proc corp {path} { #thanks to Richard Suchenwirth for the original - wiki.tcl-lang.org/page/corp #Note: modified here to support aliases and relative/absolute name (with respect to namespace .ie ::name vs name) if {[info exists ::auto_index($path)]} { set body "# $::auto_index($path)\n" } else { set body "" } if {[string match ::* $path]} { set targetns [nsprefix $path] set name [nstail $path] } else { set thispath [uplevel 1 [list nsthis $path]] set targetns [nsprefix $thispath] set name [nstail $thispath] #set upns [uplevel 1 [list namespace current]] } #puts stderr "corp upns:$upns" #set name [string trim $name :] #set origin [namespace origin ${upns}::$name] set origin [punk::nseval $targetns [list ::namespace origin $name]] #An renamed alias may exist that is the same name as a proc that was created later.. so we must check for the proc before looking into aliases! if {$origin ni [info procs $origin]} { #It seems an interp alias of "::x"" behaves the same as "x" #But we can't create both at the same time - and they have to be queried by the exact name. #So we query for alias with and without leading :: set alias_qualified [interp alias {} [string trim $origin :]] set alias_unqualified [interp alias {} $origin] if {[string length $alias_qualified] && [string length $alias_unqualified]} { #our assumptions are wrong.. change in tcl version? puts stderr "corp: Found alias for unqualified name:'[string trim $origin :]' and qualified name: '$origin' - unexpected (assumed impossible as at Tcl 8.6)" if {$alias_qualified ne $alias_unqalified} { } else { set alias $alias_unqualified } } else { set alias ${alias_qualified}${alias_unqualified} ;#concatenate - as at least one should be empty } if {[string length $alias]} { #todo - consider following alias-chain to ultimate proc? #it can always be manually done with: #.= corp $name |/1> corp |/1> corp .. #depending on number of aliases in the chain return [list alias {*}$alias] } } if {[nsprefix $origin] ne [nsprefix [nsjoin ${targetns} $name]]} { append body "# namespace origin $origin" } append body [info body $origin] set argl {} foreach a [info args $origin] { if {[info default $origin $a def]} { lappend a $def } lappend argl $a } list proc [nsjoin ${targetns} $name] $argl $body } proc nsjoin {prefix name} { if {[string match ::* $name]} { if {[string length $prefix]} { error "nsjoin: won't join non-empty prefix to absolute namespace path '$name'" } return $name } if {$prefix eq "::"} { return ::$name } return ${prefix}::$name } proc nsjoinall {prefix args} { #if {![llength $args]} { # error "usage: nsjoinall prefix relativens \[relativens ...\]" #} set segments [list $prefix] foreach sub $args { if {[string match ::* $sub]} { if {[string length [concat {*}$segments]]} { error "nsjoin: won't join non-empty namespace prefix to absolute namespace path '$sub'" } } lappend segments $sub } set nonempty_segments [list] foreach s $segments { if {[string length $s]} { lappend nonempty_segments $s } } if {$prefix eq "::"} { return ::[join [lrange $nonempty_segments 1 end] ::] } return [join $nonempty_segments ::] } proc nsprefix {{name ""}} { set rawprefix [string range $name 0 end-[string length [punk::nstail $name]]] if {$rawprefix eq "::"} { return $rawprefix } else { return [string trimright $rawprefix :] } } #namespace tail which handles :::cmd ::x:::y ::x:::/y etc #todo - raise error for unexpected sequences such as :::: or more than 2 colons together. proc nstail {nspath args} { set mapped [string map [list :: \u0FFF] $nspath] set parts [split $mapped \u0FFF] set defaults [list -strict 0] set opts [dict merge $defaults $args] set strict [dict get $opts -strict] if {$strict} { foreach p $parts { if {[string match :* $p]} { error "nstail unpaired colon ':' in $nspath" } } } #e.g ::x::y:::z should return ":z" return [lindex $parts end] } #return a list of namespace segments - always with leading empty string for fully qualified namespace (ie for ::x) #'supports' weird namespaces /commands such as :x :::x ::x:::y #Can be used to either suppor use of such namespaces/commands - or as part of validation to disallow them #as opposed to silent behaviour of Tcl namespace commands which don't handle them consistently (for tcl 8.x anyway Review tcl 9) #Note that for ::x:: the trailing :: cannot represent a trailing namespace part being an empty string #This is because Tcl's 'namespace eval "" ""' reports 'only global namespace can have empty name' # proc nsparts {nspath} { set mapped [string map [list :: \u0FFF] $nspath] set parts [split $mapped \u0FFF] if {[lindex $parts end] eq ""} { } return $parts } #review ??? proc ns_relative_to_location {name} { if {[string match ::* $name]} { error "ns_relative_to_location accepts a relative namespace name only ie one without leading ::" } } proc ns_absolute_to_location {name} { } interp alias {} nsjoin {} punk::nsjoin interp alias {} nsprefix {} punk::nsprefix interp alias {} nstail {} punk::nstail #tcl 8.x has creative writing var weirdness.. tcl 9 is likely to differ proc nsvars {{nsglob "*"}} { set ns_absolute [uplevel 1 [list punk::nspath_here_absolute $nsglob]] #set commandns [uplevel 1 [list namespace current]] set searchns [nsprefix $ns_absolute] ;#the ns within which we want to searchns set searchall [nsjoin $searchns *] ;#will correctly form ::* or ::childns::* set rawresult [info vars $searchall] ;#will not find vars in ::x:::y etc. - seems to be impossible in tcl 8.x set matched_fullpath [list] foreach r $rawresult { lappend matched_fullpath [nstail $r] } set location [nsprefix $ns_absolute] set tailmatch [nstail $ns_absolute] set raw_matched_in_ns [punk::nseval $location [list ::info vars $tailmatch]] #NOTE: tcl <9 will read vars from global namespace - so we are only checking the intersection here #(this is due to info vars ::etc:::blah failing to handle additional colon) set matched_in_ns [list] set result [list] foreach r $raw_matched_in_ns { set m [nstail $r] lappend matched_in_ns $m if {$m in $matched_fullpath} { lappend result $m } } return [list_as_lines [lsort $result]] #.= lsort $result |> list_as_lines } interp alias {} nsvars {} punk::nsvars interp alias {} nsorigin {} apply {ns {namespace origin [uplevel 1 nsthis $ns]} ::} #todo - walk up each ns - testing for possibly weirdly named namespaces proc nsexists {nspath} { } #create possibly nested namespace structure - but only if not already existant proc n/new {args} { variable ns_current if {![llength $args]} { error "usage: :/new \[ ...\]" } set a1 [lindex $args 0] set is_absolute [string match ::* $a1] if {$is_absolute} { set nspath [nsjoinall {*}$args] } else { if {[string match :* $a1]} { puts stderr "n/new WARNING namespace with leading colon '$a1' is likely to have unexpected results" } set nspath [nsjoinall $ns_current {*}$args] } set ns_exists [punk::nseval [punk::nsprefix $nspath] [list namespace exists [punk::nstail $nspath] ]] if {$ns_exists} { error "Namespace $nspath already exists" } #namespace eval [nsprefix $nspath] [list namespace eval [nstail $nspath] {}] punk::nseval [nsprefix $nspath] [list ::namespace eval [nstail $nspath] {}] n/ $nspath } #nn/ ::/ nsup/ - back up one namespace level proc nsup/ {v args} { variable ns_current if {$ns_current eq "::"} { puts stderr "Already at global namespace '::'" } else { set out "" set nsq [nsprefix $ns_current] if {$v eq "/"} { set out [punk::get_nslist -match [nsjoin $nsq *] -types [list children]] } else { set out [punk::get_nslist -match [nsjoin $nsq *] -types [list all]] } #set out [punk::nslist [nsjoin $nsq *]] set ns_current $nsq append out "\n$ns_current" return $out } } #experimental #is there ever any difference to {namespace current}? #interp alias {} nsthis {} .= .= namespace code {namespace current} |> .=* <0/#| #interp alias {} nsthis {} namespace current interp alias {} nsthis {} punk::nspath_here_absolute proc nspath_here_absolute {{nspath ""}} { set path_is_absolute [expr {[string match ::* $nspath]}] if {$path_is_absolute} { return $nspath } set ns_caller [uplevel 1 {namespace current}] if {![string length $nspath]} { return $ns_caller } return [punk::nsjoin $ns_caller $nspath] } proc nspath_to_absolute {nspath base} { set path_is_absolute [expr {[string match ::* $nspath]}] if {$path_is_absolute} { return $nspath } if {![string length $nspath]} { return $base } return [punk::nsjoin $base $nspath] } #cli command - impure - relies on caller/ns_current proc nslist_dict {{glob "*"}} { set ns_absolute [uplevel 1 [list punk::nspath_here_absolute $glob]] return [get_nslist_dict $ns_absolute] } proc nslist_dict1 {{glob "*"}} { variable ns_current ;#keep fully qualified ie :: or ::etc set ns_caller [uplevel 1 {namespace current}] puts "nslist_dict ns_caller: $ns_caller (ns_current: $ns_current)" set glob_is_absolute [expr {[string match ::* $glob]}] set globquals [namespace qualifiers $glob] if {[string length $globquals]} { if {$glob_is_absolute} { set fqpath $globquals } else { set fqpath ${ns_caller}::${globquals} } } else { if {$glob_is_absolute} { set fqpath :: } else { set fqpath $ns_caller } } #puts stderr ">>fqpath $fqpath" set globtail [nstail $glob] if {[hasglobs $globtail]} { set location $fqpath set glob $globtail } else { if {$fqpath eq "::"} { set location ::${globtail} } else { if {[string length $globtail]} { set location ${fqpath}::${globtail} } else { set location ${fqpath} } } set glob * } return [get_nslist_dict ${location}::$glob] } #recursive nseval - for introspection of weird namespace trees #approx 10x slower than normal namespace eval - but still only a few microseconds.. fine for repl introspection proc nseval_script {location} { set parts [nsparts $location] if {[lindex $parts 0] eq ""} { lset parts 0 :: } if {[lindex $parts end] eq ""} { set parts [lrange $parts 0 end-1] } set body "" set i 0 set tails [lrepeat [llength $parts] ""] foreach ns $parts { set cmdlist [list namespace eval $ns] set t "" if {$i > 0} { append body " " } append body $cmdlist if {$i == ([llength $parts] -1)} { append body "