From 7b2dd566668ae15d0a4126e7fc4cfa6cf643b786 Mon Sep 17 00:00:00 2001 From: Julian Noble Date: Thu, 3 Oct 2024 15:50:04 +1000 Subject: [PATCH] update src/vfs_vfscommon modules --- src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm | 10 + src/vfs/_vfscommon/modules/punk/args-0.1.0.tm | 145 ++++- src/vfs/_vfscommon/modules/punk/config-0.1.tm | 5 +- .../_vfscommon/modules/punk/fileline-0.1.0.tm | 1 - .../utility/scriptappwrappers/multishell.cmd | 6 +- src/vfs/_vfscommon/modules/punk/repl-0.1.tm | 19 +- src/vfs/_vfscommon/modules/punk/trie-0.1.0.tm | 600 ++++++++++++++++++ 7 files changed, 753 insertions(+), 33 deletions(-) create mode 100644 src/vfs/_vfscommon/modules/punk/trie-0.1.0.tm diff --git a/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm b/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm index 8424ce07..d0fdc8ec 100644 --- a/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/fauxlink-0.1.0.tm @@ -66,6 +66,16 @@ # "my-program-files#++server+c+Program%20Files.fxlnk" #If we needed the old-style literal %20 it would become # "my-program-files#++server+c+Program%2520Files.fxlnk" +# +# The file:// scheme on windows supposedly *does* decode %xx (for use in a browser) +# e.g +# pfiles#file%3a++++localhost+c+Program%2520files +# The browser will work with literal spaces too though - so it could just as well be: +# pfiles#file%3a++++localhost+c+Program%20files +#windows may default to using explorer.exe instead of a browser for file:// urls though +#and explorer doesn't want the literal %20. It probably depends what API the file:// url is to be passed to? +#in a .url shortcut either literal space or %20 will work ie %xx values are decoded + #*** !doctools diff --git a/src/vfs/_vfscommon/modules/punk/args-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/args-0.1.0.tm index 5e270ac8..c8a6ec84 100644 --- a/src/vfs/_vfscommon/modules/punk/args-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/args-0.1.0.tm @@ -201,6 +201,7 @@ #[para] packages used by punk::args #[list_begin itemized] package require Tcl 8.6- +#optional? punk::trie #*** !doctools #[item] [package {Tcl 8.6-}] @@ -293,6 +294,7 @@ tcl::namespace::eval punk::args { -validate_without_ansi 0\ -strip_ansi 0\ -nocase 0\ + -choiceprefix 1\ -multiple 0\ ] set valspec_defaults [tcl::dict::create\ @@ -301,8 +303,12 @@ tcl::namespace::eval punk::args { -allow_ansi 1\ -validate_without_ansi 0\ -strip_ansi 0\ + -nocase 0\ + -choiceprefix 1\ -multiple 0\ ] + #we need a -choiceprefix default even though it often doesn't apply so we can look it up to display in Help if there are -choices + #default to 1 for convenience #checks with no default #-minlen -maxlen -range @@ -415,11 +421,11 @@ tcl::namespace::eval punk::args { -anyopts { set opt_any $v } - -minlen - -maxlen - -range - -choices - -choicelabels { + -minlen - -maxlen - -range - -choices - -choicelabels - -choiceprefix { #review - only apply to certain types? tcl::dict::set optspec_defaults $k $v } - -nominlen - -nomaxlen - -norange - -nochoices - -nochoicelabels { + -nominlen - -nomaxlen - -norange - -nochoices - -nochoicelabels - -nocase { if {$v} { tcl::dict::unset optspec_defaults $k } @@ -459,7 +465,7 @@ tcl::namespace::eval punk::args { tcl::dict::set optspec_defaults $k $v } default { - set known { -any -anyopts -minlen -maxlen -range -choices -choicelabels\ + set known { -any -anyopts -minlen -maxlen -range -choices -choicelabels -choiceprefix -nocase\ -nominlen -nomaxlen -norange -nochoices -nochoicelabels\ -type -optional -allow_ansi -validate_without_ansi -strip_ansi -multiple\ } @@ -479,7 +485,7 @@ tcl::namespace::eval punk::args { -maxvalues { set val_max $v } - -minlen - -maxlen - -range - -choices - -choicelabels { + -minlen - -maxlen - -range - -choices - -choicelabels - -choiceprefix - -nocase { #review - only apply to certain types? tcl::dict::set valspec_defaults $k $v } @@ -520,7 +526,7 @@ tcl::namespace::eval punk::args { } default { set known { -min -minvalues -max -maxvalues\ - -minlen -maxlen -range -choices -choicelabels\ + -minlen -maxlen -range -choices -choicelabels -choiceprefix -nocase\ -nominlen -nomaxlen -norange -nochoices -nochoicelabels\ -type -optional -allow_ansi -validate_without_ansi -strip_ansi -multiple\ } @@ -596,12 +602,12 @@ tcl::namespace::eval punk::args { } } } - -default - -solo - -range - -choices - -choicelabels - -minlen - -maxlen - -nocase - -optional - -multiple - -validate_without_ansi - -allow_ansi - -strip_ansi - -help - -ARGTYPE { + -default - -solo - -range - -choices - -choiceprefix - -choicelabels - -choiceprefix - -minlen - -maxlen - -nocase - -optional - -multiple - -validate_without_ansi - -allow_ansi - -strip_ansi - -help - -ARGTYPE { #review -solo 1 vs -type none ? tcl::dict::set spec_merged $spec $specval } default { - set known_argspecs [list -default -type -range -choices -nocase -optional -multiple -validate_without_ansi -allow_ansi -strip_ansi -help] + set known_argspecs [list -default -type -range -choices -choiceprefix -nocase -optional -multiple -validate_without_ansi -allow_ansi -strip_ansi -help] error "punk::args::get_dict - unrecognised key '$spec' in specifications for argument '$argname' Known option specification keys: $known_argspecs" } } @@ -752,7 +758,28 @@ tcl::namespace::eval punk::args { #set greencheck [a+ web-limegreen]\u2713[a] set greencheck [a+ brightgreen]\u2713[a] - foreach arg [dict get $spec_dict opt_names] { + if {![catch {package require punk::trie}]} { + set opt_names_display [list] + set trie [punk::trie::trieclass new {*}[dict get $spec_dict opt_names]] + set idents [dict get [$trie shortest_idents ""] scanned] + $trie destroy + set M "\x1b\[32m" ;#mark in green + set RST "\x1b\[m" + foreach c [dict get $spec_dict opt_names] { + set id [dict get $idents $c] + if {$id eq $c} { + lappend opt_names_display $M$c$RST + } else { + set idlen [string length $id] + lappend opt_names_display "$M[string range $c 0 $idlen-1]$RST[string range $c $idlen end]" + } + } + } else { + set opt_names_display [dict get $spec_dict opt_names] + } + + + foreach argshow $opt_names_display arg [dict get $spec_dict opt_names] { set arginfo [dict get $spec_dict arg_info $arg] if {[dict exists $arginfo -default]} { #set default $c_default[dict get $arginfo -default] @@ -763,14 +790,47 @@ tcl::namespace::eval punk::args { set help [punk::lib::dict_getdef $arginfo -help ""] if {[dict exists $arginfo -choices]} { if {$help ne ""} {append help \n} - append help "Choices: [dict get $arginfo -choices]" + if {[dict get $arginfo -nocase]} { + set casemsg " (case insensitive)" + } else { + set casemsg " (case sensitive)" + } + if {[dict get $arginfo -choiceprefix]} { + set prefixmsg " (choice prefix allowed)" + } else { + set prefixmsg "" + } + append help "Choices$prefixmsg$casemsg" + if {[catch {package require punk::trie}]} { + append help "\n " [join [dict get $arginfo -choices] "\n "] + } else { + if {[catch { + set trie [punk::trie::trieclass new {*}[dict get $arginfo -choices]] + set idents [dict get [$trie shortest_idents ""] scanned] + $trie destroy + set M "\x1b\[32m" ;#mark in green + set RST "\x1b\[m" + foreach c [dict get $arginfo -choices] { + set id [dict get $idents $c] + if {$id eq $c} { + append help "\n " "$M$c$RST" + } else { + set idlen [string length $id] + append help "\n " "$M[string range $c 0 $idlen-1]$RST[string range $c $idlen end]" + } + } + } errM]} { + puts stderr "prefix marking failed\n$errM" + append help "\n " [join [dict get $arginfo -choices] "\n "] + } + } } if {[punk::lib::dict_getdef $arginfo -multiple 0]} { set multiple $greencheck } else { set multiple "" } - $t add_row [list $arg [dict get $arginfo -type] $default $multiple $help] + $t add_row [list $argshow [dict get $arginfo -type] $default $multiple $help] if {$arg eq $badarg} { $t configure_row [expr {[$t row_count]-1}] -ansibase $c_badarg } @@ -785,7 +845,40 @@ tcl::namespace::eval punk::args { set help [punk::lib::dict_getdef $arginfo -help ""] if {[dict exists $arginfo -choices]} { if {$help ne ""} {append help \n} - append help "Choices: [dict get $arginfo -choices]" + if {[dict get $arginfo -nocase]} { + set casemsg " (case insensitive)" + } else { + set casemsg " (case sensitive)" + } + if {[dict get $arginfo -choiceprefix]} { + set prefixmsg " (choice prefix allowed)" + } else { + set prefixmsg "" + } + append help "Choices$prefixmsg$casemsg" + if {[catch {package require punk::trie}]} { + append help "\n " [join [dict get $arginfo -choices] "\n "] + } else { + if {[catch { + set trie [punk::trie::trieclass new {*}[dict get $arginfo -choices]] + set idents [dict get [$trie shortest_idents ""] scanned] + $trie destroy + set M "\x1b\[32m" ;#mark in green + set RST "\x1b\[m" + foreach c [dict get $arginfo -choices] { + set id [dict get $idents $c] + if {$id eq $c} { + append help "\n " "$M$c$RST" + } else { + set idlen [string length $id] + append help "\n " "$M[string range $c 0 $idlen-1]$RST[string range $c $idlen end]" + } + } + } errM]} { + puts stderr "prefix marking failed\n$errM" + append help "\n " [join [dict get $arginfo -choices] "\n "] + } + } } if {[punk::lib::dict_getdef $arginfo -multiple 0]} { set multiple $greencheck @@ -1429,20 +1522,38 @@ tcl::namespace::eval punk::args { } if {$has_choices} { #todo -choicelabels - set choices [tcl::dict::get $thisarg -choices] - set nocase [tcl::dict::get $thisarg -nocase] + set choices [tcl::dict::get $thisarg -choices] + set choiceprefix [tcl::dict::get $thisarg -choiceprefix] + set nocase [tcl::dict::get $thisarg -nocase] foreach e $vlist e_check $vlist_check { if {$nocase} { - set casemsg "(case insensitive)" + set casemsg " (case insensitive)" set choices_test [tcl::string::tolower $choices] set v_test [tcl::string::tolower $e_check] } else { - set casemsg "(case sensitive)" + set casemsg " (case sensitive)" set v_test $e_check set choices_test $choices } - if {$v_test ni $choices_test} { - arg_error "Option $argname for [Get_caller] must be one of the listed values $choices $casemsg. Received: '$e'" $argspecs $argname + set choice_ok 0 + if {$choiceprefix} { + if {![catch {tcl::prefix::match $choices_test $v_test} chosen]} { + set choice_ok 1 + #can we handle empty string as a choice? It should just work - REVIEW/test + set choice [lsearch -inline -nocase $choices $chosen] ;#map possibly lcased choice back to original case in choices list + if {[tcl::dict::get $thisarg -ARGTYPE] eq "option"} { + tcl::dict::set opts $argname $choice + } else { + tcl::dict::set values_dict $argname $choice + } + } + set prefixmsg " (or a unique prefix of a value)" + } else { + set prefixmsg "" + set choice_ok [expr {$v_test in $choices_test}] + } + if {!$choice_ok} { + arg_error "Option $argname for [Get_caller] must be one of the listed values:\n [join $choices "\n "]\n$casemsg$prefixmsg. Received: '$e'" $argspecs $argname } } } diff --git a/src/vfs/_vfscommon/modules/punk/config-0.1.tm b/src/vfs/_vfscommon/modules/punk/config-0.1.tm index 206b560b..1e4de9ec 100644 --- a/src/vfs/_vfscommon/modules/punk/config-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/config-0.1.tm @@ -362,10 +362,11 @@ tcl::namespace::eval punk::config { proc configure {args} { set argd [punk::args::get_dict { - - whichconfig -type string -choices {startup running} + *values -min 1 -max 1 + whichconfig -type string -choices {startup running stop} } $args] + return "unimplemented - $argd" } proc show {whichconfig {globfor *}} { diff --git a/src/vfs/_vfscommon/modules/punk/fileline-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/fileline-0.1.0.tm index 7e1ee14c..22178177 100644 --- a/src/vfs/_vfscommon/modules/punk/fileline-0.1.0.tm +++ b/src/vfs/_vfscommon/modules/punk/fileline-0.1.0.tm @@ -290,7 +290,6 @@ namespace eval punk::fileline::class { -showconfig 0\ -boundaryheader {Boundary %i% at %b%}\ ] - set known_opts [dict keys $defaults] foreach {k v} $args { switch -- $k { -ansi - -offset - -displaybytes - -truncatedmark - -completemark - -moremark - -continuemark - -linemaxwidth - -linebase - -limit - -boundaries - -showconfig - -boundaryheader { diff --git a/src/vfs/_vfscommon/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd b/src/vfs/_vfscommon/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd index 9de4c125..2975975d 100644 --- a/src/vfs/_vfscommon/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd +++ b/src/vfs/_vfscommon/modules/punk/mix/templates/utility/scriptappwrappers/multishell.cmd @@ -417,9 +417,9 @@ set -- "$@" "a=[Hide <#;Hide set;s 1 list]"; set -- : "$@";$1 = @' rename set ""; rename s set; set k {-- "$@" "a}; if {[info exists ::env($k)]} {unset ::env($k)} ;# tidyup and restore Hide :exit_multishell;Hide {<#};Hide '@ namespace eval ::punk::multishell { - set last_script_root [file dirname [file normalize ${argv0}/__]] + set last_script_root [file dirname [file normalize ${::argv0}/__]] set last_script [file dirname [file normalize [info script]/__]] - if {[info exists argv0] && + if {[info exists ::argv0] && $last_script eq $last_script_root } { set ::punk::multishell::is_main($last_script) 1 ;#run as executable/script - likely desirable to launch application and return an exitcode @@ -434,7 +434,7 @@ namespace eval ::punk::multishell { if {![info exists ::punk::multishell::is_main($script_name)]} { #e.g a .dll or something else unanticipated puts stderr "Warning punk::multishell didn't recognize info script result: $script_name - will treat as if sourced and return instead of exiting" - puts stderr "Info: script_root: [file dirname [file normalize ${argv0}/__]]" + puts stderr "Info: script_root: [file dirname [file normalize ${::argv0}/__]]" return 0 } return [set ::punk::multishell::is_main($script_name)] diff --git a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm index 86908ae6..fe55bfd6 100644 --- a/src/vfs/_vfscommon/modules/punk/repl-0.1.tm +++ b/src/vfs/_vfscommon/modules/punk/repl-0.1.tm @@ -2580,15 +2580,15 @@ namespace eval repl { set codethread_cond [thread::cond create] ;#repl::codethread_cond held by parent(repl) vs punk::repl::codethread::replthread_cond held by child(codethread) set codethread_mutex [thread::mutex create] - thread::send $codethread [string map [list %args% [list $opts]\ - %argv0% [list $::argv0]\ - %argv% [list $::argv]\ - %argc% [list $::argc]\ - %replthread% [thread::id]\ - %replthread_cond% $codethread_cond\ - %replthread_interp% [list $opt_callback_interp]\ - %tmlist% [list [tcl::tm::list]]\ - %autopath% [list $::auto_path]\ + thread::send $codethread [string map [list %args% [list $opts]\ + %argv0% [list $::argv0]\ + %argv% [list $::argv]\ + %argc% [list $::argc]\ + %replthread% [thread::id]\ + %replthread_cond% $codethread_cond\ + %replthread_interp% [list $opt_callback_interp]\ + %tmlist% [list [tcl::tm::list]]\ + %autopath% [list $::auto_path]\ ] { set ::argv0 %argv0% set ::argv %argv% @@ -2944,7 +2944,6 @@ namespace eval repl { #catch {package require packageTrace} package require punk package require shellrun - package require shellfilter set running_config $::punk::config::running if {[string length [dict get $running_config color_stderr]] && [punk::console::colour]} { diff --git a/src/vfs/_vfscommon/modules/punk/trie-0.1.0.tm b/src/vfs/_vfscommon/modules/punk/trie-0.1.0.tm new file mode 100644 index 00000000..6f7f9d14 --- /dev/null +++ b/src/vfs/_vfscommon/modules/punk/trie-0.1.0.tm @@ -0,0 +1,600 @@ +# -*- tcl -*- +# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from -buildversion.txt +# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm +# +# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. +# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# (C) CMcC 2010 +# +# @@ Meta Begin +# Application punk::trie 0.1.0 +# Meta platform tcl +# Meta license +# @@ Meta End + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# doctools header +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[manpage_begin shellspy_module_punk::trie 0 0.1.0] +#[copyright "2010"] +#[titledesc {punk::trie API}] [comment {-- Name section and table of contents description --}] +#[moddesc {punk::trie}] [comment {-- Description at end of page heading --}] +#[require punk::trie] +#[keywords module datastructure trie] +#[description] tcl trie implementation courtesy of CmcC (tcl wiki) +#[para] - + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section Overview] +#[para] overview of punk::trie +#[subsection Concepts] +#[para] - + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Requirements +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[subsection dependencies] +#[para] packages used by punk::trie +#[list_begin itemized] + +package require Tcl 8.6- +#*** !doctools +#[item] [package {Tcl 8.6}] + +# #package require frobz +# #*** !doctools +# #[item] [package {frobz}] + +#*** !doctools +#[list_end] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +#*** !doctools +#[section API] + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# oo::class namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#tcl::namespace::eval punk::trie::class { + #*** !doctools + #[subsection {Namespace punk::trie::class}] + #[para] class definitions + #if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { + #*** !doctools + #[list_begin enumerated] + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + #} +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Base namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::trie { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + #variable xyz + proc Dolog {lvl txt} { + #return "$lvl -- $txt" + #logger calls this in such a way that a straight uplevel can get us the vars/commands in messages substituted + set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] punk::trie '[uplevel [list subst $txt]]'" + puts stderr $msg + } + package require logger + logger::initNamespace ::punk::trie + foreach lvl [logger::levels] { + interp alias {} ::punk::trie::Log_$lvl {} ::punk::trie::Dolog $lvl + log::logproc $lvl ::punk::trie::Log_$lvl + } + #namespace path ::punk::trie::log + + #[para] class definitions + if {[tcl::info::commands [tcl::namespace::current]::trieclass] eq ""} { + #*** !doctools + #[list_begin enumerated] + oo::class create [tcl::namespace::current]::trieclass { + variable trie id + + method matches {t what} { + #*** !doctools + #[call class::trieclass [method matches] [arg t] [arg what]] + #[para] search for longest prefix, return matching prefix, element and suffix + + set matches {} + set wlen [string length $what] + foreach k [lsort -decreasing -dictionary [dict keys $t]] { + set klen [string length $k] + set match "" + for {set i 0} {$i < $klen + && $i < $wlen + && [string index $k $i] eq [string index $what $i] + } {incr i} { + append match [string index $k $i] + } + if {$match ne ""} { + lappend matches $match $k + } + } + #Debug.trie {matches: $what -> $matches} + ::punk::trie::log::debug {matches: $what -> $matches} + + if {[dict size $matches]} { + # find the longest matching prefix + set match [lindex [lsort -dictionary [dict keys $matches]] end] + set mel [dict get $matches $match] + set suffix [string range $what [string length $match] end] + + return [list $match $mel $suffix] + } else { + return {} ;# no matches + } + } + + # return next unique id if there's no proffered value + method id {value} { + if {$value} { + return $value + } else { + return [incr id] + } + } + + # insert an element with a given optional value into trie + # along path given by $args (no need to specify) + method insert {what {value 0} args} { + if {[llength $args]} { + set t [dict get $trie {*}$args] + } else { + set t $trie + } + + if {[dict exists $t $what]} { + #Debug.trie {$what is an exact match on path ($args $what)} + ::punk::trie::log::debug {$what is an exact match on path ($args $what)} + if {[catch {dict size [dict get $trie {*}$args $what]} size]} { + # the match is a leaf - we're done + } else { + # the match is a dict - we have to add a null + dict set trie {*}$args $what "" [my id $value] + } + + return ;# exact match - no change + } + + # search for longest prefix + set match [my matches $t $what] + + if {![llength $match]} { + ;# no matching prefix - new element + #Debug.trie {no matching prefix of '$what' in $t - add it on path ($args $what)} + ::punk::trie::log::debug {no matching prefix of '$what' in $t add it on path ($args $what)} + dict set trie {*}$args $what [my id $value] + return + } + + lassign $match match mel suffix ;# prefix, element of match, suffix + + if {$match ne $mel} { + # the matching element shares a prefix, but has a variant suffix + # it must be split + #Debug.trie {splitting '$mel' along '$match'} + ::punk::trie::log::debug {splitting '$mel' along '$match'} + + set melC [dict get $t $mel] + dict unset trie {*}$args $mel + dict set trie {*}$args $match [string range $mel [string length $match] end] $melC + } + + if {[catch {dict size [dict get $trie {*}$args $match]} size]} { + # the match is a leaf - must be split + if {$match eq $mel} { + # the matching element shares a prefix, but has a variant suffix + # it must be split + #Debug.trie {splitting '$mel' along '$match'} + ::punk::trie::log::debug {splitting '$mel' along '$match'} + set melC [dict get $t $mel] + dict unset trie {*}$args $mel + dict set trie {*}$args $match "" $melC + } + #Debug.trie {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} + ::punk::trie::log::debug {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} + set melid [dict get $t $mel] + dict set trie {*}$args $match $suffix [my id $value] + } else { + # it's a dict - keep searching + #Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} + ::punk::trie::log::debug {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} + my insert $suffix $value {*}$args $match + } + return + } + + # find a path matching an element $what + # if the element's not found, return the nearest path + method find_path {what args} { + if {[llength $args]} { + set t [dict get $trie {*}$args] + } else { + set t $trie + } + + if {[dict exists $t $what]} { + #Debug.trie {$what is an exact match on path ($args $what)} + return [list {*}$args $what] ;# exact match - no change + } + + # search for longest prefix + set match [my matches $t $what] + + if {![llength $match]} { + return $args + } + + lassign $match match mel suffix ;# prefix, element of match, suffix + + if {$match ne $mel} { + # the matching element shares a prefix, but has a variant suffix + # no match + return $args + } + + if {[catch {dict size [dict get $trie {*}$args $match]} size] || $size == 0} { + # got to a non-matching leaf - no match + return $args + } else { + # it's a dict - keep searching + #Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} + return [my find_path $suffix {*}$args $match] + } + } + + # given a trie, which may have been modified by deletion, + # optimize it by removing empty nodes and coalescing singleton nodes + method optimize {args} { + if {[llength $args]} { + set t [dict get $trie {*}$args] + } else { + set t $trie + } + + if {[catch {dict size $t} size]} { + #Debug.trie {optimize leaf '$t' along '$args'} + ::punk::trie::log::debug {optimize leaf '$t' along '$args'} + # leaf - leave it + } else { + switch -- $size { + 0 { + #Debug.trie {optimize empty dict ($t) along '$args'} + ::punk::trie::log::debug {optimize empty dict ($t) along '$args'} + if {[llength $args]} { + dict unset trie {*}$args + } + } + 1 { + #Debug.trie {optimize singleton dict ($t) along '$args'} + ::punk::trie::log::debug {optimize singleton dict ($t) along '$args'} + lassign $t k v + if {[llength $args]} { + dict unset trie {*}$args + } + append args $k + if {[llength $v]} { + dict set trie {*}$args $v + } + my optimize {*}$args + } + default { + #Debug.trie {optimize dict ($t) along '$args'} + ::punk::trie::log::debug {optimize dict ($t) along '$args'} + dict for {k v} $t { + my optimize {*}$args $k + } + } + } + } + } + + # delete element $what from trie + method delete {what} { + set path [my find_path $what] + if {[join $path ""] eq $what} { + #Debug.trie {del '$what' along ($path) was [dict get $trie {*}$path]} + if {[catch {dict size [dict get $trie {*}$path]} size]} { + # got to a matching leaf - delete it + dict unset trie {*}$path + set path [lrange $path 0 end-1] + } else { + dict unset trie {*}$path "" + } + + my optimize ;# remove empty and singleton elements + } else { + # nothing to delete, guess we're done + } + } + + # find the value of element $what in trie, + # error if not found + method find_or_error {what} { + set path [my find_path $what] + if {[join $path ""] eq $what} { + if {[catch {dict size [dict get $trie {*}$path]} size]} { + # got to a matching leaf - done + return [dict get $trie {*}$path] + } else { + #JMN - what could be an exact match for a path, but not be in the trie itself + if {[dict exists $trie {*}$path ""]} { + return [dict get $trie {*}$path ""] + } else { + ::punk::trie::log::debug {'$what' matches a path but is not a leaf} + error "'$what' not found" + } + } + } else { + error "'$what' not found" + } + } + #JMN - renamed original find to find_or_error + #prefer not to catch on result - but test for -1 + method find {what} { + set path [my find_path $what] + if {[join $path ""] eq $what} { + #presumably we use catch and dict size to avoid llength shimmering large inner dicts to list rep + if {[catch {dict size [dict get $trie {*}$path]} size]} { + # got to a matching leaf - done + return [dict get $trie {*}$path] + } else { + #JMN - what could be an exact match for a path, but not be in the trie itself + if {[dict exists $trie {*}$path ""]} { + return [dict get $trie {*}$path ""] + } else { + ::punk::trie::log::debug {'$what' matches a path but is not a leaf} + return -1 + } + } + } else { + return -1 + } + } + + # dump the trie as a string + method dump {} { + return $trie + } + + # return a string rep of the trie sorted in dict order + method order {{t {}}} { + if {![llength $t]} { + set t $trie + } elseif {[llength $t] == 1} { + return $t + } + set acc {} + + foreach key [lsort -dictionary [dict keys $t]] { + lappend acc $key [my order [dict get $t $key]] + } + return $acc + } + + # return the trie as a dict of names with values + method flatten {{t {}} {prefix ""}} { + if {![llength $t]} { + set t $trie + } elseif {[llength $t] == 1} { + return [list $prefix $t] + } + + set acc {} + + foreach key [dict keys $t] { + lappend acc {*}[my flatten [dict get $t $key] $prefix$key] + } + return $acc + } + + #shortest possible string to identify an element in the trie using the same principle as tcl::prefix::match + #ie if a stored word is a prefix of any other words - it must be fully specified to identify itself. + #JMN - REVIEW - better algorithms? + #caller having retained all members can avoid flatten call + #by selecting a single 'which' known not to be in the trie (or empty string) - all idents can be returned. + #when all 'which' members are in the tree - scanning stops when they're all found + # - and a dict containing result and scanned keys is returned + # - result contains a dict with keys for each which member + # - scanned contains a dict of all words longer than our shortest which - (and a subset of words the same length) + method shortest_idents {which {allmembers {}}} { + set t $trie + if {![llength $allmembers]} { + set members [dict keys [my flatten]] + } else { + set members $allmembers + } + set len_members [lmap m $members {list [string length $m] $m}] + set longestfirst [lsort -index 0 -integer -decreasing $len_members] + set longestfirst [lmap v $longestfirst {lindex $v 1}] + set taken [dict create] + set scanned [dict create] + set result [dict create] ;#words in our which list - if found + foreach w $longestfirst { + set path [my find_path $w] + if {[dict exists $taken $w]} { + #whole word - no unique prefix + dict set scanned $w $w + if {$w in $which} { + #puts stderr "$w -> $w" + dict set result $w $w + if {[dict size $result] == [llength $which]} { + return [dict create result $result scanned $scanned] + } + } + continue + } + set acc "" + foreach p [lrange $path 0 end-1] { + dict set taken [append acc $p] 1 ;#no need to test first - just set even though may already be present + } + append acc [string index [lindex $path end] 0] + dict set scanned $w $acc ;#sorted by length - so no other can have this prefix - and no longer necessary + if {$w in $which} { + #puts stderr "$w -> $acc" + dict set result $w $acc + if {[dict size $result] == [llength $which]} { + return [dict create result $result scanned $scanned] + } + } + } + return [dict create result $result scanned $scanned] + } + + # overwrite the trie + method set {t} { + set trie $t + } + + constructor {args} { + set trie {} + set id 0 + foreach a $args { + my insert $a + } + } + } + + set testlist [list blah x black blacken] + proc test1 {} { + #JMN + #test that find_or_error of a path that isn't stored as a value returns an appropriate error + #(used to report couldn't find dict key "") + set t [punk::trie::trieclass new blah x black blacken] + if {[catch {$t find_or_error bla} errM]} { + puts stderr "should be error indicating 'bla' not found" + puts stderr "err during $t find bla\n$errM" + } + return $t + } + + + # oo::class create interface_sample1 { + # #*** !doctools + # #[enum] CLASS [class interface_sample1] + # #[list_begin definitions] + + # method test {arg1} { + # #*** !doctools + # #[call class::interface_sample1 [method test] [arg arg1]] + # #[para] test method + # puts "test: $arg1" + # } + + # #*** !doctools + # #[list_end] [comment {-- end definitions interface_sample1}] + # } + + #*** !doctools + #[list_end] [comment {--- end class enumeration ---}] + } + + #*** !doctools + #[subsection {Namespace punk::trie}] + #[para] Core API functions for punk::trie + #[list_begin definitions] + + + + #proc sample1 {p1 n args} { + # #*** !doctools + # #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] + # #[para]Description of sample1 + # #[para] Arguments: + # # [list_begin arguments] + # # [arg_def tring p1] A description of string argument p1. + # # [arg_def integer n] A description of integer argument n. + # # [list_end] + # return "ok" + #} + + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::trie ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +# Secondary API namespace +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +tcl::namespace::eval punk::trie::lib { + tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase + tcl::namespace::path [tcl::namespace::parent] + #*** !doctools + #[subsection {Namespace punk::trie::lib}] + #[para] Secondary functions that are part of the API + #[list_begin definitions] + + #proc utility1 {p1 args} { + # #*** !doctools + # #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] + # #[para]Description of utility1 + # return 1 + #} + + + + #*** !doctools + #[list_end] [comment {--- end definitions namespace punk::trie::lib ---}] +} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ + + + +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +#*** !doctools +#[section Internal] +#tcl::namespace::eval punk::trie::system { + #*** !doctools + #[subsection {Namespace punk::trie::system}] + #[para] Internal functions that are not part of the API + + + +#} +# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +## Ready +package provide punk::trie [tcl::namespace::eval punk::trie { + variable pkg punk::trie + variable version + set version 0.1.0 +}] +return + +#*** !doctools +#[manpage_end] +