Julian Noble
3 months ago
7 changed files with 753 additions and 33 deletions
@ -0,0 +1,600 @@
|
||||
# -*- tcl -*- |
||||
# Maintenance Instruction: leave the 999999.xxx.x as is and use punkshell 'pmix make' or bin/punkmake to update from <pkg>-buildversion.txt |
||||
# module template: shellspy/src/decktemplates/vendor/punk/modules/template_module-0.0.3.tm |
||||
# |
||||
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem. |
||||
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository. |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# (C) CMcC 2010 |
||||
# |
||||
# @@ Meta Begin |
||||
# Application punk::trie 0.1.0 |
||||
# Meta platform tcl |
||||
# Meta license <unspecified> |
||||
# @@ Meta End |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# doctools header |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
#*** !doctools |
||||
#[manpage_begin shellspy_module_punk::trie 0 0.1.0] |
||||
#[copyright "2010"] |
||||
#[titledesc {punk::trie API}] [comment {-- Name section and table of contents description --}] |
||||
#[moddesc {punk::trie}] [comment {-- Description at end of page heading --}] |
||||
#[require punk::trie] |
||||
#[keywords module datastructure trie] |
||||
#[description] tcl trie implementation courtesy of CmcC (tcl wiki) |
||||
#[para] - |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[section Overview] |
||||
#[para] overview of punk::trie |
||||
#[subsection Concepts] |
||||
#[para] - |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Requirements |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[subsection dependencies] |
||||
#[para] packages used by punk::trie |
||||
#[list_begin itemized] |
||||
|
||||
package require Tcl 8.6- |
||||
#*** !doctools |
||||
#[item] [package {Tcl 8.6}] |
||||
|
||||
# #package require frobz |
||||
# #*** !doctools |
||||
# #[item] [package {frobz}] |
||||
|
||||
#*** !doctools |
||||
#[list_end] |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
#*** !doctools |
||||
#[section API] |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# oo::class namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
#tcl::namespace::eval punk::trie::class { |
||||
#*** !doctools |
||||
#[subsection {Namespace punk::trie::class}] |
||||
#[para] class definitions |
||||
#if {[tcl::info::commands [tcl::namespace::current]::interface_sample1] eq ""} { |
||||
#*** !doctools |
||||
#[list_begin enumerated] |
||||
|
||||
# oo::class create interface_sample1 { |
||||
# #*** !doctools |
||||
# #[enum] CLASS [class interface_sample1] |
||||
# #[list_begin definitions] |
||||
|
||||
# method test {arg1} { |
||||
# #*** !doctools |
||||
# #[call class::interface_sample1 [method test] [arg arg1]] |
||||
# #[para] test method |
||||
# puts "test: $arg1" |
||||
# } |
||||
|
||||
# #*** !doctools |
||||
# #[list_end] [comment {-- end definitions interface_sample1}] |
||||
# } |
||||
|
||||
#*** !doctools |
||||
#[list_end] [comment {--- end class enumeration ---}] |
||||
#} |
||||
#} |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# Base namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
tcl::namespace::eval punk::trie { |
||||
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase |
||||
#variable xyz |
||||
proc Dolog {lvl txt} { |
||||
#return "$lvl -- $txt" |
||||
#logger calls this in such a way that a straight uplevel can get us the vars/commands in messages substituted |
||||
set msg "[clock format [clock seconds] -format "%Y-%m-%dT%H:%M:%S"] punk::trie '[uplevel [list subst $txt]]'" |
||||
puts stderr $msg |
||||
} |
||||
package require logger |
||||
logger::initNamespace ::punk::trie |
||||
foreach lvl [logger::levels] { |
||||
interp alias {} ::punk::trie::Log_$lvl {} ::punk::trie::Dolog $lvl |
||||
log::logproc $lvl ::punk::trie::Log_$lvl |
||||
} |
||||
#namespace path ::punk::trie::log |
||||
|
||||
#[para] class definitions |
||||
if {[tcl::info::commands [tcl::namespace::current]::trieclass] eq ""} { |
||||
#*** !doctools |
||||
#[list_begin enumerated] |
||||
oo::class create [tcl::namespace::current]::trieclass { |
||||
variable trie id |
||||
|
||||
method matches {t what} { |
||||
#*** !doctools |
||||
#[call class::trieclass [method matches] [arg t] [arg what]] |
||||
#[para] search for longest prefix, return matching prefix, element and suffix |
||||
|
||||
set matches {} |
||||
set wlen [string length $what] |
||||
foreach k [lsort -decreasing -dictionary [dict keys $t]] { |
||||
set klen [string length $k] |
||||
set match "" |
||||
for {set i 0} {$i < $klen |
||||
&& $i < $wlen |
||||
&& [string index $k $i] eq [string index $what $i] |
||||
} {incr i} { |
||||
append match [string index $k $i] |
||||
} |
||||
if {$match ne ""} { |
||||
lappend matches $match $k |
||||
} |
||||
} |
||||
#Debug.trie {matches: $what -> $matches} |
||||
::punk::trie::log::debug {matches: $what -> $matches} |
||||
|
||||
if {[dict size $matches]} { |
||||
# find the longest matching prefix |
||||
set match [lindex [lsort -dictionary [dict keys $matches]] end] |
||||
set mel [dict get $matches $match] |
||||
set suffix [string range $what [string length $match] end] |
||||
|
||||
return [list $match $mel $suffix] |
||||
} else { |
||||
return {} ;# no matches |
||||
} |
||||
} |
||||
|
||||
# return next unique id if there's no proffered value |
||||
method id {value} { |
||||
if {$value} { |
||||
return $value |
||||
} else { |
||||
return [incr id] |
||||
} |
||||
} |
||||
|
||||
# insert an element with a given optional value into trie |
||||
# along path given by $args (no need to specify) |
||||
method insert {what {value 0} args} { |
||||
if {[llength $args]} { |
||||
set t [dict get $trie {*}$args] |
||||
} else { |
||||
set t $trie |
||||
} |
||||
|
||||
if {[dict exists $t $what]} { |
||||
#Debug.trie {$what is an exact match on path ($args $what)} |
||||
::punk::trie::log::debug {$what is an exact match on path ($args $what)} |
||||
if {[catch {dict size [dict get $trie {*}$args $what]} size]} { |
||||
# the match is a leaf - we're done |
||||
} else { |
||||
# the match is a dict - we have to add a null |
||||
dict set trie {*}$args $what "" [my id $value] |
||||
} |
||||
|
||||
return ;# exact match - no change |
||||
} |
||||
|
||||
# search for longest prefix |
||||
set match [my matches $t $what] |
||||
|
||||
if {![llength $match]} { |
||||
;# no matching prefix - new element |
||||
#Debug.trie {no matching prefix of '$what' in $t - add it on path ($args $what)} |
||||
::punk::trie::log::debug {no matching prefix of '$what' in $t add it on path ($args $what)} |
||||
dict set trie {*}$args $what [my id $value] |
||||
return |
||||
} |
||||
|
||||
lassign $match match mel suffix ;# prefix, element of match, suffix |
||||
|
||||
if {$match ne $mel} { |
||||
# the matching element shares a prefix, but has a variant suffix |
||||
# it must be split |
||||
#Debug.trie {splitting '$mel' along '$match'} |
||||
::punk::trie::log::debug {splitting '$mel' along '$match'} |
||||
|
||||
set melC [dict get $t $mel] |
||||
dict unset trie {*}$args $mel |
||||
dict set trie {*}$args $match [string range $mel [string length $match] end] $melC |
||||
} |
||||
|
||||
if {[catch {dict size [dict get $trie {*}$args $match]} size]} { |
||||
# the match is a leaf - must be split |
||||
if {$match eq $mel} { |
||||
# the matching element shares a prefix, but has a variant suffix |
||||
# it must be split |
||||
#Debug.trie {splitting '$mel' along '$match'} |
||||
::punk::trie::log::debug {splitting '$mel' along '$match'} |
||||
set melC [dict get $t $mel] |
||||
dict unset trie {*}$args $mel |
||||
dict set trie {*}$args $match "" $melC |
||||
} |
||||
#Debug.trie {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} |
||||
::punk::trie::log::debug {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} |
||||
set melid [dict get $t $mel] |
||||
dict set trie {*}$args $match $suffix [my id $value] |
||||
} else { |
||||
# it's a dict - keep searching |
||||
#Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} |
||||
::punk::trie::log::debug {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} |
||||
my insert $suffix $value {*}$args $match |
||||
} |
||||
return |
||||
} |
||||
|
||||
# find a path matching an element $what |
||||
# if the element's not found, return the nearest path |
||||
method find_path {what args} { |
||||
if {[llength $args]} { |
||||
set t [dict get $trie {*}$args] |
||||
} else { |
||||
set t $trie |
||||
} |
||||
|
||||
if {[dict exists $t $what]} { |
||||
#Debug.trie {$what is an exact match on path ($args $what)} |
||||
return [list {*}$args $what] ;# exact match - no change |
||||
} |
||||
|
||||
# search for longest prefix |
||||
set match [my matches $t $what] |
||||
|
||||
if {![llength $match]} { |
||||
return $args |
||||
} |
||||
|
||||
lassign $match match mel suffix ;# prefix, element of match, suffix |
||||
|
||||
if {$match ne $mel} { |
||||
# the matching element shares a prefix, but has a variant suffix |
||||
# no match |
||||
return $args |
||||
} |
||||
|
||||
if {[catch {dict size [dict get $trie {*}$args $match]} size] || $size == 0} { |
||||
# got to a non-matching leaf - no match |
||||
return $args |
||||
} else { |
||||
# it's a dict - keep searching |
||||
#Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} |
||||
return [my find_path $suffix {*}$args $match] |
||||
} |
||||
} |
||||
|
||||
# given a trie, which may have been modified by deletion, |
||||
# optimize it by removing empty nodes and coalescing singleton nodes |
||||
method optimize {args} { |
||||
if {[llength $args]} { |
||||
set t [dict get $trie {*}$args] |
||||
} else { |
||||
set t $trie |
||||
} |
||||
|
||||
if {[catch {dict size $t} size]} { |
||||
#Debug.trie {optimize leaf '$t' along '$args'} |
||||
::punk::trie::log::debug {optimize leaf '$t' along '$args'} |
||||
# leaf - leave it |
||||
} else { |
||||
switch -- $size { |
||||
0 { |
||||
#Debug.trie {optimize empty dict ($t) along '$args'} |
||||
::punk::trie::log::debug {optimize empty dict ($t) along '$args'} |
||||
if {[llength $args]} { |
||||
dict unset trie {*}$args |
||||
} |
||||
} |
||||
1 { |
||||
#Debug.trie {optimize singleton dict ($t) along '$args'} |
||||
::punk::trie::log::debug {optimize singleton dict ($t) along '$args'} |
||||
lassign $t k v |
||||
if {[llength $args]} { |
||||
dict unset trie {*}$args |
||||
} |
||||
append args $k |
||||
if {[llength $v]} { |
||||
dict set trie {*}$args $v |
||||
} |
||||
my optimize {*}$args |
||||
} |
||||
default { |
||||
#Debug.trie {optimize dict ($t) along '$args'} |
||||
::punk::trie::log::debug {optimize dict ($t) along '$args'} |
||||
dict for {k v} $t { |
||||
my optimize {*}$args $k |
||||
} |
||||
} |
||||
} |
||||
} |
||||
} |
||||
|
||||
# delete element $what from trie |
||||
method delete {what} { |
||||
set path [my find_path $what] |
||||
if {[join $path ""] eq $what} { |
||||
#Debug.trie {del '$what' along ($path) was [dict get $trie {*}$path]} |
||||
if {[catch {dict size [dict get $trie {*}$path]} size]} { |
||||
# got to a matching leaf - delete it |
||||
dict unset trie {*}$path |
||||
set path [lrange $path 0 end-1] |
||||
} else { |
||||
dict unset trie {*}$path "" |
||||
} |
||||
|
||||
my optimize ;# remove empty and singleton elements |
||||
} else { |
||||
# nothing to delete, guess we're done |
||||
} |
||||
} |
||||
|
||||
# find the value of element $what in trie, |
||||
# error if not found |
||||
method find_or_error {what} { |
||||
set path [my find_path $what] |
||||
if {[join $path ""] eq $what} { |
||||
if {[catch {dict size [dict get $trie {*}$path]} size]} { |
||||
# got to a matching leaf - done |
||||
return [dict get $trie {*}$path] |
||||
} else { |
||||
#JMN - what could be an exact match for a path, but not be in the trie itself |
||||
if {[dict exists $trie {*}$path ""]} { |
||||
return [dict get $trie {*}$path ""] |
||||
} else { |
||||
::punk::trie::log::debug {'$what' matches a path but is not a leaf} |
||||
error "'$what' not found" |
||||
} |
||||
} |
||||
} else { |
||||
error "'$what' not found" |
||||
} |
||||
} |
||||
#JMN - renamed original find to find_or_error |
||||
#prefer not to catch on result - but test for -1 |
||||
method find {what} { |
||||
set path [my find_path $what] |
||||
if {[join $path ""] eq $what} { |
||||
#presumably we use catch and dict size to avoid llength shimmering large inner dicts to list rep |
||||
if {[catch {dict size [dict get $trie {*}$path]} size]} { |
||||
# got to a matching leaf - done |
||||
return [dict get $trie {*}$path] |
||||
} else { |
||||
#JMN - what could be an exact match for a path, but not be in the trie itself |
||||
if {[dict exists $trie {*}$path ""]} { |
||||
return [dict get $trie {*}$path ""] |
||||
} else { |
||||
::punk::trie::log::debug {'$what' matches a path but is not a leaf} |
||||
return -1 |
||||
} |
||||
} |
||||
} else { |
||||
return -1 |
||||
} |
||||
} |
||||
|
||||
# dump the trie as a string |
||||
method dump {} { |
||||
return $trie |
||||
} |
||||
|
||||
# return a string rep of the trie sorted in dict order |
||||
method order {{t {}}} { |
||||
if {![llength $t]} { |
||||
set t $trie |
||||
} elseif {[llength $t] == 1} { |
||||
return $t |
||||
} |
||||
set acc {} |
||||
|
||||
foreach key [lsort -dictionary [dict keys $t]] { |
||||
lappend acc $key [my order [dict get $t $key]] |
||||
} |
||||
return $acc |
||||
} |
||||
|
||||
# return the trie as a dict of names with values |
||||
method flatten {{t {}} {prefix ""}} { |
||||
if {![llength $t]} { |
||||
set t $trie |
||||
} elseif {[llength $t] == 1} { |
||||
return [list $prefix $t] |
||||
} |
||||
|
||||
set acc {} |
||||
|
||||
foreach key [dict keys $t] { |
||||
lappend acc {*}[my flatten [dict get $t $key] $prefix$key] |
||||
} |
||||
return $acc |
||||
} |
||||
|
||||
#shortest possible string to identify an element in the trie using the same principle as tcl::prefix::match |
||||
#ie if a stored word is a prefix of any other words - it must be fully specified to identify itself. |
||||
#JMN - REVIEW - better algorithms? |
||||
#caller having retained all members can avoid flatten call |
||||
#by selecting a single 'which' known not to be in the trie (or empty string) - all idents can be returned. |
||||
#when all 'which' members are in the tree - scanning stops when they're all found |
||||
# - and a dict containing result and scanned keys is returned |
||||
# - result contains a dict with keys for each which member |
||||
# - scanned contains a dict of all words longer than our shortest which - (and a subset of words the same length) |
||||
method shortest_idents {which {allmembers {}}} { |
||||
set t $trie |
||||
if {![llength $allmembers]} { |
||||
set members [dict keys [my flatten]] |
||||
} else { |
||||
set members $allmembers |
||||
} |
||||
set len_members [lmap m $members {list [string length $m] $m}] |
||||
set longestfirst [lsort -index 0 -integer -decreasing $len_members] |
||||
set longestfirst [lmap v $longestfirst {lindex $v 1}] |
||||
set taken [dict create] |
||||
set scanned [dict create] |
||||
set result [dict create] ;#words in our which list - if found |
||||
foreach w $longestfirst { |
||||
set path [my find_path $w] |
||||
if {[dict exists $taken $w]} { |
||||
#whole word - no unique prefix |
||||
dict set scanned $w $w |
||||
if {$w in $which} { |
||||
#puts stderr "$w -> $w" |
||||
dict set result $w $w |
||||
if {[dict size $result] == [llength $which]} { |
||||
return [dict create result $result scanned $scanned] |
||||
} |
||||
} |
||||
continue |
||||
} |
||||
set acc "" |
||||
foreach p [lrange $path 0 end-1] { |
||||
dict set taken [append acc $p] 1 ;#no need to test first - just set even though may already be present |
||||
} |
||||
append acc [string index [lindex $path end] 0] |
||||
dict set scanned $w $acc ;#sorted by length - so no other can have this prefix - and no longer necessary |
||||
if {$w in $which} { |
||||
#puts stderr "$w -> $acc" |
||||
dict set result $w $acc |
||||
if {[dict size $result] == [llength $which]} { |
||||
return [dict create result $result scanned $scanned] |
||||
} |
||||
} |
||||
} |
||||
return [dict create result $result scanned $scanned] |
||||
} |
||||
|
||||
# overwrite the trie |
||||
method set {t} { |
||||
set trie $t |
||||
} |
||||
|
||||
constructor {args} { |
||||
set trie {} |
||||
set id 0 |
||||
foreach a $args { |
||||
my insert $a |
||||
} |
||||
} |
||||
} |
||||
|
||||
set testlist [list blah x black blacken] |
||||
proc test1 {} { |
||||
#JMN |
||||
#test that find_or_error of a path that isn't stored as a value returns an appropriate error |
||||
#(used to report couldn't find dict key "") |
||||
set t [punk::trie::trieclass new blah x black blacken] |
||||
if {[catch {$t find_or_error bla} errM]} { |
||||
puts stderr "should be error indicating 'bla' not found" |
||||
puts stderr "err during $t find bla\n$errM" |
||||
} |
||||
return $t |
||||
} |
||||
|
||||
|
||||
# oo::class create interface_sample1 { |
||||
# #*** !doctools |
||||
# #[enum] CLASS [class interface_sample1] |
||||
# #[list_begin definitions] |
||||
|
||||
# method test {arg1} { |
||||
# #*** !doctools |
||||
# #[call class::interface_sample1 [method test] [arg arg1]] |
||||
# #[para] test method |
||||
# puts "test: $arg1" |
||||
# } |
||||
|
||||
# #*** !doctools |
||||
# #[list_end] [comment {-- end definitions interface_sample1}] |
||||
# } |
||||
|
||||
#*** !doctools |
||||
#[list_end] [comment {--- end class enumeration ---}] |
||||
} |
||||
|
||||
#*** !doctools |
||||
#[subsection {Namespace punk::trie}] |
||||
#[para] Core API functions for punk::trie |
||||
#[list_begin definitions] |
||||
|
||||
|
||||
|
||||
#proc sample1 {p1 n args} { |
||||
# #*** !doctools |
||||
# #[call [fun sample1] [arg p1] [arg n] [opt {option value...}]] |
||||
# #[para]Description of sample1 |
||||
# #[para] Arguments: |
||||
# # [list_begin arguments] |
||||
# # [arg_def tring p1] A description of string argument p1. |
||||
# # [arg_def integer n] A description of integer argument n. |
||||
# # [list_end] |
||||
# return "ok" |
||||
#} |
||||
|
||||
|
||||
|
||||
|
||||
#*** !doctools |
||||
#[list_end] [comment {--- end definitions namespace punk::trie ---}] |
||||
} |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
# Secondary API namespace |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
tcl::namespace::eval punk::trie::lib { |
||||
tcl::namespace::export {[a-z]*} ;# Convention: export all lowercase |
||||
tcl::namespace::path [tcl::namespace::parent] |
||||
#*** !doctools |
||||
#[subsection {Namespace punk::trie::lib}] |
||||
#[para] Secondary functions that are part of the API |
||||
#[list_begin definitions] |
||||
|
||||
#proc utility1 {p1 args} { |
||||
# #*** !doctools |
||||
# #[call lib::[fun utility1] [arg p1] [opt {?option value...?}]] |
||||
# #[para]Description of utility1 |
||||
# return 1 |
||||
#} |
||||
|
||||
|
||||
|
||||
#*** !doctools |
||||
#[list_end] [comment {--- end definitions namespace punk::trie::lib ---}] |
||||
} |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
|
||||
|
||||
|
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
#*** !doctools |
||||
#[section Internal] |
||||
#tcl::namespace::eval punk::trie::system { |
||||
#*** !doctools |
||||
#[subsection {Namespace punk::trie::system}] |
||||
#[para] Internal functions that are not part of the API |
||||
|
||||
|
||||
|
||||
#} |
||||
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++ |
||||
## Ready |
||||
package provide punk::trie [tcl::namespace::eval punk::trie { |
||||
variable pkg punk::trie |
||||
variable version |
||||
set version 0.1.0 |
||||
}] |
||||
return |
||||
|
||||
#*** !doctools |
||||
#[manpage_end] |
||||
|
Loading…
Reference in new issue