#! /usr/bin/env tclsh package require flagfilter namespace import ::flagfilter::check_flags namespace eval natsort { #REVIEW - determine and document the purpose of scriptdir being added to tm path proc scriptdir {} { set possibly_linked_script [file dirname [file normalize [file join [info script] __dummy__]]] if {[file isdirectory $possibly_linked_script]} { return $possibly_linked_script } else { return [file dirname $possibly_linked_script] } } if {![interp issafe]} { set sdir [scriptdir] #puts stderr "natsort tcl::tm::add $sdir" if {$sdir ni [tcl::tm::list]} { catch {tcl::tm::add $sdir} } } } namespace eval natsort { variable stacktrace_on 0 proc do_error {msg {then error}} { #note we exit or error out even if debug selected - as every do_error call is meant to interrupt code processing at the site of call #this is not just a 'logging' call even though it has log-like descriptors lassign $then type code if {$code eq ""} { set code 1 } set type [string tolower $type] set levels [list debug info notice warn error critical] if {$type in [concat $levels exit]} { puts stderr "|$type> $msg" } else { puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be one of '$levels' or 'exit '" } flush stderr if {$::tcl_interactive} { #may not always be desirable - but assumed to be more useful not to exit despite request, to aid in debugging if {[string tolower $type] eq "exit"} { puts stderr " (exit suppressed due to tcl_interactive - raising error instead)" if {![string is digit -strict $code]} { puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be: 'exit '" } flush stderr } return -code error $msg } else { if {$type ne "exit"} { return -code error $msg } else { if {[string is digit -strict $code]} { exit $code } else { puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be 'error' or 'exit '" return -code error $msg } } } } variable debug 0 variable testlist set testlist { 00.test-firstposition.txt 0001.blah.txt 1.test-sorts-after-all-leadingzero-number-one-equivs.txt 1010.thousand-and-ten.second.txt 01010.thousand-and-ten.first.txt 0001.aaa.txt 001.zzz.txt 08.octal.txt-last-octal 008.another-octal-first-octal.txt 08.again-second-octal.txt 001.a.txt 0010.reconfig.txt 010.etc.txt 005.etc.01.txt 005.Etc.02.txt 005.123.abc.txt 200.somewhere.txt 2zzzz.before-somewhere.txt 00222-after-somewhere.txt 005.00010.abc.txt 005.a3423bc.00010.abc.txt 005.001.abc.txt 005.etc.1010.txt 005.etc.010.txt 005.etc.10.txt " 005.etc.10.txt" 005.etc.001.txt 20.somewhere.txt 4611686018427387904999999999-bignum.txt 4611686018427387903-bigishnum.txt 9223372036854775807-bigint.txt etca-a etc-a etc2-a a0001blah.txt a010.txt winlike-sort-difference-0.1.txt winlike-sort-difference-0.1.1.txt a1.txt b1-a0001blah.txt b1-a010.txt b1-a1.txt -a1.txt --a1.txt --a10.txt 2.high-two.yml 02.higher-two.yml reconfig.txt _common.stuff.txt CASETEST.txt casetest.txt something.txt some~thing.txt someathing.txt someThing.txt thing.txt thing_revised.txt thing-revised.txt "thing revised.txt" "spacetest.txt" " spacetest.txt" " spacetest.txt" "spacetest2.txt" "spacetest 2.txt" "spacetest02.txt" name.txt name2.txt "name .txt" "name2 .txt" blah.txt combined.txt a001.txt .test .ssh "Feb 10.txt" "Feb 8.txt" 1ab23v23v3r89ad8a8a8a9d.txt "Folder (10)/file.tar.gz" "Folder/file.tar.gz" "Folder (1)/file (1).tar.gz" "Folder (1)/file.tar.gz" "Folder (01)/file.tar.gz" "Folder1/file.tar.gz" "Folder(1)/file.tar.gz" } lappend testlist "Some file.txt" lappend testlist " Some extra file1.txt" lappend testlist " Some extra file01.txt" lappend testlist " some extra file1.txt" lappend testlist " Some extra file003.txt" lappend testlist " Some file.txt" lappend testlist "Some extra file02.txt" lappend testlist "Program Files (x86)" lappend testlist "01999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999991-bigger-pathologically-bignum.txt" lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt" lappend testlist "29999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-smaller-pathologically-bignum.txt" lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 1.txt" lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 01.txt" lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a01a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt" lappend testlist "b1b1b1b1.txt" lappend testlist "b1b01z1z1.txt" lappend testlist "c1c111c1.txt" lappend testlist "c1c1c1c1.txt" namespace eval overtype { proc right {args} { # @d !todo - implement overflow, length checks etc if {[llength $args] < 2} { error {usage: ?-overflow [1|0]? undertext overtext} } foreach {undertext overtext} [lrange $args end-1 end] break set opt(-overflow) 0 array set opt [lrange $args 0 end-2] set olen [string length $overtext] set ulen [string length $undertext] if {$opt(-overflow)} { return [string range $undertext 0 end-$olen]$overtext } else { if {$olen > $ulen} { set diff [expr {$olen - $ulen}] return [string range $undertext 0 end-$olen][string range $overtext 0 end-$diff] } else { return [string range $undertext 0 end-$olen]$overtext } } } proc left {args} { # @c overtype starting at left (overstrike) # @c can/should we use something like this?: 'format "%-*s" $len $overtext if {[llength $args] < 2} { error {usage: ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext} } foreach {undertext overtext} [lrange $args end-1 end] break set opt(-ellipsis) 0 set opt(-ellipsistext) {...} set opt(-overflow) 0 array set opt [lrange $args 0 end-2] set len [string length $undertext] set overlen [string length $overtext] set diff [expr {$overlen - $len}] #puts stdout "====================>overtype: datalen:$len overlen:$overlen diff:$diff" #puts stdout "====================>overtype: data: $overtext" if {$diff > 0} { if {$opt(-overflow)} { return $overtext } else { if {$opt(-ellipsis)} { return [overtype::left [string range $overtext 0 [expr {$len -1}]] $opt(-ellipsistext)] } else { return [string range $overtext 0 [expr {$len -1}]] } } } else { return "$overtext[string range $undertext $overlen end]" } } } #considered using hex to make large numbers more compact for viewing in debug output - but it's not that much shorter and probably obscures more than it helps. proc hex2dec {largeHex} { #todo - use punk::lib::hex2dec - (scan supports ll so can do larger hex values directly) set res 0 set largeHex [string map {_ {}} $largeHex] if {[string length $largeHex] <=7} { #scan can process up to FFFFFFF and does so quickly return [scan $largeHex %x] } foreach hexDigit [split $largeHex {}] { set new 0x$hexDigit set res [expr {16*$res + $new}] } return $res } proc dec2hex {decimalNumber} { format %4.4llX $decimalNumber } #punk::lib::trimzero proc trimzero {number} { set trimmed [string trimleft $number 0] if {[string length $trimmed] == 0} { set trimmed 0 } return $trimmed } #todo - consider human numeric split #e.g consider SI suffixes k|KMGTPEZY in that order #in this context, for natural sorting - numeric segments don't contain underscores or other punctuation such as . - + etc. #review - what about unicode equivalents such as wide numerals \UFF10 to \UFF19? unicode normalization? proc split_numeric_segments {name} { set segments [list] while {[string length $name]} { if {[scan $name {%[0-9]%n} chunk len] == 2} { lappend segments $chunk set name [string range $name $len end] } if {[scan $name {%[^0-9]%n} chunk len] == 2} { lappend segments $chunk set name [string range $name $len end] } } return $segments } proc padleft {str count {ch " "}} { set val [string repeat $ch $count] append val $str set diff [expr {max(0,$count - [string length $str])}] set offset [expr {max(0,$count - $diff)}] set val [string range $val $offset end] } # Sqlite may have limited collation sequences available in default builds. # with custom builds - there may be others such as 'natsort' - see https://sqlite.org/forum/forumpost/e4dc6f3331 # This is of limited use with the few builtin collations available in 2023 ie binary,nocase & rtrim # but may provide a quicker,flexible sort option, especially if/when more collation sequences are added to sqlite # There are also prebuilt packages such as sqlite3-icu which allows things like "SELECT icu_load_collation('en_AU', 'australian');" proc sort_sqlite {stringlist args} { package require sqlite3 set args [check_flags -caller natsort_sqlite -defaults [list -db :memory: -collate nocase -winlike 0 -topchars "\uFFFF" -debug 0 -splitchars [list / . - _] -extras {all}] -values $args] set db [string trim [dict get $args -db]] set collate [string trim [dict get $args -collate]] set debug [string trim [dict get $args -debug]] set topchars [string trim [dict get $args -topchars]] set topdot [expr {"." in $topchars}] set topunderscore [expr {"_" in $topchars}] sqlite3 db_sort_basic $db set orderedlist [list] db_sort_basic eval [string map [list %collate% $collate] {create table sqlitesort(index0 text COLLATE %collate%, name text COLLATE %collate%)}] foreach nm $stringlist { set segments [split_numeric_segments $nm] set index "" set s 0 foreach seg $segments { if {($s == 0) && ![string length [string trim $seg]]} { #don't index leading space } elseif {($s == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { append index "[padleft "0" 5]-d -100 topunderscore " append index [string trim $seg] } elseif {($s == 0) && ($topdot) && [string match .* [string trim $seg]]} { append index "[padleft "0" 5]-d -50 topdot " append index [string trim $seg] } else { if {[string is digit [string trim $seg]]} { set basenum [trimzero [string trim $seg]] set lengthindex "[padleft [string length $basenum] 5]-d" append index "$lengthindex " #append index [padleft $basenum 40] append index $basenum } else { append index [string trim $seg] } } incr s } puts stdout ">>$index" db_sort_basic eval {insert into sqlitesort values($index,$nm)} } db_sort_basic eval [string map [list %collate% $collate] {select name from sqlitesort order by index0 COLLATE %collate% ASC, name COLLATE %collate% ASC }] { lappend orderedlist $name } db_sort_basic close return $orderedlist } proc get_leading_char_count {str char} { #todo - something more elegant? regex? set count 0 foreach c [split $str "" ] { if {$c eq $char} { incr count } else { break } } return $count } proc stacktrace {} { set stack "Stack trace:\n" for {set i 1} {$i < [info level]} {incr i} { set lvl [info level -$i] set pname [lindex $lvl 0] append stack [string repeat " " $i]$pname if {![catch {info args $pname} pargs]} { foreach value [lrange $lvl 1 end] arg $pargs { if {$value eq ""} { if {$arg != 0} { info default $pname $arg value } } append stack " $arg='$value'" } } else { append stack " !unknown vars for $pname" } append stack \n } return $stack } proc get_char_count {str char} { #faster than lsearch on split for str of a few K expr {[tcl::string::length $str]-[tcl::string::length [tcl::string::map "$char {}" $str]]} } proc build_key {chunk splitchars topdict tagconfig debug} { variable stacktrace_on if {$stacktrace_on} { puts stderr "+++>[stacktrace]" } set index_map [list - "" _ ""] #e.g - need to maintain the order #a b.txt #a book.txt #ab.txt #abacus.txt set original_splitchars [dict get $tagconfig original_splitchars] # tag_dashes test moved from loop - review set tag_dashes 0 if {![string length [dict get $tagconfig last_part_text_tag]]} { #winlike set tag_dashes 1 } if {("-" ni $original_splitchars)} { set tag_dashes 1 } if {$debug >= 3} { puts stdout "START build_key chunk : $chunk" puts stdout "START build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" } ## index_map will have no effect if we've already split on the char anyway(?) #foreach m [dict keys $index_map] { # if {$m in $original_splitchars} { # dict unset index_map $m # } #} #if {![string length $chunk]} return set result "" if {![llength $splitchars]} { #no more structural splits - but we need to examine numeric/non-numeric segments at the lowest level. # we are at a leaf in the recursive split hierarchy set s "" ;#we never actually split on "" (unless that was put in splitchars.. but it probably shouldn't be) set parts [list $chunk] ;#important to treat as list or leading/trailing whitespace lost } else { set s [lindex $splitchars 0] if {"spudbucket$s" in "[split $chunk {}]"} { error "dead-branch spudbucket" set partindex [build_key $chunk [lrange $splitchars 1 end] $topdict $tagconfig $debug] if {[dict get $tagconfig showsplits]} { set pfx "(1${s}=)" ;# = sorts before _ set partindex ${pfx}$partindex } return $partindex } else { set parts_below_index "" if {$s ni [split $chunk ""]} { #$s can be an empty string set parts [list $chunk] } else { set parts [split $chunk $s] ;#whitespace preserved - even if splitting on s that is not in string. } #assert - we have a splitchar $s that is in the chunk - so at least one part if {(![string length $s] || [llength $parts] == 0)} { error "buld_key assertion false empty split char and/or no parts" } set pnum 1 ;# 1 based for clarity of reading index in debug output set subpart_count [llength $parts] set sub_splits [lrange $splitchars 1 end] ;#pass same splitchars to each subpart foreach p $parts { set partindex [build_key $p $sub_splits $topdict $tagconfig $debug] set lastpart [expr {$pnum == $subpart_count}] ####################### set showsplits [dict get $tagconfig showsplits] #split prefixing experiment - maybe not suitable for general use - as it affects sort order #note that pfx must be consistent until last one, no matter how many partnumbers there are in total. # we don't want to influence sort order before reaching end. #e.g for: #(1.=)... #(1._)...(2._)...(3.=) #(1._)...(2.=) #Note that this is probably more suitable for highly structure dependant sorts where the results are maybe less.. natural. if {$showsplits} { if {$lastpart} { set pfx "(${pnum}${s}_" #set pfx "(${pnum}${s}=)" ;# = sorts before _ } else { set pfx "(${pnum}${s}_" } append parts_below_index $pfx } ####################### if {$lastpart} { if {[string length $p] && [string is digit $p]} { set last_part_tag "<22${s}>" } else { set last_part_tag "<33${s}>" } set last_part_text_tag [dict get $tagconfig last_part_text_tag] #for -winlike 1 there is no tag configured. Windows explorer likes to put things in the order: # module-0.1.1.tm # module-0.1.1.2.tm # module-0.1.tm # arguably -winlike 0 is more natural/human # module-0.1.tm # module-0.1.1.tm # module-0.1.1.2.tm if {[string length $last_part_text_tag]} { #replace only the first text-tag (<30>) from the subpart_index if {[string match "<30?>*" $partindex]} { #give textual string index a specific tag for last part in split only. e.g <130> for lower than integers set partindex "<130>[string range $partindex 5 end]" } #append parts_below_index $last_part_tag } #set partindex $last_part_tag$partindex } append parts_below_index $partindex if {$showsplits} { if {$lastpart} { set suffix "${pnum}${s}=)" ;# = sorts before _ } else { set suffix "${pnum}${s}_)" } append parts_below_index $suffix } incr pnum } append parts_below_index "" ;# don't add anything at the tail that may perturb sort order if {$debug >= 3} { set pad [string repeat " " 20] puts stdout "END build_key chunk : $chunk " puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" puts stdout "END build_key ret below_index: $parts_below_index" } return $parts_below_index } } #puts stdout ">>>chunk:'$chunk'<<< split-on:$s parts: '$parts' splitchars: $splitchars -topdict:$topdict" #if {$chunk eq ""} { # puts "___________________________________________!!!____" #} #puts stdout "-->chunk:$chunk $s parts:$parts" #puts stdout "---chunk:'$chunk' part:'$part' parts:'$parts' s:'$s'" set segments [split_numeric_segments $chunk] ;#! set stringindex "" set segnum 0 foreach seg $segments { #puts stdout "=================---->seg:$seg segments:$segments" #-strict ? if {[string length $seg] && [string is digit $seg]} { set basenum [trimzero [string trim $seg]] set lengthindex "[padleft [string length $basenum] 4]d" #append stringindex "<20>$lengthindex $basenum $seg" } else { set c1 [string range $seg 0 0] #puts stdout "==============> c1'$c1' topdict: $topdict stringindex:$stringindex" if {$c1 in [dict keys $topdict]} { set tag [dict get $topdict $c1] #append stringindex "${tag}$c1" #set seg [string range $seg 1 end] } #textindex set leader "<30>" set idx $seg set idx [string trim $idx] set idx [string tolower $idx] set idx [string map $index_map $idx] #set the X-c count to match the length of the index - not the raw data set lengthindex "[padleft [string length $idx] 4]c" #append stringindex "${leader}$idx $lengthindex $texttail" } } if {[llength $parts] != 1} { error "build_key assertion fail llength parts != 1 parts:$parts" } set segtail_clearance_buffer " " ;#space to clear other split indicators if using showsplits set segtail $segtail_clearance_buffer append segtail "\[" set grouping "" set pnum 0 foreach p $parts { set sublen_list [list] set subsegments [split_numeric_segments $p] set i 0 set partsorter "" foreach sub $subsegments { ##don't trim spaces here - it would be inconsistent. Some subs are pure whitespace - others have internal whitespace. e.g "a4 400b a b2" becomes "a 4 { } 400 {b a b} 2" #mapping away all whitespace would be consistent, but not necessarily desirable. If it's in the index_map it'll happen anyway - so we don't do it manually here except for evaluating conditions. set test_trim [string trim $sub] set str $sub set str [string tolower $str] set str [string map $index_map $str] if {[string length $test_trim] && [string is digit $test_trim]} { append partsorter [trimzero $str] } else { append partsorter "$str" } append partsorter } foreach sub $subsegments { if {[string length $sub] && [string is digit $sub]} { set basenum [trimzero [string trim $sub]] set subequivs $basenum set lengthindex "[padleft [string length $subequivs] 4]d " set idx "$lengthindex [padleft $basenum 10]" ;#todo - cycle through data and determine longest set tail [overtype::left [string repeat " " 10] $sub] #set tail "" } else { set idx "" set lookahead [lindex $subsegments $i+1] if {![string length $lookahead]} { set zeronum "[padleft 0 4]d0" } else { set zeronum "" } set subequivs $sub #set subequivs [string trim $subequivs] set subequivs [string tolower $subequivs] set subequivs [string map $index_map $subequivs] append idx $subequivs append idx $zeronum set idx $subequivs # set ch "-" if {$tag_dashes} { #puts stdout "____TAG DASHES" #winlike set numleading [get_leading_char_count $seg $ch] if {$numleading > 0} { set texttail "<31-leading[padleft $numleading 4]$ch>" } else { set texttail "<30>" } set numothers [expr {[get_char_count $seg $ch] - $numleading}] if {$debug >= 2} { puts stdout "____dashcount: [get_char_count $seg $ch] numothers: $numothers" } if {$numothers > 0} { append texttail "<31-others[padleft $numothers 4]$ch>" } else { append textail "<30>" } } else { set texttail "<30>" } #set idx $partsorter set tail "" #set tail [string tolower $sub] ;#raw #set tail $partsorter #append tail ":[string tolower $p]" ;#we need the whole part - even though it makes the index much larger. !todo - tagconfig switch to toggle case sensitive sorting } append grouping "$idx $tail|$s" incr i } if {$p eq ""} { # no subsegments.. set zeronum "[padleft 0 4]d0" #append grouping "\u000$zerotail" append grouping ".$zeronum" } #append grouping | #append grouping $s #foreach len $sublen_list { # append segtail "<[padleft $len 3]>" #} incr pnum } set grouping [string trimright $grouping $s] append grouping "[padleft [llength $parts] 4]" append segtail $grouping #append segtail " <[padleft [llength $parts] 4]>" append segtail "\]" #if {[string length $seg] && [string is digit $seg]} { # append segtail "<20>" #} else { # append segtail "<30>" #} append stringindex $segtail incr segnum lappend indices $stringindex if {[llength $indices] > 1} { puts stderr "INDICES [llength $indices]: $stringindex" error "build_key assertion error deadconcept indices" } #topchar handling on splitter characters #set c1 [string range $chunk 0 0] if {$s in [dict keys $topdict]} { set tag [dict get $topdict $s] set joiner [string map [list ">" "$s>"] ${tag}] #we have split on this character $s so if the first part is empty string then $s was a leading character # we need to bring a tag out front for this, or it will be dominated by the leading sections-remaing tag # (since the empty string produces no tag of it's own - ?) if {[string length [lindex $parts 0]] == 0} { set prefix ${joiner} } else { set prefix "" } } else { #use standard character-data positioning tag if no override from topdict set joiner "<30J>$s" set prefix "" } set contentindex $prefix[join $indices $joiner] if {[string length $s]} { set split_indicator "" } else { set split_indicator "" } if {![string length $s]} { set s ~ } #return "[overtype::left [string repeat { } 80] $contentindex][overtype::left [string repeat { } 10] [list $s $chunk]]" #return $contentindex$split_indicator #return [overtype::left [string repeat - 40] $contentindex] if {$debug >= 3} { puts stdout "END build_key chunk : $chunk" puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes" puts stdout "END build_key ret contentidx : $contentindex" } return $contentindex } #---------------------------------------- #line-processors - data always last argument - opts can be empty string #all processor should accept empty opts and ignore opts if they don't use them proc _lineinput_as_tcl1 {opts line} { set out "" foreach i $line { append out "$i " } set out [string range $out 0 end-1] return $out } #should be equivalent to above proc _lineinput_as_tcl {opts line} { return [concat {*}$line] } #will put extra tcl quoting if it was already tcl-shaped e.g text "a" -> {"a"} proc _lineoutput_as_tcl {opts line} { return [regexp -inline -all {\S+} $line] } proc _lineinput_as_raw {opts line} { return $line } proc _lineoutput_as_raw {opts line} { return $line } #words is opposite of tcl proc _lineinput_as_words {opts line} { #wordlike_parts return [regexp -inline -all {\S+} $line] } proc _lineoutput_as_words {opts line} { return [concat {*}$line] } #opts same as tcllib csv::split - except without the 'line' element #?-alternate? ?sepChar? ?delChar? proc _lineinput_as_csv {opts line} { package require csv if {[lindex $opts 0] eq "-alternate"} { return [csv::split -alternate $line {*}[lrange $opts 1 end]] } else { return [csv::split $line {*}$opts] } } #opts same as tcllib csv::join #?sepChar? ?delChar? ?delMode? proc _lineoutput_as_csv {opts line} { package require csv return [csv::join $line {*}$opts] } #---------------------------------------- variable sort_flagspecs set sort_flagspecs [dict create\ -caller natsort::sort \ -return supplied|defaults \ -defaults [list -collate nocase \ -winlike 0 \ -splits "\uFFFF" \ -topchars {. _} \ -showsplits 1 \ -sortmethod ascii \ -collate "\uFFFF" \ -inputformat raw \ -inputformatapply {index data} \ -inputformatoptions "" \ -outputformat raw \ -outputformatoptions "" \ -cols "\uFFFF" \ -debug 0 -db "" -stacktrace 0 -splits "\uFFFF" -showsplits 0] \ -required {all} \ -extras {none} \ -commandprocessors {}\ ] proc sort {stringlist args} { #puts stdout "natsort::sort args: $args" variable debug variable sort_flagspecs if {![llength $stringlist]} return if {[llength $stringlist] == 1} { if {"-inputformat" ni $args && "-outputformat" ni $args} { return $stringlist } } #allow pass through of the check_flags flag -debugargs so it can be set by the caller set debugargs 0 if {[set posn [lsearch $args -debugargs]] >=0} { if {$posn == [llength $args]-1} { #-debugargs at tail of list set debugargs 1 } else { set debugargs [lindex $args $posn+1] } } #-return flagged|defaults doesn't work Review. #flagfilter global processor/allocator not working 2023-08 set opts [check_flags {*}$sort_flagspecs -debugargs $debugargs -values $args] #we can only shortcircuit input list of single element at this point if there aren't non-default -inputformat or -outputformat transformations if {[llength $stringlist] == 1} { set is_basic 1 foreach fname [list -inputformat -outputformat] { if {[dict get $sort_flagspecs -defaults $fname] ne [dict get $opts $fname]} { set is_basic 0 break } } if {$is_basic} { return $stringlist } } set winlike [dict get $opts -winlike] set topchars [dict get $opts -topchars] set cols [dict get $opts -cols] set debug [dict get $opts -debug] set stacktrace [dict get $opts -stacktrace] set showsplits [dict get $opts -showsplits] set splits [dict get $opts -splits] set sortmethod [dict get $opts -sortmethod] set opt_collate [dict get $opts -collate] set opt_inputformat [dict get $opts -inputformat] set opt_inputformatapply [dict get $opts -inputformatapply] set opt_inputformatoptions [dict get $opts -inputformatoptions] set opt_outputformat [dict get $opts -outputformat] set opt_outputformatoptions [dict get $opts -outputformatoptions] if {$debug} { #dict unset opts -showsplits #dict unset opts -splits puts stdout "natsort::sort processed_args: $opts" if {$debug == 1} { puts stdout "natsort::sort - try also -debug 2, -debug 3" } } #set sortmethod "-dictionary" ;# sorts a2b before a001b - possibly other strangenesses that are hard to reason about switch -- $sortmethod { dictionary - ascii { set sortmethod "-$sortmethod" # -ascii is default for tcl lsort. } default { set sortmethod "-ascii" } } set allowed_collations [list nocase] if {$opt_collate ne "\uFFFF"} { if {$opt_collate ni $allowed_collations} { error "natsort::sort unknown value for -collate option. Only acceptable value(s): $allowed_collations" } set nocaseopt "-$opt_collate" } else { set nocaseopt "" } set allowed_inputformats [list tcl raw csv words] switch -- $opt_inputformat { tcl - raw - csv - words {} default { error "natsort::sort unknown value for -inputformat option. Only acceptable value(s): $allowed_inputformats" } } set allowed_outputformats [list tcl raw csv words] switch -- $opt_outputformat { tcl - raw - csv - words {} default { error "natsort::sort unknown value for -outputformat option. Only acceptable value(s): $allowed_outputformats" } } # set winsplits [list / . _] set commonsplits [list / . _ -] #set commonsplits [list] set tagconfig [dict create] dict set tagconfig last_part_text_tag "<19>" if {$winlike} { set splitchars $winsplits #windows explorer sorts leading spaces at the top - which doesn't seem very helpful for keeping things together - but the explorer doesn't seem able to create leading spaces anyway. set wintop [list "(" ")" { } {.} {_}] ;#windows specific order foreach t $topchars { if {$t ni $wintop} { lappend wintop $t } } set topchars $wintop dict set tagconfig last_part_text_tag "" } else { set splitchars $commonsplits } if {$splits ne "\uFFFF"} { set splitchars $splits } dict set tagconfig original_splitchars $splitchars dict set tagconfig showsplits $showsplits #create topdict set i 0 set topdict [dict create] foreach c $topchars { incr i ;#start at 01 so that 00 reserved for final-split tag (allows x-0.1.txt to sort above x-0.1.1.txt by default. Use tagconfig to change, or choose -winlike 1 for explorer-like sorting) dict set topdict $c "<0$i>" } set keylist [list] switch -- $opt_inputformat { tcl { set lineinput_transform [list _lineinput_as_tcl $opt_inputformatoptions] } csv { set lineinput_transform [list _lineinput_as_csv $opt_inputformatoptions] } raw { set lineinput_transform [list _lineinput_as_raw $opt_inputformatoptions] } words { set lineinput_transform [list _lineinput_as_words $opt_inputformatoptions] } } switch -- $opt_outputformat { tcl { set lineoutput_transform [list _lineoutput_as_tcl $opt_outputformatoptions] } csv { set lineoutput_transform [list _lineoutput_as_csv $opt_outputformatoptions] } raw { set lineoutput_transform [list _lineoutput_as_raw $opt_outputformatoptions] } words { set lineoutput_transform [list _lineoutput_as_words $opt_outputformatoptions] } } if {("data" in $opt_inputformatapply) || ("index" in $opt_inputformatapply)} { if {$opt_inputformat eq "raw"} { set tf_stringlist $stringlist } else { set tf_stringlist [list] foreach v $stringlist { lappend tf_stringlist [{*}$lineinput_transform $v] } } if {"data" in $opt_inputformatapply} { set tf_data_stringlist $tf_stringlist } else { set tf_data_stringlist $stringlist } if {"index" in $opt_inputformatapply} { set tf_index_stringlist $tf_stringlist } else { set tf_index_stringlist $stringlist } } else { set tf_data_stringlist $stringlist set tf_index_stringlist $stringlist } if {$stacktrace} { puts stdout [natsort::stacktrace] set natsort::stacktrace_on 1 } if {$cols eq "\uFFFF"} { set colkeys [lmap v $stringlist {}] } else { set colkeys [list] foreach v $tf_index_stringlist { set lineparts $v set k [list] foreach c $cols { lappend k [lindex $lineparts $c] } lappend colkeys [join $k "_"] ;#use a common-split char - Review } } #puts stdout "colkeys: $colkeys" if {$opt_inputformat eq "raw"} { #no inputformat was applied - can just use stringlist foreach value $stringlist ck $colkeys { set contentindex [build_key $value $splitchars $topdict $tagconfig $debug] set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] lappend keylist ${colindex}-${contentindex}-$value ;#note: entire raw value used for final sort disambiguation (can be whitespace that was ignored in indexing) } } else { foreach keyinput $tf_index_stringlist datavalue $tf_data_stringlist ck $colkeys { #data may or may not have been transformed #column index may or may not have been built with transformed data set contentindex [build_key $keyinput $splitchars $topdict $tagconfig $debug] set colindex [build_key $ck $splitchars $topdict $tagconfig $debug] lappend keylist ${colindex}-${contentindex}-$datavalue ;#note: entire value used for final sort disambiguation (can be whitespace that was ignored in indexing) } } #puts stderr "keylist: $keylist" ################################################################################################### # Use the generated keylist to do the actual sorting # select either the transformed or raw data as the corresponding output ################################################################################################### if {[string length $nocaseopt]} { set sortcommand [list lsort $sortmethod $nocaseopt -indices $keylist] } else { set sortcommand [list lsort $sortmethod -indices $keylist] } if {$opt_outputformat eq "raw"} { #raw output means no further transformations - it doesn't mean there wasn't a transform applied on the input side #use the tf_data_stringlist in the output - which will be the same as the input stringlist if no input transform applied for data. #(Also - it may or may not have been *sorted* on transformed data depending on whether 'index' was in $opt_inputformatapply) foreach idx [{*}$sortcommand] { lappend result [lindex $tf_data_stringlist $idx] } } else { #we need to apply an output format #The data may or may not have been transformed at input foreach idx [{*}$sortcommand] { lappend result [{*}$lineoutput_transform [lindex $tf_data_stringlist $idx]] } } ################################################################################################### if {$debug >= 2} { set screen_width 250 set max_val 0 set max_idx 0 ##### calculate colum widths foreach i [{*}$sortcommand] { set len_val [string length [lindex $stringlist $i]] if {$len_val > $max_val} { set max_val $len_val } set len_idx [string length [lindex $keylist $i]] if {$len_idx > $max_idx} { set max_idx $len_idx } } #### set l_width [expr {$max_val + 1}] set leftcol [string repeat " " $l_width] set r_width [expr {$screen_width - $l_width - 1}] set rightcol [string repeat " " $r_width] set str [overtype::left $leftcol RAW] puts stdout " $str Index with possibly transformed data at tail" foreach i [{*}$sortcommand] { #puts stdout "|d> [overtype::left $leftcol [lindex $stringlist $i] ] [lindex $keylist $i]" set index [lindex $keylist $i] set len_idx [string length $index] set rowcount [expr {$len_idx / $r_width}] if {($len_idx % $r_width) > 0} { incr rowcount } set rows [list] for {set r 0} {$r < $rowcount} {incr r} { lappend rows [string range $index 0 $r_width-$r] set index [string range $index $r_width end] } set r 0 foreach idxpart $rows { if {$r == 0} { #use the untransformed stringlist set str [overtype::left $leftcol [lindex $stringlist $i]] } else { set str [overtype::left $leftcol ...]] } puts stdout " $str $idxpart" incr r } #puts stdout "|> '[lindex $stringlist $i]'" #puts stdout "|> [lindex $keylist $i]" } puts stdout "|debug> topdict: $topdict" puts stdout "|debug> splitchars: $splitchars" } return $result } #Note that although leading whitespace isn't a commonly used feature of filesystem names - it's possible at least on FreeBSD,windows and linux so we should try to handle it sensibly. proc sort_experiment {stringlist args} { package require sqlite3 variable debug set args [check_flags -caller natsort::sort \ -defaults [dict create -db :memory: -collate nocase -nullvalue "->NULL<" -winlike 0 -topchars [list] -debug 0] \ -extras {all} \ -values $args] set db [string trim [dict get $args -db]] set collate [string trim [dict get $args -collate]] set winlike [string trim [dict get $args -winlike]] set debug [string trim [dict get $args -debug]] set nullvalue [string trim [dict get $args -nullvalue]] set topchars [string trim [dict get $args -topchars]] set topdot [expr {"." in $topchars}] set topunderscore [expr {"_" in $topchars}] sqlite3 db_natsort2 $db #-- #our table must handle the name with the greatest number of numeric/non-numeric splits. #This means a single list member with pathological naming e.g a1a1a1a1a1a1a1a1a1a1a1.txt could greatly extend the number of columns and indices and affect performance. #review: could be optimised to aggregate the tail into a single index, as the the extra columns won't assist in ordering, but make the table and query bigger. # we should probably determine the longest common sequence of splits in the input list and add only one more index for the segment after that. set maxsegments 0 #-- set prefix "idx" #note - there will be more columns in the sorting table than segments. # (a segment equals one of the numeric & non-numeric string portions returned from 'split_numeric_sgements') #--------------------------- # consider # a123b.v1.2.txt # a123b.v1.3beta1.txt # these have the following segments: # a 123 b.v 1 . 2 .txt # a 123 b.v 1 . 3 beta 1 .txt #--------------------------- # The first string has 7 segments (numbered 0 to 6) # the second string has 9 segments # # for example when the data has any elements in a segment position that are numeric (e.g 0001 123) # - then an index column with numeric equivalents will be created (e.g 0001 becomes 1), and any non-numeric values in that column will get mapped to a negative value (for special cases) or a high value such as NULL (with NULLS LAST sql support) # # when a segment #cycle through all strings - we cannot build tabledef as we go because the column type depends on whether all segments for a particular column are text vs int-equivalent. array set segmentinfo {} foreach nm $stringlist { set segments [split_numeric_segments $nm] if {![string length [string trim [lindex $segments 0]]]} { if {[string is digit [string trim [lindex $segments 1]]]} { #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) set segments [lrange $segments 1 end] } } set c 0 ;#start of index columns if {[llength $segments] > $maxsegments} { set maxsegments [llength $segments] } foreach seg $segments { set seg [string trim $seg] set column_exists [info exists segmentinfo($c,type)] if {[string is digit $seg]} { if {$column_exists} { #override it (may currently be text or int) set segmentinfo($c,type) "int" } else { #new column set segmentinfo($c,name) ${prefix}$c set segmentinfo($c,type) "int" } } else { #text never overrides int if {!$column_exists} { set segmentinfo($c,name) ${prefix}$c set segmentinfo($c,type) "text" } } incr c } } if {$debug} { puts stdout "Largest number of num/non-num segments in data: $maxsegments" #parray segmentinfo } # set tabledef "" set ordered_column_names [list] set ordered_segmentinfo_tags [lsort -dictionary [array names segmentinfo *]] foreach k $ordered_segmentinfo_tags { lassign [split $k ,] c tag if {$tag eq "type"} { set type [set segmentinfo($k)] if {$type eq "int"} { append tabledef "$segmentinfo($c,name) int," } else { append tabledef "$segmentinfo($c,name) text COLLATE $collate," } append tabledef "raw$c text COLLATE $collate," lappend ordered_column_names $segmentinfo($c,name) lappend ordered_column_names raw$c ;#additional index column not in segmentinfo } if {$tag eq "name"} { #lappend ordered_column_names $segmentinfo($k) } } append tabledef "name text" #puts stdout "tabledef:$tabledef" db_natsort2 eval [string map [list %tabledef% $tabledef] {create table natsort(%tabledef%)}] foreach nm $stringlist { array unset intdata array set intdata {} array set rawdata {} #init array and build sql values string set sql_insert "insert into natsort values(" for {set i 0} {$i < $maxsegments} {incr i} { set intdata($i) "" set rawdata($i) "" append sql_insert "\$intdata($i),\$rawdata($i)," } append sql_insert "\$nm" ;#don't manipulate name value in any way - e.g must leave all whitespace as the sort must return exactly the same elements as in the original list. append sql_insert ")" set segments [split_numeric_segments $nm] if {![string length [string trim [lindex $segments 0]]]} { if {[string is digit [string trim [lindex $segments 1]]]} { #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though) set segments [lrange $segments 1 end] } } set values "" set c 0 foreach seg $segments { if {[set segmentinfo($c,type)] eq "int"} { if {[string is digit [string trim $seg]]} { set intdata($c) [trimzero [string trim $seg]] } else { catch {unset intdata($c)} ;#set NULL - sorts last if {($c == 0) && ($topunderscore) && [string match _* [string trim $seg]]} { set intdata($c) -100 } if {($c == 0) && ($topdot) && [string match .* [string trim $seg]]} { set intdata($c) -50 } } set rawdata($c) [string trim $seg] } else { #pure text column #set intdata($c) [string trim $seg] ;#ignore leading/trailing whitespace - we sort first on trimmed version, then refine with the sort on rawdata index #catch {unset indata($c)} set indata($c) [string trim $seg] set rawdata($c) $seg } #set rawdata($c) [string trim $seg]# #set rawdata($c) $seg incr c } db_natsort2 eval $sql_insert } set orderedlist [list] if {$debug} { db_natsort2 eval {select * from pragma_table_info('natsort')} rowdata { parray rowdata } } set orderby "order by " foreach cname $ordered_column_names { if {[string match "idx*" $cname]} { append orderby "$cname ASC NULLS LAST," } else { append orderby "$cname ASC," } } append orderby " name ASC" #append orderby " NULLS LAST" ;#?? #e.g "order by idx0 ASC, raw0 ASC, idx1 ASC .... name ASC" if {$debug} { puts stdout "orderby clause: $orderby" } db_natsort2 eval [string map [list %orderby% $orderby] {select * from natsort %orderby%}] rowdata { set line "- " #parray rowdata set columnnames $rowdata(*) #puts stdout "columnnames: $columnnames" #[lsort -dictionary [array names rowdata] append line "$rowdata(name) \n" foreach nm $columnnames { if {$nm ne "name"} { append line "$nm: $rowdata($nm) " } } #puts stdout $line #puts stdout "$rowdata(name)" lappend orderedlist $rowdata(name) } db_natsort2 close return $orderedlist } } #application section e.g this file might be linked from /usr/local/bin/natsort namespace eval natsort { namespace import ::flagfilter::check_flags proc called_directly_namematch {} { global argv0 if {[info script] eq ""} { return 0 } #see https://wiki.tcl-lang.org/page/main+script #trailing ... let's us resolve symlinks in last component of the path (could be something else like ___ but ... seems unlikely to collide with anything in the filesystem) if {[info exists argv0] && [file dirname [file normalize [file join [info script] ...]]] eq [file dirname [file normalize [file join $argv0 ...]]] } { return 1 } else { #puts stdout "norm info script: [file dirname [file normalize [file join [info script] ...]]]" #puts stdout "norm argv0 : [file dirname [file normalize [file join $argv0 ...]]]" return 0 } } #Review issues around comparing names vs using inodes (esp with respect to samba shares) proc called_directly_inodematch {} { global argv0 if {[info exists argv0] && [file exists [info script]] && [file exists $argv0]} { file stat $argv0 argv0Info file stat [info script] scriptInfo if {$argv0Info(ino) == 0 || $scriptInfo(ino) == 0 || $argv0Info(dev) == 0 || $scriptInfo(dev) == 0} { #vfs? #e.g //zipfs:/ return 0 } return [expr {$argv0Info(dev) == $scriptInfo(dev) && $argv0Info(ino) == $scriptInfo(ino)}] } else { return 0 } } if {![interp issafe]} { set is_namematch [called_directly_namematch] set is_inodematch [called_directly_inodematch] #### #review - reliability of mechanisms to determine direct calls # we don't want application being called when being used as a library, but we need it to run if called directly or from symlinks etc #-- choose a policy and leave the others commented. #set is_called_directly $is_namematch #set is_called_directly $is_inodematch #puts "NATSORT: called_directly_namematch - $is_namematch" #puts "NATSORT: called_directly_inodematch - $is_inodematch" #flush stdout set is_called_directly [expr {$is_namematch || $is_inodematch}] #set is_called_directly [expr {$is_namematch && $is_inodematch}] ### #puts stdout "called_directly_name: [called_directly_namematch] called_directly_inode: [called_directly_inodematch]" } else { #safe interp set is_called_directly 0 } proc test_pass_fail_message {pass {additional ""}} { variable test_fail_msg variable test_pass_msg if {$pass} { puts stderr $test_pass_msg } else { puts stderr $test_fail_msg } puts stderr $additional } variable test_fail_msg "XXXXXXXXXXXX FAIL XXXXXXXXXXXXX" variable test_pass_msg "------------ PASS -------------" proc test_sort_1 {args} { package require struct::list puts stderr "---$args" set args [check_flags -caller natsort:test_sort_1 -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] -values $args] puts stderr "test_sort_1 got args: $args" set unsorted_input { 2.2.2 2.2.2.2 1a.1.1 1a.2.1.1 1.12.1 1.2.1.1 1.02.1.1 1.002b.1.1 1.1.1.2 1.1.1.1 } set input { 1.1.1 1.1.1.2 1.002b.1.1 1.02.1.1 1.2.1.1 1.12.1 1a.1.1 1a.2.1.1 2.2.2 2.2.2.2 } set sorted [natsort::sort $input {*}$args] set is_match [struct::list equal $input $sorted] set msg "windows-explorer order" test_pass_fail_message $is_match $msg puts stdout [string repeat - 40] puts stdout INPUT puts stdout [string repeat - 40] foreach item $input { puts stdout $item } puts stdout [string repeat - 40] puts stdout OUTPUT puts stdout [string repeat - 40] foreach item $sorted { puts stdout $item } test_pass_fail_message $is_match $msg return [expr {!$is_match}] } proc test_sort_showsplits {args} { package require struct::list set args [check_flags -caller natsort:test_sort_1 \ -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 1 ] \ -extras {all} \ -values $args] set input1 { a-b.txt a.b.c.txt b.c-txt } set input2 { a.b.c.txt a-b.txt b.c-text } foreach {msg testlist } [list "custom-order" $input1 "windows-explorer (should work with -winlike 1)" $input2] { set sorted [natsort::sort $testlist {*}$args] set is_match [struct::list equal $testlist $sorted] test_pass_fail_message $is_match $msg puts stderr "INPUT" puts stderr "[string repeat - 40]" foreach item $testlist { puts stdout $item } puts stderr "[string repeat - 40]" puts stderr "OUTPUT" puts stderr "[string repeat - 40]" foreach item $sorted { puts stdout $item } test_pass_fail_message $is_match $msg } #return [expr {!$is_match}] } #tcl proc dispatch order - non flag items up front #trailing flags are paired even if supplied as solo flags e.g -l becomes -l 1 proc commandline_ls {args} { set operands [list] set posn 0 foreach a $args { if {![string match -* $a]} { lappend operands $a } else { set flag1_posn $posn break } incr posn } set args [lrange $args $flag1_posn end] set debug 0 set posn [lsearch $args -debug] if {$posn > 0} { if {[lindex $args $posn+1]} { set debug [lindex $args $posn+1] } } if {$debug} { puts stderr "|debug>commandline_ls got $args" } #if first operand not supplied - replace it with current working dir if {[lindex $operands 0] eq "\uFFFF"} { lset operands 0 [pwd] } set targets [list] foreach op $operands { if {$op ne "\uFFFF"} { set opchars [split [file tail $op] ""] if {"?" in $opchars || "*" in $opchars} { lappend targets $op } else { #actual file or dir set targetitem $op set targetitem [file normalize $op] if {![file exists $targetitem]} { if {$debug} { puts stderr "|debug>commandline_ls Unable to access path '$targetitem'" } } lappend targets $targetitem if {$debug} { puts stderr "|debug>commandline_ls listing for $targetitem" } } } } set args [check_flags -caller commandline_ls \ -return flagged|defaults \ -debugargs 0 \ -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 -algorithm sort] \ -required {all} \ -extras {all} \ -soloflags {-v -l} \ -commandprocessors {} \ -values $args ] if {$debug} { puts stderr "|debug>args: $args" } set algorithm [dict get $args -algorithm] dict unset args -algorithm set allfolders [list] set allfiles [list] foreach item $targets { if {[file exists $item]} { if {[file type $item] eq "directory"} { set dotfolders [glob -nocomplain -directory $item -type {d} -tail .*] set folders [glob -nocomplain -directory $item -type {d} -tail *] set allfolders [concat $allfolders $dotfolders $folders] set dotfiles [glob -nocomplain -directory $item -type {f} -tail .*] set files [glob -nocomplain -directory $item -type {f} -tail *] set allfiles [concat $allfiles $dotfiles $files] } else { #file (or link?) set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] set allfiles [concat $allfiles $files] } } else { set folders [glob -nocomplain -directory $item -type {d} -tail [file tail $item]] set allfolders [concat $allfolders $folders] set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]] set allfiles [concat $allfiles $files] } } set sorted_folders [natsort::sort $allfolders {*}$args] set sorted_files [natsort::sort $allfiles {*}$args] foreach fold $sorted_folders { puts stdout $fold } foreach file $sorted_files { puts stdout $file } return "-- ok printed to stdout [llength $sorted_folders] folders and [llength $sorted_files] files --" } package require argp argp::registerArgs commandline_test { { -showsplits boolean 0} { -stacktrace boolean 0} { -debug boolean 0} { -winlike boolean 0} { -db string ":memory:"} { -collate string "nocase"} { -algorithm string "sort"} { -topchars string "\uFFFF"} { -testlist string {10 1 30 3}} } argp::setArgsNeeded commandline_test {-stacktrace} proc commandline_test {test args} { variable testlist puts stdout "commandline_test got $args" argp::parseArgs opts puts stdout "commandline_test got [array get opts]" set args [check_flags -caller natsort_commandline \ -return flagged|defaults \ -defaults [list -db :memory: -collate nocase -testlist $testlist -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ -values $args] if {[string tolower $test] in [list "1" "true"]} { set test "sort" } else { if {![llength [info commands $test]]} { error "test $test not found" } } dict unset args -test set stacktrace [dict get $args -stacktrace] # dict unset args -stacktrace set argtestlist [dict get $args -testlist] dict unset args -testlist set debug [dict get $args -debug] set collate [dict get $args -collate] set db [dict get $args -db] set winlike [dict get $args -winlike] set topchars [dict get $args -topchars] puts stderr "|test>-----start natsort::$test--- input list size : [llength $argtestlist]" #set resultlist [$test $argtestlist -db $db -collate $collate -topchars $topchars -winlike $winlike] set resultlist [$test $argtestlist {*}$args] foreach nm $resultlist { puts stdout $nm } puts stdout "|test>-----end natsort::$test--- sorted list size: [llength $resultlist]" return "test end" } proc commandline_runtests {runtests args} { set argvals [check_flags -caller commandline_runtests \ -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits "\uFFFF" -runtests 1] \ -values $args] puts stderr "runtests args: $argvals" #set runtests [dict get $argvals -runtests] dict unset argvals -runtests dict unset argvals -algorithm puts stderr "runtests args: $argvals" #exit 0 set test_prefix "::natsort::test_sort_" if {$runtests eq "1"} { set runtests "*" } set testcommands [info commands ${test_prefix}${runtests}] if {![llength $testcommands]} { puts stderr "No test commands matched -runtests argument '$runtests'" puts stderr "Use 1 to run all tests" set alltests [info commands ${test_prefix}*] puts stderr "Valid tests are:" set prefixlen [string length $test_prefix] foreach t $alltests { set shortname [string range $t $prefixlen end] puts stderr "$t = -runtests $shortname" } } else { foreach cmd $testcommands { puts stderr [string repeat - 40] puts stderr "calling $cmd with args: '$argvals'" puts stderr [string repeat - 40] $cmd {*}$argvals } } exit 0 } proc help {args} { puts stdout "natsort::help got '$args'" return "Help not implemented" } proc natsort_pipe {args} { #PIPELINE to take input list on stdin and write sorted list to stdout #strip - from arglist #set args [check_flags -caller natsort_pipeline \ # -return all \ # -defaults [list -db :memory: -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ # -values $args] set debug [dict get $args -debug] if {$debug} { puts stderr "|debug> natsort_pipe got args:'$args'" } set algorithm [dict get $args -algorithm] dict unset args -algorithm set proclist [info commands ::natsort::sort*] set algos [list] foreach p $proclist { lappend algos [namespace tail $p] } if {$algorithm ni [list {*}$proclist {*}$algos]} { do_error "valid sort mechanisms: $algos" 2 } set input_list [list] while {![eof stdin]} { if {[gets stdin line] > 0} { lappend input_list $line } else { if {[eof stdin]} { } else { after 10 } } } if {$debug} { puts stderr "|debug> received [llength $input_list] list elements" } set resultlist [$algorithm $input_list {*}$args] if {$debug} { puts stderr "|debug> returning [llength $resultlist] list elements" } foreach r $resultlist { puts stdout $r } #exit 0 } if {($is_called_directly)} { set cmdprocessors { {helpfinal {match "^help$" dispatch natsort::help}} {helpfinal {sub -topic default "NONE"}} } #set args [check_flags \ # -caller test1 \ # -debugargs 2 \ # -return arglist \ # -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \ # -required {none} \ # -extras {all} \ # -commandprocessors $cmdprocessors \ # -values $::argv ] interp alias {} do_filter {} ::flagfilter::check_flags #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld set cmdprocessors { {helpcmd {match "^help$" dispatch natsort::help singleopts {-v}}} {helpcmd {sub -operand default \uFFFF singleopts {-l}}} {lscmd {match "^ls$" dispatch natsort::commandline_ls dispatchtype tcl dispatchglobal 1 mashopts {-l -a} singleopts {-l -a} pairopts {} longopts {--color=always}}} {lscmd {sub dir default "\uFFFF"}} {lscmd {sub dir2 default "\uFFFF"}} {lscmd {sub dir3 default "\uFFFF"}} {lscmd {sub dir4 default "\uFFFF"}} {lscmd {sub dir5 default "\uFFFF"}} {lscmd {sub dir6 default "\uFFFF"}} {runtests {match "^-tests$" dispatch natsort::commandline_runtests singleopts {-l}}} {runtests {sub testname default "1" singleopts {-l}}} {pipecmd {match "^-$" dispatch natsort::natsort_pipe dispatchtype tcl}} } set arglist [do_filter \ -debugargs 0 \ -debugargsonerror 2 \ -caller cline_dispatch1 \ -return all \ -soloflags {-v -x} \ -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ -required {all} \ -extras {all} \ -commandprocessors $cmdprocessors \ -values $::argv ] #mashopts are generally single-letter opts that can be run together e.g -l -d as -ld set cmdprocessors { {testcmd {match "^test$" dispatch natsort::commandline_test singleopts {-l}}} {testcmd {sub testname default "1" singleopts {-l}}} } set arglist [check_flags \ -debugargs 0 \ -caller cline_dispatch2 \ -return all \ -soloflags {-v -l} \ -defaults [list -collate nocase -algorithm sort -testlist "1 2 3 10" -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \ -required {all} \ -extras {all} \ -commandprocessors $cmdprocessors \ -values $::argv ] #set cmdprocessors [list] #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors $cmdprocessors -values $::argv ] #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {-cmd {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {{-cmd -default help} {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ] #set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {ls {ls lsdir -default "\uFFFF"}} -values $::argv ] puts stderr "natsort directcall exit" flush stderr exit 0 if {$::argc} { } } } package provide natsort [namespace eval natsort { variable version set version 0.1.1.6 }]