You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1961 lines
75 KiB

#! /usr/bin/env tclsh
package require flagfilter
namespace import ::flagfilter::check_flags
namespace eval natsort {
#REVIEW - determine and document the purpose of scriptdir being added to tm path
proc scriptdir {} {
set possibly_linked_script [file dirname [file normalize [file join [info script] __dummy__]]]
if {[file isdirectory $possibly_linked_script]} {
return $possibly_linked_script
} else {
return [file dirname $possibly_linked_script]
}
}
if {![interp issafe]} {
set sdir [scriptdir]
#puts stderr "natsort tcl::tm::add $sdir"
if {$sdir ni [tcl::tm::list]} {
catch {tcl::tm::add $sdir}
}
}
}
namespace eval natsort {
variable stacktrace_on 0
proc do_error {msg {then error}} {
#note we exit or error out even if debug selected - as every do_error call is meant to interrupt code processing at the site of call
#this is not just a 'logging' call even though it has log-like descriptors
lassign $then type code
if {$code eq ""} {
set code 1
}
set type [string tolower $type]
set levels [list debug info notice warn error critical]
if {$type in [concat $levels exit]} {
puts stderr "|$type> $msg"
} else {
puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be one of '$levels' or 'exit <numericcode>'"
}
flush stderr
if {$::tcl_interactive} {
#may not always be desirable - but assumed to be more useful not to exit despite request, to aid in debugging
if {[string tolower $type] eq "exit"} {
puts stderr " (exit suppressed due to tcl_interactive - raising error instead)"
if {![string is digit -strict $code]} {
puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be: 'exit <numericcode>'"
}
flush stderr
}
return -code error $msg
} else {
if {$type ne "exit"} {
return -code error $msg
} else {
if {[string is digit -strict $code]} {
exit $code
} else {
puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be 'error' or 'exit <numericcode>'"
return -code error $msg
}
}
}
}
variable debug 0
variable testlist
set testlist {
00.test-firstposition.txt
0001.blah.txt
1.test-sorts-after-all-leadingzero-number-one-equivs.txt
1010.thousand-and-ten.second.txt
01010.thousand-and-ten.first.txt
0001.aaa.txt
001.zzz.txt
08.octal.txt-last-octal
008.another-octal-first-octal.txt
08.again-second-octal.txt
001.a.txt
0010.reconfig.txt
010.etc.txt
005.etc.01.txt
005.Etc.02.txt
005.123.abc.txt
200.somewhere.txt
2zzzz.before-somewhere.txt
00222-after-somewhere.txt
005.00010.abc.txt
005.a3423bc.00010.abc.txt
005.001.abc.txt
005.etc.1010.txt
005.etc.010.txt
005.etc.10.txt
" 005.etc.10.txt"
005.etc.001.txt
20.somewhere.txt
4611686018427387904999999999-bignum.txt
4611686018427387903-bigishnum.txt
9223372036854775807-bigint.txt
etca-a
etc-a
etc2-a
a0001blah.txt
a010.txt
winlike-sort-difference-0.1.txt
winlike-sort-difference-0.1.1.txt
a1.txt
b1-a0001blah.txt
b1-a010.txt
b1-a1.txt
-a1.txt
--a1.txt
--a10.txt
2.high-two.yml
02.higher-two.yml
reconfig.txt
_common.stuff.txt
CASETEST.txt
casetest.txt
something.txt
some~thing.txt
someathing.txt
someThing.txt
thing.txt
thing_revised.txt
thing-revised.txt
"thing revised.txt"
"spacetest.txt"
" spacetest.txt"
" spacetest.txt"
"spacetest2.txt"
"spacetest 2.txt"
"spacetest02.txt"
name.txt
name2.txt
"name .txt"
"name2 .txt"
blah.txt
combined.txt
a001.txt
.test
.ssh
"Feb 10.txt"
"Feb 8.txt"
1ab23v23v3r89ad8a8a8a9d.txt
"Folder (10)/file.tar.gz"
"Folder/file.tar.gz"
"Folder (1)/file (1).tar.gz"
"Folder (1)/file.tar.gz"
"Folder (01)/file.tar.gz"
"Folder1/file.tar.gz"
"Folder(1)/file.tar.gz"
}
lappend testlist "Some file.txt"
lappend testlist " Some extra file1.txt"
lappend testlist " Some extra file01.txt"
lappend testlist " some extra file1.txt"
lappend testlist " Some extra file003.txt"
lappend testlist " Some file.txt"
lappend testlist "Some extra file02.txt"
lappend testlist "Program Files (x86)"
lappend testlist "01999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999991-bigger-pathologically-bignum.txt"
lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt"
lappend testlist "29999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-smaller-pathologically-bignum.txt"
lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 1.txt"
lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 01.txt"
lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt"
lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a01a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt"
lappend testlist "b1b1b1b1.txt"
lappend testlist "b1b01z1z1.txt"
lappend testlist "c1c111c1.txt"
lappend testlist "c1c1c1c1.txt"
namespace eval overtype {
proc right {args} {
# @d !todo - implement overflow, length checks etc
if {[llength $args] < 2} {
error {usage: ?-overflow [1|0]? undertext overtext}
}
foreach {undertext overtext} [lrange $args end-1 end] break
set opt(-overflow) 0
array set opt [lrange $args 0 end-2]
set olen [string length $overtext]
set ulen [string length $undertext]
if {$opt(-overflow)} {
return [string range $undertext 0 end-$olen]$overtext
} else {
if {$olen > $ulen} {
set diff [expr {$olen - $ulen}]
return [string range $undertext 0 end-$olen][string range $overtext 0 end-$diff]
} else {
return [string range $undertext 0 end-$olen]$overtext
}
}
}
proc left {args} {
# @c overtype starting at left (overstrike)
# @c can/should we use something like this?: 'format "%-*s" $len $overtext
if {[llength $args] < 2} {
error {usage: ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext}
}
foreach {undertext overtext} [lrange $args end-1 end] break
set opt(-ellipsis) 0
set opt(-ellipsistext) {...}
set opt(-overflow) 0
array set opt [lrange $args 0 end-2]
set len [string length $undertext]
set overlen [string length $overtext]
set diff [expr {$overlen - $len}]
#puts stdout "====================>overtype: datalen:$len overlen:$overlen diff:$diff"
#puts stdout "====================>overtype: data: $overtext"
if {$diff > 0} {
if {$opt(-overflow)} {
return $overtext
} else {
if {$opt(-ellipsis)} {
return [overtype::left [string range $overtext 0 [expr {$len -1}]] $opt(-ellipsistext)]
} else {
return [string range $overtext 0 [expr {$len -1}]]
}
}
} else {
return "$overtext[string range $undertext $overlen end]"
}
}
}
#considered using hex to make large numbers more compact for viewing in debug output - but it's not that much shorter and probably obscures more than it helps.
proc hex2dec {largeHex} {
#todo - use punk::lib::hex2dec - (scan supports ll so can do larger hex values directly)
set res 0
set largeHex [string map {_ {}} $largeHex]
if {[string length $largeHex] <=7} {
#scan can process up to FFFFFFF and does so quickly
return [scan $largeHex %x]
}
foreach hexDigit [split $largeHex {}] {
set new 0x$hexDigit
set res [expr {16*$res + $new}]
}
return $res
}
proc dec2hex {decimalNumber} {
format %4.4llX $decimalNumber
}
#punk::lib::trimzero
proc trimzero {number} {
set trimmed [string trimleft $number 0]
if {[string length $trimmed] == 0} {
set trimmed 0
}
return $trimmed
}
#todo - consider human numeric split
#e.g consider SI suffixes k|KMGTPEZY in that order
#in this context, for natural sorting - numeric segments don't contain underscores or other punctuation such as . - + etc.
#review - what about unicode equivalents such as wide numerals \UFF10 to \UFF19? unicode normalization?
proc split_numeric_segments {name} {
set segments [list]
while {[string length $name]} {
if {[scan $name {%[0-9]%n} chunk len] == 2} {
lappend segments $chunk
set name [string range $name $len end]
}
if {[scan $name {%[^0-9]%n} chunk len] == 2} {
lappend segments $chunk
set name [string range $name $len end]
}
}
return $segments
}
proc padleft {str count {ch " "}} {
set val [string repeat $ch $count]
append val $str
set diff [expr {max(0,$count - [string length $str])}]
set offset [expr {max(0,$count - $diff)}]
set val [string range $val $offset end]
}
# Sqlite may have limited collation sequences available in default builds.
# with custom builds - there may be others such as 'natsort' - see https://sqlite.org/forum/forumpost/e4dc6f3331
# This is of limited use with the few builtin collations available in 2023 ie binary,nocase & rtrim
# but may provide a quicker,flexible sort option, especially if/when more collation sequences are added to sqlite
# There are also prebuilt packages such as sqlite3-icu which allows things like "SELECT icu_load_collation('en_AU', 'australian');"
proc sort_sqlite {stringlist args} {
package require sqlite3
set args [check_flags -caller natsort_sqlite -defaults [list -db :memory: -collate nocase -winlike 0 -topchars "\uFFFF" -debug 0 -splitchars [list / . - _] -extras {all}] -values $args]
set db [string trim [dict get $args -db]]
set collate [string trim [dict get $args -collate]]
set debug [string trim [dict get $args -debug]]
set topchars [string trim [dict get $args -topchars]]
set topdot [expr {"." in $topchars}]
set topunderscore [expr {"_" in $topchars}]
sqlite3 db_sort_basic $db
set orderedlist [list]
db_sort_basic eval [string map [list %collate% $collate] {create table sqlitesort(index0 text COLLATE %collate%, name text COLLATE %collate%)}]
foreach nm $stringlist {
set segments [split_numeric_segments $nm]
set index ""
set s 0
foreach seg $segments {
if {($s == 0) && ![string length [string trim $seg]]} {
#don't index leading space
} elseif {($s == 0) && ($topunderscore) && [string match _* [string trim $seg]]} {
append index "[padleft "0" 5]-d -100 topunderscore "
append index [string trim $seg]
} elseif {($s == 0) && ($topdot) && [string match .* [string trim $seg]]} {
append index "[padleft "0" 5]-d -50 topdot "
append index [string trim $seg]
} else {
if {[string is digit [string trim $seg]]} {
set basenum [trimzero [string trim $seg]]
set lengthindex "[padleft [string length $basenum] 5]-d"
append index "$lengthindex "
#append index [padleft $basenum 40]
append index $basenum
} else {
append index [string trim $seg]
}
}
incr s
}
puts stdout ">>$index"
db_sort_basic eval {insert into sqlitesort values($index,$nm)}
}
db_sort_basic eval [string map [list %collate% $collate] {select name from sqlitesort order by index0 COLLATE %collate% ASC, name COLLATE %collate% ASC }] {
lappend orderedlist $name
}
db_sort_basic close
return $orderedlist
}
proc get_leading_char_count {str char} {
#todo - something more elegant? regex?
set count 0
foreach c [split $str "" ] {
if {$c eq $char} {
incr count
} else {
break
}
}
return $count
}
proc stacktrace {} {
set stack "Stack trace:\n"
for {set i 1} {$i < [info level]} {incr i} {
set lvl [info level -$i]
set pname [lindex $lvl 0]
append stack [string repeat " " $i]$pname
if {![catch {info args $pname} pargs]} {
foreach value [lrange $lvl 1 end] arg $pargs {
if {$value eq ""} {
if {$arg != 0} {
info default $pname $arg value
}
}
append stack " $arg='$value'"
}
} else {
append stack " !unknown vars for $pname"
}
append stack \n
}
return $stack
}
proc get_char_count {str char} {
#faster than lsearch on split for str of a few K
expr {[tcl::string::length $str]-[tcl::string::length [tcl::string::map "$char {}" $str]]}
}
proc build_key {chunk splitchars topdict tagconfig debug} {
variable stacktrace_on
if {$stacktrace_on} {
puts stderr "+++>[stacktrace]"
}
set index_map [list - "" _ ""]
#e.g - need to maintain the order
#a b.txt
#a book.txt
#ab.txt
#abacus.txt
set original_splitchars [dict get $tagconfig original_splitchars]
# tag_dashes test moved from loop - review
set tag_dashes 0
if {![string length [dict get $tagconfig last_part_text_tag]]} {
#winlike
set tag_dashes 1
}
if {("-" ni $original_splitchars)} {
set tag_dashes 1
}
if {$debug >= 3} {
puts stdout "START build_key chunk : $chunk"
puts stdout "START build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes"
}
## index_map will have no effect if we've already split on the char anyway(?)
#foreach m [dict keys $index_map] {
# if {$m in $original_splitchars} {
# dict unset index_map $m
# }
#}
#if {![string length $chunk]} return
set result ""
if {![llength $splitchars]} {
#no more structural splits - but we need to examine numeric/non-numeric segments at the lowest level.
# we are at a leaf in the recursive split hierarchy
set s "" ;#we never actually split on "" (unless that was put in splitchars.. but it probably shouldn't be)
set parts [list $chunk] ;#important to treat as list or leading/trailing whitespace lost
} else {
set s [lindex $splitchars 0]
if {"spudbucket$s" in "[split $chunk {}]"} {
error "dead-branch spudbucket"
set partindex [build_key $chunk [lrange $splitchars 1 end] $topdict $tagconfig $debug]
if {[dict get $tagconfig showsplits]} {
set pfx "(1${s}=)" ;# = sorts before _
set partindex ${pfx}$partindex
}
return $partindex
} else {
set parts_below_index ""
if {$s ni [split $chunk ""]} {
#$s can be an empty string
set parts [list $chunk]
} else {
set parts [split $chunk $s] ;#whitespace preserved - even if splitting on s that is not in string.
}
#assert - we have a splitchar $s that is in the chunk - so at least one part
if {(![string length $s] || [llength $parts] == 0)} {
error "buld_key assertion false empty split char and/or no parts"
}
set pnum 1 ;# 1 based for clarity of reading index in debug output
set subpart_count [llength $parts]
set sub_splits [lrange $splitchars 1 end] ;#pass same splitchars to each subpart
foreach p $parts {
set partindex [build_key $p $sub_splits $topdict $tagconfig $debug]
set lastpart [expr {$pnum == $subpart_count}]
#######################
set showsplits [dict get $tagconfig showsplits]
#split prefixing experiment - maybe not suitable for general use - as it affects sort order
#note that pfx must be consistent until last one, no matter how many partnumbers there are in total.
# we don't want to influence sort order before reaching end.
#e.g for:
#(1.=)...
#(1._)...(2._)...(3.=)
#(1._)...(2.=)
#Note that this is probably more suitable for highly structure dependant sorts where the results are maybe less.. natural.
if {$showsplits} {
if {$lastpart} {
set pfx "(${pnum}${s}_"
#set pfx "(${pnum}${s}=)" ;# = sorts before _
} else {
set pfx "(${pnum}${s}_"
}
append parts_below_index $pfx
}
#######################
if {$lastpart} {
if {[string length $p] && [string is digit $p]} {
set last_part_tag "<22${s}>"
} else {
set last_part_tag "<33${s}>"
}
set last_part_text_tag [dict get $tagconfig last_part_text_tag]
#for -winlike 1 there is no tag configured. Windows explorer likes to put things in the order:
# module-0.1.1.tm
# module-0.1.1.2.tm
# module-0.1.tm
# arguably -winlike 0 is more natural/human
# module-0.1.tm
# module-0.1.1.tm
# module-0.1.1.2.tm
if {[string length $last_part_text_tag]} {
#replace only the first text-tag (<30>) from the subpart_index
if {[string match "<30?>*" $partindex]} {
#give textual string index a specific tag for last part in split only. e.g <130> for lower than integers
set partindex "<130>[string range $partindex 5 end]"
}
#append parts_below_index $last_part_tag
}
#set partindex $last_part_tag$partindex
}
append parts_below_index $partindex
if {$showsplits} {
if {$lastpart} {
set suffix "${pnum}${s}=)" ;# = sorts before _
} else {
set suffix "${pnum}${s}_)"
}
append parts_below_index $suffix
}
incr pnum
}
append parts_below_index "" ;# don't add anything at the tail that may perturb sort order
if {$debug >= 3} {
set pad [string repeat " " 20]
puts stdout "END build_key chunk : $chunk "
puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes"
puts stdout "END build_key ret below_index: $parts_below_index"
}
return $parts_below_index
}
}
#puts stdout ">>>chunk:'$chunk'<<< split-on:$s parts: '$parts' splitchars: $splitchars -topdict:$topdict"
#if {$chunk eq ""} {
# puts "___________________________________________!!!____"
#}
#puts stdout "-->chunk:$chunk $s parts:$parts"
#puts stdout "---chunk:'$chunk' part:'$part' parts:'$parts' s:'$s'"
set segments [split_numeric_segments $chunk] ;#!
set stringindex ""
set segnum 0
foreach seg $segments {
#puts stdout "=================---->seg:$seg segments:$segments"
#-strict ?
if {[string length $seg] && [string is digit $seg]} {
set basenum [trimzero [string trim $seg]]
set lengthindex "[padleft [string length $basenum] 4]d"
#append stringindex "<20>$lengthindex $basenum $seg"
} else {
set c1 [string range $seg 0 0]
#puts stdout "==============> c1'$c1' topdict: $topdict stringindex:$stringindex"
if {$c1 in [dict keys $topdict]} {
set tag [dict get $topdict $c1]
#append stringindex "${tag}$c1"
#set seg [string range $seg 1 end]
}
#textindex
set leader "<30>"
set idx $seg
set idx [string trim $idx]
set idx [string tolower $idx]
set idx [string map $index_map $idx]
#set the X-c count to match the length of the index - not the raw data
set lengthindex "[padleft [string length $idx] 4]c"
#append stringindex "${leader}$idx $lengthindex $texttail"
}
}
if {[llength $parts] != 1} {
error "build_key assertion fail llength parts != 1 parts:$parts"
}
set segtail_clearance_buffer " " ;#space to clear other split indicators if using showsplits
set segtail $segtail_clearance_buffer
append segtail "\["
set grouping ""
set pnum 0
foreach p $parts {
set sublen_list [list]
set subsegments [split_numeric_segments $p]
set i 0
set partsorter ""
foreach sub $subsegments {
##don't trim spaces here - it would be inconsistent. Some subs are pure whitespace - others have internal whitespace. e.g "a4 400b a b2" becomes "a 4 { } 400 {b a b} 2"
#mapping away all whitespace would be consistent, but not necessarily desirable. If it's in the index_map it'll happen anyway - so we don't do it manually here except for evaluating conditions.
set test_trim [string trim $sub]
set str $sub
set str [string tolower $str]
set str [string map $index_map $str]
if {[string length $test_trim] && [string is digit $test_trim]} {
append partsorter [trimzero $str]
} else {
append partsorter "$str"
}
append partsorter
}
foreach sub $subsegments {
if {[string length $sub] && [string is digit $sub]} {
set basenum [trimzero [string trim $sub]]
set subequivs $basenum
set lengthindex "[padleft [string length $subequivs] 4]d "
set idx "$lengthindex [padleft $basenum 10]" ;#todo - cycle through data and determine longest
set tail [overtype::left [string repeat " " 10] $sub]
#set tail ""
} else {
set idx ""
set lookahead [lindex $subsegments $i+1]
if {![string length $lookahead]} {
set zeronum "[padleft 0 4]d0"
} else {
set zeronum ""
}
set subequivs $sub
#set subequivs [string trim $subequivs]
set subequivs [string tolower $subequivs]
set subequivs [string map $index_map $subequivs]
append idx $subequivs
append idx $zeronum
set idx $subequivs
#<removed tag_dashes test put above - review>
set ch "-"
if {$tag_dashes} {
#puts stdout "____TAG DASHES"
#winlike
set numleading [get_leading_char_count $seg $ch]
if {$numleading > 0} {
set texttail "<31-leading[padleft $numleading 4]$ch>"
} else {
set texttail "<30>"
}
set numothers [expr {[get_char_count $seg $ch] - $numleading}]
if {$debug >= 2} {
puts stdout "____dashcount: [get_char_count $seg $ch] numothers: $numothers"
}
if {$numothers > 0} {
append texttail "<31-others[padleft $numothers 4]$ch>"
} else {
append textail "<30>"
}
} else {
set texttail "<30>"
}
#set idx $partsorter
set tail ""
#set tail [string tolower $sub] ;#raw
#set tail $partsorter
#append tail ":[string tolower $p]" ;#we need the whole part - even though it makes the index much larger. !todo - tagconfig switch to toggle case sensitive sorting
}
append grouping "$idx $tail<end>|$s"
incr i
}
if {$p eq ""} {
# no subsegments..
set zeronum "[padleft 0 4]d0"
#append grouping "\u000$zerotail"
append grouping ".$zeronum"
}
#append grouping |
#append grouping $s
#foreach len $sublen_list {
# append segtail "<[padleft $len 3]>"
#}
incr pnum
}
set grouping [string trimright $grouping $s]
append grouping "[padleft [llength $parts] 4]"
append segtail $grouping
#append segtail " <[padleft [llength $parts] 4]>"
append segtail "\]"
#if {[string length $seg] && [string is digit $seg]} {
# append segtail "<20>"
#} else {
# append segtail "<30>"
#}
append stringindex $segtail
incr segnum
lappend indices $stringindex
if {[llength $indices] > 1} {
puts stderr "INDICES [llength $indices]: $stringindex"
error "build_key assertion error deadconcept indices"
}
#topchar handling on splitter characters
#set c1 [string range $chunk 0 0]
if {$s in [dict keys $topdict]} {
set tag [dict get $topdict $s]
set joiner [string map [list ">" "$s>"] ${tag}]
#we have split on this character $s so if the first part is empty string then $s was a leading character
# we need to bring a tag out front for this, or it will be dominated by the leading sections-remaing tag <S..>
# (since the empty string produces no tag of it's own - ?)
if {[string length [lindex $parts 0]] == 0} {
set prefix ${joiner}
} else {
set prefix ""
}
} else {
#use standard character-data positioning tag if no override from topdict
set joiner "<30J>$s"
set prefix ""
}
set contentindex $prefix[join $indices $joiner]
if {[string length $s]} {
set split_indicator "<split$s $chunk>"
} else {
set split_indicator "<split$s $chunk>"
}
if {![string length $s]} {
set s ~
}
#return "[overtype::left [string repeat { } 80] $contentindex][overtype::left [string repeat { } 10] [list $s $chunk]]"
#return $contentindex$split_indicator
#return [overtype::left [string repeat - 40] $contentindex]
if {$debug >= 3} {
puts stdout "END build_key chunk : $chunk"
puts stdout "END build_key splitchars : $splitchars $topdict $tagconfig NO tag dashes"
puts stdout "END build_key ret contentidx : $contentindex"
}
return $contentindex
}
#----------------------------------------
#line-processors - data always last argument - opts can be empty string
#all processor should accept empty opts and ignore opts if they don't use them
proc _lineinput_as_tcl1 {opts line} {
set out ""
foreach i $line {
append out "$i "
}
set out [string range $out 0 end-1]
return $out
}
#should be equivalent to above
proc _lineinput_as_tcl {opts line} {
return [concat {*}$line]
}
#will put extra tcl quoting if it was already tcl-shaped e.g text "a" -> {"a"}
proc _lineoutput_as_tcl {opts line} {
return [regexp -inline -all {\S+} $line]
}
proc _lineinput_as_raw {opts line} {
return $line
}
proc _lineoutput_as_raw {opts line} {
return $line
}
#words is opposite of tcl
proc _lineinput_as_words {opts line} {
#wordlike_parts
return [regexp -inline -all {\S+} $line]
}
proc _lineoutput_as_words {opts line} {
return [concat {*}$line]
}
#opts same as tcllib csv::split - except without the 'line' element
#?-alternate? ?sepChar? ?delChar?
proc _lineinput_as_csv {opts line} {
package require csv
if {[lindex $opts 0] eq "-alternate"} {
return [csv::split -alternate $line {*}[lrange $opts 1 end]]
} else {
return [csv::split $line {*}$opts]
}
}
#opts same as tcllib csv::join
#?sepChar? ?delChar? ?delMode?
proc _lineoutput_as_csv {opts line} {
package require csv
return [csv::join $line {*}$opts]
}
#----------------------------------------
variable sort_flagspecs
set sort_flagspecs [dict create\
-caller natsort::sort \
-return supplied|defaults \
-defaults [list -collate nocase \
-winlike 0 \
-splits "\uFFFF" \
-topchars {. _} \
-showsplits 1 \
-sortmethod ascii \
-collate "\uFFFF" \
-inputformat raw \
-inputformatapply {index data} \
-inputformatoptions "" \
-outputformat raw \
-outputformatoptions "" \
-cols "\uFFFF" \
-debug 0 -db "" -stacktrace 0 -splits "\uFFFF" -showsplits 0] \
-required {all} \
-extras {none} \
-commandprocessors {}\
]
proc sort {stringlist args} {
#puts stdout "natsort::sort args: $args"
variable debug
variable sort_flagspecs
if {![llength $stringlist]} return
if {[llength $stringlist] == 1} {
if {"-inputformat" ni $args && "-outputformat" ni $args} {
return $stringlist
}
}
#allow pass through of the check_flags flag -debugargs so it can be set by the caller
set debugargs 0
if {[set posn [lsearch $args -debugargs]] >=0} {
if {$posn == [llength $args]-1} {
#-debugargs at tail of list
set debugargs 1
} else {
set debugargs [lindex $args $posn+1]
}
}
#-return flagged|defaults doesn't work Review.
#flagfilter global processor/allocator not working 2023-08
set opts [check_flags {*}$sort_flagspecs -debugargs $debugargs -values $args]
#we can only shortcircuit input list of single element at this point if there aren't non-default -inputformat or -outputformat transformations
if {[llength $stringlist] == 1} {
set is_basic 1
foreach fname [list -inputformat -outputformat] {
if {[dict get $sort_flagspecs -defaults $fname] ne [dict get $opts $fname]} {
set is_basic 0
break
}
}
if {$is_basic} {
return $stringlist
}
}
set winlike [dict get $opts -winlike]
set topchars [dict get $opts -topchars]
set cols [dict get $opts -cols]
set debug [dict get $opts -debug]
set stacktrace [dict get $opts -stacktrace]
set showsplits [dict get $opts -showsplits]
set splits [dict get $opts -splits]
set sortmethod [dict get $opts -sortmethod]
set opt_collate [dict get $opts -collate]
set opt_inputformat [dict get $opts -inputformat]
set opt_inputformatapply [dict get $opts -inputformatapply]
set opt_inputformatoptions [dict get $opts -inputformatoptions]
set opt_outputformat [dict get $opts -outputformat]
set opt_outputformatoptions [dict get $opts -outputformatoptions]
if {$debug} {
#dict unset opts -showsplits
#dict unset opts -splits
puts stdout "natsort::sort processed_args: $opts"
if {$debug == 1} {
puts stdout "natsort::sort - try also -debug 2, -debug 3"
}
}
#set sortmethod "-dictionary" ;# sorts a2b before a001b - possibly other strangenesses that are hard to reason about
switch -- $sortmethod {
dictionary - ascii {
set sortmethod "-$sortmethod"
# -ascii is default for tcl lsort.
}
default {
set sortmethod "-ascii"
}
}
set allowed_collations [list nocase]
if {$opt_collate ne "\uFFFF"} {
if {$opt_collate ni $allowed_collations} {
error "natsort::sort unknown value for -collate option. Only acceptable value(s): $allowed_collations"
}
set nocaseopt "-$opt_collate"
} else {
set nocaseopt ""
}
set allowed_inputformats [list tcl raw csv words]
switch -- $opt_inputformat {
tcl - raw - csv - words {}
default {
error "natsort::sort unknown value for -inputformat option. Only acceptable value(s): $allowed_inputformats"
}
}
set allowed_outputformats [list tcl raw csv words]
switch -- $opt_outputformat {
tcl - raw - csv - words {}
default {
error "natsort::sort unknown value for -outputformat option. Only acceptable value(s): $allowed_outputformats"
}
}
#
set winsplits [list / . _]
set commonsplits [list / . _ -]
#set commonsplits [list]
set tagconfig [dict create]
dict set tagconfig last_part_text_tag "<19>"
if {$winlike} {
set splitchars $winsplits
#windows explorer sorts leading spaces at the top - which doesn't seem very helpful for keeping things together - but the explorer doesn't seem able to create leading spaces anyway.
set wintop [list "(" ")" { } {.} {_}] ;#windows specific order
foreach t $topchars {
if {$t ni $wintop} {
lappend wintop $t
}
}
set topchars $wintop
dict set tagconfig last_part_text_tag ""
} else {
set splitchars $commonsplits
}
if {$splits ne "\uFFFF"} {
set splitchars $splits
}
dict set tagconfig original_splitchars $splitchars
dict set tagconfig showsplits $showsplits
#create topdict
set i 0
set topdict [dict create]
foreach c $topchars {
incr i ;#start at 01 so that 00 reserved for final-split tag (allows x-0.1.txt to sort above x-0.1.1.txt by default. Use tagconfig to change, or choose -winlike 1 for explorer-like sorting)
dict set topdict $c "<0$i>"
}
set keylist [list]
switch -- $opt_inputformat {
tcl {
set lineinput_transform [list _lineinput_as_tcl $opt_inputformatoptions]
}
csv {
set lineinput_transform [list _lineinput_as_csv $opt_inputformatoptions]
}
raw {
set lineinput_transform [list _lineinput_as_raw $opt_inputformatoptions]
}
words {
set lineinput_transform [list _lineinput_as_words $opt_inputformatoptions]
}
}
switch -- $opt_outputformat {
tcl {
set lineoutput_transform [list _lineoutput_as_tcl $opt_outputformatoptions]
}
csv {
set lineoutput_transform [list _lineoutput_as_csv $opt_outputformatoptions]
}
raw {
set lineoutput_transform [list _lineoutput_as_raw $opt_outputformatoptions]
}
words {
set lineoutput_transform [list _lineoutput_as_words $opt_outputformatoptions]
}
}
if {("data" in $opt_inputformatapply) || ("index" in $opt_inputformatapply)} {
if {$opt_inputformat eq "raw"} {
set tf_stringlist $stringlist
} else {
set tf_stringlist [list]
foreach v $stringlist {
lappend tf_stringlist [{*}$lineinput_transform $v]
}
}
if {"data" in $opt_inputformatapply} {
set tf_data_stringlist $tf_stringlist
} else {
set tf_data_stringlist $stringlist
}
if {"index" in $opt_inputformatapply} {
set tf_index_stringlist $tf_stringlist
} else {
set tf_index_stringlist $stringlist
}
} else {
set tf_data_stringlist $stringlist
set tf_index_stringlist $stringlist
}
if {$stacktrace} {
puts stdout [natsort::stacktrace]
set natsort::stacktrace_on 1
}
if {$cols eq "\uFFFF"} {
set colkeys [lmap v $stringlist {}]
} else {
set colkeys [list]
foreach v $tf_index_stringlist {
set lineparts $v
set k [list]
foreach c $cols {
lappend k [lindex $lineparts $c]
}
lappend colkeys [join $k "_"] ;#use a common-split char - Review
}
}
#puts stdout "colkeys: $colkeys"
if {$opt_inputformat eq "raw"} {
#no inputformat was applied - can just use stringlist
foreach value $stringlist ck $colkeys {
set contentindex [build_key $value $splitchars $topdict $tagconfig $debug]
set colindex [build_key $ck $splitchars $topdict $tagconfig $debug]
lappend keylist ${colindex}-${contentindex}-$value ;#note: entire raw value used for final sort disambiguation (can be whitespace that was ignored in indexing)
}
} else {
foreach keyinput $tf_index_stringlist datavalue $tf_data_stringlist ck $colkeys {
#data may or may not have been transformed
#column index may or may not have been built with transformed data
set contentindex [build_key $keyinput $splitchars $topdict $tagconfig $debug]
set colindex [build_key $ck $splitchars $topdict $tagconfig $debug]
lappend keylist ${colindex}-${contentindex}-$datavalue ;#note: entire value used for final sort disambiguation (can be whitespace that was ignored in indexing)
}
}
#puts stderr "keylist: $keylist"
###################################################################################################
# Use the generated keylist to do the actual sorting
# select either the transformed or raw data as the corresponding output
###################################################################################################
if {[string length $nocaseopt]} {
set sortcommand [list lsort $sortmethod $nocaseopt -indices $keylist]
} else {
set sortcommand [list lsort $sortmethod -indices $keylist]
}
if {$opt_outputformat eq "raw"} {
#raw output means no further transformations - it doesn't mean there wasn't a transform applied on the input side
#use the tf_data_stringlist in the output - which will be the same as the input stringlist if no input transform applied for data.
#(Also - it may or may not have been *sorted* on transformed data depending on whether 'index' was in $opt_inputformatapply)
foreach idx [{*}$sortcommand] {
lappend result [lindex $tf_data_stringlist $idx]
}
} else {
#we need to apply an output format
#The data may or may not have been transformed at input
foreach idx [{*}$sortcommand] {
lappend result [{*}$lineoutput_transform [lindex $tf_data_stringlist $idx]]
}
}
###################################################################################################
if {$debug >= 2} {
set screen_width 250
set max_val 0
set max_idx 0
##### calculate colum widths
foreach i [{*}$sortcommand] {
set len_val [string length [lindex $stringlist $i]]
if {$len_val > $max_val} {
set max_val $len_val
}
set len_idx [string length [lindex $keylist $i]]
if {$len_idx > $max_idx} {
set max_idx $len_idx
}
}
####
set l_width [expr {$max_val + 1}]
set leftcol [string repeat " " $l_width]
set r_width [expr {$screen_width - $l_width - 1}]
set rightcol [string repeat " " $r_width]
set str [overtype::left $leftcol RAW]
puts stdout " $str Index with possibly transformed data at tail"
foreach i [{*}$sortcommand] {
#puts stdout "|d> [overtype::left $leftcol [lindex $stringlist $i] ] [lindex $keylist $i]"
set index [lindex $keylist $i]
set len_idx [string length $index]
set rowcount [expr {$len_idx / $r_width}]
if {($len_idx % $r_width) > 0} {
incr rowcount
}
set rows [list]
for {set r 0} {$r < $rowcount} {incr r} {
lappend rows [string range $index 0 $r_width-$r]
set index [string range $index $r_width end]
}
set r 0
foreach idxpart $rows {
if {$r == 0} {
#use the untransformed stringlist
set str [overtype::left $leftcol [lindex $stringlist $i]]
} else {
set str [overtype::left $leftcol ...]]
}
puts stdout " $str $idxpart"
incr r
}
#puts stdout "|> '[lindex $stringlist $i]'"
#puts stdout "|> [lindex $keylist $i]"
}
puts stdout "|debug> topdict: $topdict"
puts stdout "|debug> splitchars: $splitchars"
}
return $result
}
#Note that although leading whitespace isn't a commonly used feature of filesystem names - it's possible at least on FreeBSD,windows and linux so we should try to handle it sensibly.
proc sort_experiment {stringlist args} {
package require sqlite3
variable debug
set args [check_flags -caller natsort::sort \
-defaults [dict create -db :memory: -collate nocase -nullvalue "->NULL<" -winlike 0 -topchars [list] -debug 0] \
-extras {all} \
-values $args]
set db [string trim [dict get $args -db]]
set collate [string trim [dict get $args -collate]]
set winlike [string trim [dict get $args -winlike]]
set debug [string trim [dict get $args -debug]]
set nullvalue [string trim [dict get $args -nullvalue]]
set topchars [string trim [dict get $args -topchars]]
set topdot [expr {"." in $topchars}]
set topunderscore [expr {"_" in $topchars}]
sqlite3 db_natsort2 $db
#--
#our table must handle the name with the greatest number of numeric/non-numeric splits.
#This means a single list member with pathological naming e.g a1a1a1a1a1a1a1a1a1a1a1.txt could greatly extend the number of columns and indices and affect performance.
#review: could be optimised to aggregate the tail into a single index, as the the extra columns won't assist in ordering, but make the table and query bigger.
# we should probably determine the longest common sequence of splits in the input list and add only one more index for the segment after that.
set maxsegments 0
#--
set prefix "idx"
#note - there will be more columns in the sorting table than segments.
# (a segment equals one of the numeric & non-numeric string portions returned from 'split_numeric_sgements')
#---------------------------
# consider
# a123b.v1.2.txt
# a123b.v1.3beta1.txt
# these have the following segments:
# a 123 b.v 1 . 2 .txt
# a 123 b.v 1 . 3 beta 1 .txt
#---------------------------
# The first string has 7 segments (numbered 0 to 6)
# the second string has 9 segments
#
# for example when the data has any elements in a segment position that are numeric (e.g 0001 123)
# - then an index column with numeric equivalents will be created (e.g 0001 becomes 1), and any non-numeric values in that column will get mapped to a negative value (for special cases) or a high value such as NULL (with NULLS LAST sql support)
#
# when a segment
#cycle through all strings - we cannot build tabledef as we go because the column type depends on whether all segments for a particular column are text vs int-equivalent.
array set segmentinfo {}
foreach nm $stringlist {
set segments [split_numeric_segments $nm]
if {![string length [string trim [lindex $segments 0]]]} {
if {[string is digit [string trim [lindex $segments 1]]]} {
#name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though)
set segments [lrange $segments 1 end]
}
}
set c 0 ;#start of index columns
if {[llength $segments] > $maxsegments} {
set maxsegments [llength $segments]
}
foreach seg $segments {
set seg [string trim $seg]
set column_exists [info exists segmentinfo($c,type)]
if {[string is digit $seg]} {
if {$column_exists} {
#override it (may currently be text or int)
set segmentinfo($c,type) "int"
} else {
#new column
set segmentinfo($c,name) ${prefix}$c
set segmentinfo($c,type) "int"
}
} else {
#text never overrides int
if {!$column_exists} {
set segmentinfo($c,name) ${prefix}$c
set segmentinfo($c,type) "text"
}
}
incr c
}
}
if {$debug} {
puts stdout "Largest number of num/non-num segments in data: $maxsegments"
#parray segmentinfo
}
#
set tabledef ""
set ordered_column_names [list]
set ordered_segmentinfo_tags [lsort -dictionary [array names segmentinfo *]]
foreach k $ordered_segmentinfo_tags {
lassign [split $k ,] c tag
if {$tag eq "type"} {
set type [set segmentinfo($k)]
if {$type eq "int"} {
append tabledef "$segmentinfo($c,name) int,"
} else {
append tabledef "$segmentinfo($c,name) text COLLATE $collate,"
}
append tabledef "raw$c text COLLATE $collate,"
lappend ordered_column_names $segmentinfo($c,name)
lappend ordered_column_names raw$c ;#additional index column not in segmentinfo
}
if {$tag eq "name"} {
#lappend ordered_column_names $segmentinfo($k)
}
}
append tabledef "name text"
#puts stdout "tabledef:$tabledef"
db_natsort2 eval [string map [list %tabledef% $tabledef] {create table natsort(%tabledef%)}]
foreach nm $stringlist {
array unset intdata
array set intdata {}
array set rawdata {}
#init array and build sql values string
set sql_insert "insert into natsort values("
for {set i 0} {$i < $maxsegments} {incr i} {
set intdata($i) ""
set rawdata($i) ""
append sql_insert "\$intdata($i),\$rawdata($i),"
}
append sql_insert "\$nm" ;#don't manipulate name value in any way - e.g must leave all whitespace as the sort must return exactly the same elements as in the original list.
append sql_insert ")"
set segments [split_numeric_segments $nm]
if {![string length [string trim [lindex $segments 0]]]} {
if {[string is digit [string trim [lindex $segments 1]]]} {
#name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though)
set segments [lrange $segments 1 end]
}
}
set values ""
set c 0
foreach seg $segments {
if {[set segmentinfo($c,type)] eq "int"} {
if {[string is digit [string trim $seg]]} {
set intdata($c) [trimzero [string trim $seg]]
} else {
catch {unset intdata($c)} ;#set NULL - sorts last
if {($c == 0) && ($topunderscore) && [string match _* [string trim $seg]]} {
set intdata($c) -100
}
if {($c == 0) && ($topdot) && [string match .* [string trim $seg]]} {
set intdata($c) -50
}
}
set rawdata($c) [string trim $seg]
} else {
#pure text column
#set intdata($c) [string trim $seg] ;#ignore leading/trailing whitespace - we sort first on trimmed version, then refine with the sort on rawdata index
#catch {unset indata($c)}
set indata($c) [string trim $seg]
set rawdata($c) $seg
}
#set rawdata($c) [string trim $seg]#
#set rawdata($c) $seg
incr c
}
db_natsort2 eval $sql_insert
}
set orderedlist [list]
if {$debug} {
db_natsort2 eval {select * from pragma_table_info('natsort')} rowdata {
parray rowdata
}
}
set orderby "order by "
foreach cname $ordered_column_names {
if {[string match "idx*" $cname]} {
append orderby "$cname ASC NULLS LAST,"
} else {
append orderby "$cname ASC,"
}
}
append orderby " name ASC"
#append orderby " NULLS LAST" ;#??
#e.g "order by idx0 ASC, raw0 ASC, idx1 ASC .... name ASC"
if {$debug} {
puts stdout "orderby clause: $orderby"
}
db_natsort2 eval [string map [list %orderby% $orderby] {select * from natsort %orderby%}] rowdata {
set line "- "
#parray rowdata
set columnnames $rowdata(*)
#puts stdout "columnnames: $columnnames"
#[lsort -dictionary [array names rowdata]
append line "$rowdata(name) \n"
foreach nm $columnnames {
if {$nm ne "name"} {
append line "$nm: $rowdata($nm) "
}
}
#puts stdout $line
#puts stdout "$rowdata(name)"
lappend orderedlist $rowdata(name)
}
db_natsort2 close
return $orderedlist
}
}
#application section e.g this file might be linked from /usr/local/bin/natsort
namespace eval natsort {
namespace import ::flagfilter::check_flags
proc called_directly_namematch {} {
global argv0
if {[info script] eq ""} {
return 0
}
#see https://wiki.tcl-lang.org/page/main+script
#trailing ... let's us resolve symlinks in last component of the path (could be something else like ___ but ... seems unlikely to collide with anything in the filesystem)
if {[info exists argv0]
&&
[file dirname [file normalize [file join [info script] ...]]]
eq
[file dirname [file normalize [file join $argv0 ...]]]
} {
return 1
} else {
#puts stdout "norm info script: [file dirname [file normalize [file join [info script] ...]]]"
#puts stdout "norm argv0 : [file dirname [file normalize [file join $argv0 ...]]]"
return 0
}
}
#Review issues around comparing names vs using inodes (esp with respect to samba shares)
proc called_directly_inodematch {} {
global argv0
if {[info exists argv0]
&& [file exists [info script]] && [file exists $argv0]} {
file stat $argv0 argv0Info
file stat [info script] scriptInfo
if {$argv0Info(ino) == 0 || $scriptInfo(ino) == 0 || $argv0Info(dev) == 0 || $scriptInfo(dev) == 0} {
#vfs?
#e.g //zipfs:/
return 0
}
return [expr {$argv0Info(dev) == $scriptInfo(dev)
&& $argv0Info(ino) == $scriptInfo(ino)}]
} else {
return 0
}
}
if {![interp issafe]} {
set is_namematch [called_directly_namematch]
set is_inodematch [called_directly_inodematch]
####
#review - reliability of mechanisms to determine direct calls
# we don't want application being called when being used as a library, but we need it to run if called directly or from symlinks etc
#-- choose a policy and leave the others commented.
#set is_called_directly $is_namematch
#set is_called_directly $is_inodematch
#puts "NATSORT: called_directly_namematch - $is_namematch"
#puts "NATSORT: called_directly_inodematch - $is_inodematch"
#flush stdout
set is_called_directly [expr {$is_namematch || $is_inodematch}]
#set is_called_directly [expr {$is_namematch && $is_inodematch}]
###
#puts stdout "called_directly_name: [called_directly_namematch] called_directly_inode: [called_directly_inodematch]"
} else {
#safe interp
set is_called_directly 0
}
proc test_pass_fail_message {pass {additional ""}} {
variable test_fail_msg
variable test_pass_msg
if {$pass} {
puts stderr $test_pass_msg
} else {
puts stderr $test_fail_msg
}
puts stderr $additional
}
variable test_fail_msg "XXXXXXXXXXXX FAIL XXXXXXXXXXXXX"
variable test_pass_msg "------------ PASS -------------"
proc test_sort_1 {args} {
package require struct::list
puts stderr "---$args"
set args [check_flags -caller natsort:test_sort_1 -defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] -values $args]
puts stderr "test_sort_1 got args: $args"
set unsorted_input {
2.2.2
2.2.2.2
1a.1.1
1a.2.1.1
1.12.1
1.2.1.1
1.02.1.1
1.002b.1.1
1.1.1.2
1.1.1.1
}
set input {
1.1.1
1.1.1.2
1.002b.1.1
1.02.1.1
1.2.1.1
1.12.1
1a.1.1
1a.2.1.1
2.2.2
2.2.2.2
}
set sorted [natsort::sort $input {*}$args]
set is_match [struct::list equal $input $sorted]
set msg "windows-explorer order"
test_pass_fail_message $is_match $msg
puts stdout [string repeat - 40]
puts stdout INPUT
puts stdout [string repeat - 40]
foreach item $input {
puts stdout $item
}
puts stdout [string repeat - 40]
puts stdout OUTPUT
puts stdout [string repeat - 40]
foreach item $sorted {
puts stdout $item
}
test_pass_fail_message $is_match $msg
return [expr {!$is_match}]
}
proc test_sort_showsplits {args} {
package require struct::list
set args [check_flags -caller natsort:test_sort_1 \
-defaults [list -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 1 ] \
-extras {all} \
-values $args]
set input1 {
a-b.txt
a.b.c.txt
b.c-txt
}
set input2 {
a.b.c.txt
a-b.txt
b.c-text
}
foreach {msg testlist } [list "custom-order" $input1 "windows-explorer (should work with -winlike 1)" $input2] {
set sorted [natsort::sort $testlist {*}$args]
set is_match [struct::list equal $testlist $sorted]
test_pass_fail_message $is_match $msg
puts stderr "INPUT"
puts stderr "[string repeat - 40]"
foreach item $testlist {
puts stdout $item
}
puts stderr "[string repeat - 40]"
puts stderr "OUTPUT"
puts stderr "[string repeat - 40]"
foreach item $sorted {
puts stdout $item
}
test_pass_fail_message $is_match $msg
}
#return [expr {!$is_match}]
}
#tcl proc dispatch order - non flag items up front
#trailing flags are paired even if supplied as solo flags e.g -l becomes -l 1
proc commandline_ls {args} {
set operands [list]
set posn 0
foreach a $args {
if {![string match -* $a]} {
lappend operands $a
} else {
set flag1_posn $posn
break
}
incr posn
}
set args [lrange $args $flag1_posn end]
set debug 0
set posn [lsearch $args -debug]
if {$posn > 0} {
if {[lindex $args $posn+1]} {
set debug [lindex $args $posn+1]
}
}
if {$debug} {
puts stderr "|debug>commandline_ls got $args"
}
#if first operand not supplied - replace it with current working dir
if {[lindex $operands 0] eq "\uFFFF"} {
lset operands 0 [pwd]
}
set targets [list]
foreach op $operands {
if {$op ne "\uFFFF"} {
set opchars [split [file tail $op] ""]
if {"?" in $opchars || "*" in $opchars} {
lappend targets $op
} else {
#actual file or dir
set targetitem $op
set targetitem [file normalize $op]
if {![file exists $targetitem]} {
if {$debug} {
puts stderr "|debug>commandline_ls Unable to access path '$targetitem'"
}
}
lappend targets $targetitem
if {$debug} {
puts stderr "|debug>commandline_ls listing for $targetitem"
}
}
}
}
set args [check_flags -caller commandline_ls \
-return flagged|defaults \
-debugargs 0 \
-defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 -algorithm sort] \
-required {all} \
-extras {all} \
-soloflags {-v -l} \
-commandprocessors {} \
-values $args ]
if {$debug} {
puts stderr "|debug>args: $args"
}
set algorithm [dict get $args -algorithm]
dict unset args -algorithm
set allfolders [list]
set allfiles [list]
foreach item $targets {
if {[file exists $item]} {
if {[file type $item] eq "directory"} {
set dotfolders [glob -nocomplain -directory $item -type {d} -tail .*]
set folders [glob -nocomplain -directory $item -type {d} -tail *]
set allfolders [concat $allfolders $dotfolders $folders]
set dotfiles [glob -nocomplain -directory $item -type {f} -tail .*]
set files [glob -nocomplain -directory $item -type {f} -tail *]
set allfiles [concat $allfiles $dotfiles $files]
} else {
#file (or link?)
set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]]
set allfiles [concat $allfiles $files]
}
} else {
set folders [glob -nocomplain -directory $item -type {d} -tail [file tail $item]]
set allfolders [concat $allfolders $folders]
set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]]
set allfiles [concat $allfiles $files]
}
}
set sorted_folders [natsort::sort $allfolders {*}$args]
set sorted_files [natsort::sort $allfiles {*}$args]
foreach fold $sorted_folders {
puts stdout $fold
}
foreach file $sorted_files {
puts stdout $file
}
return "-- ok printed to stdout [llength $sorted_folders] folders and [llength $sorted_files] files --"
}
package require argp
argp::registerArgs commandline_test {
{ -showsplits boolean 0}
{ -stacktrace boolean 0}
{ -debug boolean 0}
{ -winlike boolean 0}
{ -db string ":memory:"}
{ -collate string "nocase"}
{ -algorithm string "sort"}
{ -topchars string "\uFFFF"}
{ -testlist string {10 1 30 3}}
}
argp::setArgsNeeded commandline_test {-stacktrace}
proc commandline_test {test args} {
variable testlist
puts stdout "commandline_test got $args"
argp::parseArgs opts
puts stdout "commandline_test got [array get opts]"
set args [check_flags -caller natsort_commandline \
-return flagged|defaults \
-defaults [list -db :memory: -collate nocase -testlist $testlist -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
-values $args]
if {[string tolower $test] in [list "1" "true"]} {
set test "sort"
} else {
if {![llength [info commands $test]]} {
error "test $test not found"
}
}
dict unset args -test
set stacktrace [dict get $args -stacktrace]
# dict unset args -stacktrace
set argtestlist [dict get $args -testlist]
dict unset args -testlist
set debug [dict get $args -debug]
set collate [dict get $args -collate]
set db [dict get $args -db]
set winlike [dict get $args -winlike]
set topchars [dict get $args -topchars]
puts stderr "|test>-----start natsort::$test--- input list size : [llength $argtestlist]"
#set resultlist [$test $argtestlist -db $db -collate $collate -topchars $topchars -winlike $winlike]
set resultlist [$test $argtestlist {*}$args]
foreach nm $resultlist {
puts stdout $nm
}
puts stdout "|test>-----end natsort::$test--- sorted list size: [llength $resultlist]"
return "test end"
}
proc commandline_runtests {runtests args} {
set argvals [check_flags -caller commandline_runtests \
-defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits "\uFFFF" -runtests 1] \
-values $args]
puts stderr "runtests args: $argvals"
#set runtests [dict get $argvals -runtests]
dict unset argvals -runtests
dict unset argvals -algorithm
puts stderr "runtests args: $argvals"
#exit 0
set test_prefix "::natsort::test_sort_"
if {$runtests eq "1"} {
set runtests "*"
}
set testcommands [info commands ${test_prefix}${runtests}]
if {![llength $testcommands]} {
puts stderr "No test commands matched -runtests argument '$runtests'"
puts stderr "Use 1 to run all tests"
set alltests [info commands ${test_prefix}*]
puts stderr "Valid tests are:"
set prefixlen [string length $test_prefix]
foreach t $alltests {
set shortname [string range $t $prefixlen end]
puts stderr "$t = -runtests $shortname"
}
} else {
foreach cmd $testcommands {
puts stderr [string repeat - 40]
puts stderr "calling $cmd with args: '$argvals'"
puts stderr [string repeat - 40]
$cmd {*}$argvals
}
}
exit 0
}
proc help {args} {
puts stdout "natsort::help got '$args'"
return "Help not implemented"
}
proc natsort_pipe {args} {
#PIPELINE to take input list on stdin and write sorted list to stdout
#strip - from arglist
#set args [check_flags -caller natsort_pipeline \
# -return all \
# -defaults [list -db :memory: -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
# -values $args]
set debug [dict get $args -debug]
if {$debug} {
puts stderr "|debug> natsort_pipe got args:'$args'"
}
set algorithm [dict get $args -algorithm]
dict unset args -algorithm
set proclist [info commands ::natsort::sort*]
set algos [list]
foreach p $proclist {
lappend algos [namespace tail $p]
}
if {$algorithm ni [list {*}$proclist {*}$algos]} {
do_error "valid sort mechanisms: $algos" 2
}
set input_list [list]
while {![eof stdin]} {
if {[gets stdin line] > 0} {
lappend input_list $line
} else {
if {[eof stdin]} {
} else {
after 10
}
}
}
if {$debug} {
puts stderr "|debug> received [llength $input_list] list elements"
}
set resultlist [$algorithm $input_list {*}$args]
if {$debug} {
puts stderr "|debug> returning [llength $resultlist] list elements"
}
foreach r $resultlist {
puts stdout $r
}
#exit 0
}
if {($is_called_directly)} {
set cmdprocessors {
{helpfinal {match "^help$" dispatch natsort::help}}
{helpfinal {sub -topic default "NONE"}}
}
#set args [check_flags \
# -caller test1 \
# -debugargs 2 \
# -return arglist \
# -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
# -required {none} \
# -extras {all} \
# -commandprocessors $cmdprocessors \
# -values $::argv ]
interp alias {} do_filter {} ::flagfilter::check_flags
#mashopts are generally single-letter opts that can be run together e.g -l -d as -ld
set cmdprocessors {
{helpcmd {match "^help$" dispatch natsort::help singleopts {-v}}}
{helpcmd {sub -operand default \uFFFF singleopts {-l}}}
{lscmd {match "^ls$" dispatch natsort::commandline_ls dispatchtype tcl dispatchglobal 1 mashopts {-l -a} singleopts {-l -a} pairopts {} longopts {--color=always}}}
{lscmd {sub dir default "\uFFFF"}}
{lscmd {sub dir2 default "\uFFFF"}}
{lscmd {sub dir3 default "\uFFFF"}}
{lscmd {sub dir4 default "\uFFFF"}}
{lscmd {sub dir5 default "\uFFFF"}}
{lscmd {sub dir6 default "\uFFFF"}}
{runtests {match "^-tests$" dispatch natsort::commandline_runtests singleopts {-l}}}
{runtests {sub testname default "1" singleopts {-l}}}
{pipecmd {match "^-$" dispatch natsort::natsort_pipe dispatchtype tcl}}
}
set arglist [do_filter \
-debugargs 0 \
-debugargsonerror 2 \
-caller cline_dispatch1 \
-return all \
-soloflags {-v -x} \
-defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \
-required {all} \
-extras {all} \
-commandprocessors $cmdprocessors \
-values $::argv ]
#mashopts are generally single-letter opts that can be run together e.g -l -d as -ld
set cmdprocessors {
{testcmd {match "^test$" dispatch natsort::commandline_test singleopts {-l}}}
{testcmd {sub testname default "1" singleopts {-l}}}
}
set arglist [check_flags \
-debugargs 0 \
-caller cline_dispatch2 \
-return all \
-soloflags {-v -l} \
-defaults [list -collate nocase -algorithm sort -testlist "1 2 3 10" -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \
-required {all} \
-extras {all} \
-commandprocessors $cmdprocessors \
-values $::argv ]
#set cmdprocessors [list]
#set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors $cmdprocessors -values $::argv ]
#set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {-cmd {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ]
#set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {{-cmd -default help} {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ]
#set args [check_flags -caller test1 -defaults [list -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {ls {ls lsdir -default "\uFFFF"}} -values $::argv ]
puts stderr "natsort directcall exit"
flush stderr
exit 0
if {$::argc} {
}
}
}
package provide natsort [namespace eval natsort {
variable version
set version 0.1.1.6
}]