punkshell/src/bootsupport/modules/natsort-0.1.1.6.tm

#! /usr/bin/env tclsh


package require flagfilter
namespace import ::flagfilter::check_flags

namespace eval natsort {
    #REVIEW - determine and document the purpose of scriptdir being added to tm path
     proc scriptdir {} {
        set possibly_linked_script [file dirname [file normalize [file join [info script] __dummy__]]]
        if {[file isdirectory $possibly_linked_script]} {
            return $possibly_linked_script
        } else {
            return [file dirname $possibly_linked_script]
        }
    }
    if {![interp issafe]} {
        set sdir [scriptdir]
        #puts stderr "natsort tcl::tm::add $sdir"
        if {$sdir ni [tcl::tm::list]} {
            catch {tcl::tm::add $sdir}
        }
    }
}


namespace eval natsort {
    variable stacktrace_on 0

    proc do_error {msg {then error}} {
        #note we exit or error out even if debug selected - as every do_error call is meant to interrupt code processing at the site of call
        #this is not just a 'logging' call even though it has log-like descriptors
        lassign $then type code
        if {$code eq ""} {
          set code 1
        }
        set type [string tolower $type]
        set levels [list debug info notice warn error critical]
        if {$type in [concat $levels exit]} {
            puts stderr "|$type> $msg"
        } else {
            puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be one of '$levels'  or 'exit <numericcode>'"
        }
        flush stderr
        if {$::tcl_interactive} {
            #may not always be desirable - but assumed to be more useful not to exit despite request, to aid in debugging
            if {[string tolower $type] eq "exit"} {
              puts stderr "    (exit suppressed due to tcl_interactive - raising error instead)"
              if {![string is digit -strict $code]} {
                puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be: 'exit <numericcode>'"
              }
              flush stderr
            }
            return -code error $msg
        } else {
            if {$type ne "exit"} {
              return -code error $msg
            } else {
              if {[string is digit -strict $code]} {
                exit $code
              } else {
                puts stderr "|>natsort_call_err> unable to interpret 2nd argument to do_error: '$then' should be 'error' or 'exit <numericcode>'"
                return -code error $msg
              }
            }
        }
    }


    variable debug 0
    variable testlist
    set testlist {
        00.test-firstposition.txt
        0001.blah.txt
        1.test-sorts-after-all-leadingzero-number-one-equivs.txt
        1010.thousand-and-ten.second.txt
        01010.thousand-and-ten.first.txt
        0001.aaa.txt
        001.zzz.txt
        08.octal.txt-last-octal
        008.another-octal-first-octal.txt
        08.again-second-octal.txt
        001.a.txt
        0010.reconfig.txt
        010.etc.txt
        005.etc.01.txt
        005.Etc.02.txt
        005.123.abc.txt
        200.somewhere.txt
        2zzzz.before-somewhere.txt
        00222-after-somewhere.txt
        005.00010.abc.txt
        005.a3423bc.00010.abc.txt
        005.001.abc.txt
        005.etc.1010.txt
        005.etc.010.txt
        005.etc.10.txt
        " 005.etc.10.txt"
        005.etc.001.txt
        20.somewhere.txt
        4611686018427387904999999999-bignum.txt
        4611686018427387903-bigishnum.txt
        9223372036854775807-bigint.txt
        etca-a
        etc-a
        etc2-a
        a0001blah.txt
        a010.txt
        winlike-sort-difference-0.1.txt
        winlike-sort-difference-0.1.1.txt
        a1.txt
        b1-a0001blah.txt
        b1-a010.txt
        b1-a1.txt
        -a1.txt
        --a1.txt
        --a10.txt
        2.high-two.yml
        02.higher-two.yml
        reconfig.txt
        _common.stuff.txt
        CASETEST.txt
        casetest.txt
        something.txt
        some~thing.txt
        someathing.txt
        someThing.txt
        thing.txt
        thing_revised.txt
        thing-revised.txt
        "thing revised.txt"
        "spacetest.txt"
        " spacetest.txt"
        "  spacetest.txt"
        "spacetest2.txt"
        "spacetest 2.txt"
        "spacetest02.txt"
        name.txt
        name2.txt
        "name .txt"
        "name2 .txt"
        blah.txt
        combined.txt
        a001.txt
        .test
        .ssh
        "Feb 10.txt"
        "Feb  8.txt"
        1ab23v23v3r89ad8a8a8a9d.txt
        "Folder (10)/file.tar.gz"
        "Folder/file.tar.gz"
        "Folder (1)/file (1).tar.gz"
        "Folder (1)/file.tar.gz"
        "Folder (01)/file.tar.gz"
        "Folder1/file.tar.gz"
        "Folder(1)/file.tar.gz"

    }
    lappend testlist "Some file.txt"
    lappend testlist "  Some extra file1.txt"
    lappend testlist "  Some extra file01.txt"
    lappend testlist "  some extra file1.txt"
    lappend testlist "  Some extra file003.txt"
    lappend testlist " Some file.txt"
    lappend testlist "Some extra file02.txt"
    lappend testlist "Program Files (x86)"
    lappend testlist "01999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999991-bigger-pathologically-bignum.txt"
    lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt"
    lappend testlist "29999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-smaller-pathologically-bignum.txt"
    lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 1.txt"
    lappend testlist "199999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999-pathologically-bignum.txt with (more 01.txt"
    lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt"
    lappend testlist "a1a1a1a1a1a1a1a1a1a1a1a01a1a1a1a1a1a1a1a1a1a1a1a1.pathological-num-nonnum-swapping-leadzero-should-be-first.txt"
    lappend testlist "b1b1b1b1.txt"
    lappend testlist "b1b01z1z1.txt"
    lappend testlist "c1c111c1.txt"
    lappend testlist "c1c1c1c1.txt"

    namespace eval overtype {
        proc right {args} {
            # @d !todo - implement overflow, length checks etc

            if {[llength $args] < 2} {
                error {usage: ?-overflow [1|0]? undertext overtext}
            }
            foreach {undertext overtext} [lrange $args end-1 end] break

            set opt(-overflow)	0
            array set opt [lrange $args 0 end-2]


            set olen [string length $overtext]
            set ulen [string length $undertext]

            if {$opt(-overflow)} {
                return [string range $undertext 0 end-$olen]$overtext
            } else {
                if {$olen > $ulen} {
                    set diff [expr {$olen - $ulen}]
                    return [string range $undertext 0 end-$olen][string range $overtext 0 end-$diff]
                } else {
                    return [string range $undertext 0 end-$olen]$overtext
                }
            }
        }
        proc left {args} {
            # @c overtype starting at left (overstrike)
            # @c can/should we use something like this?: 'format "%-*s" $len $overtext

            if {[llength $args] < 2} {
                error {usage: ?-overflow [1|0]? ?-ellipsis [1|0]? ?-ellipsistext ...? undertext overtext}
            }
            foreach {undertext overtext} [lrange $args end-1 end] break

            set opt(-ellipsis)		0
            set opt(-ellipsistext)	{...}
            set opt(-overflow)	0
            array set opt [lrange $args 0 end-2]


            set len [string length $undertext]
            set overlen [string length $overtext]
            set diff [expr {$overlen - $len}]

            #puts stdout "====================>overtype: datalen:$len  overlen:$overlen diff:$diff"
            #puts stdout "====================>overtype: data: $overtext"
            if {$diff > 0} {
                if {$opt(-overflow)} {
                    return $overtext
                } else {
                    if {$opt(-ellipsis)} {
                        return [overtype::left [string range $overtext 0 [expr {$len -1}]] $opt(-ellipsistext)]
                    } else {
                        return [string range $overtext 0 [expr {$len -1}]]
                    }
                }
            } else {
                return "$overtext[string range $undertext $overlen end]"
            }
        }

    }

    #considered using hex to make large numbers more compact for viewing in debug output - but it's not that much shorter and probably obscures more than it helps.
    proc hex2dec {largeHex} {
        #todo - use punk::lib::hex2dec - (scan supports ll so can do larger hex values directly)
        set res 0
        set largeHex [string map {_ {}} $largeHex]
        if {[string length $largeHex] <=7} {
            #scan can process up to FFFFFFF and does so quickly
            return [scan $largeHex %x]
        }
        foreach hexDigit [split $largeHex {}] {
            set new 0x$hexDigit
            set res [expr {16*$res + $new}]
        }
        return $res
    }
    proc dec2hex {decimalNumber} {
        format %4.4llX $decimalNumber
    }

    #punk::lib::trimzero
    proc trimzero {number} {
        set trimmed [string trimleft $number 0]
        if {[string length $trimmed] == 0} {
            set trimmed 0
        }
        return $trimmed
    }
    #todo - consider human numeric split
    #e.g consider SI suffixes k|KMGTPEZY in that order

    #in this context, for natural sorting - numeric segments don't contain underscores or other punctuation such as . - + etc.
    #review - what about unicode equivalents such as wide numerals \UFF10 to \UFF19?  unicode normalization?
    proc split_numeric_segments {name} {
        set segments [list]
        while {[string length $name]} {
            if {[scan $name {%[0-9]%n} chunk len] == 2} {
                lappend segments $chunk
                set name [string range $name $len end]
            }
            if {[scan $name {%[^0-9]%n} chunk len] == 2} {
                lappend segments $chunk
                set name [string range $name $len end]
            }
        }
        return $segments
    }

    proc padleft {str count {ch " "}} {
        set val [string repeat $ch $count]
        append val $str
        set diff [expr {max(0,$count - [string length $str])}]
        set offset [expr {max(0,$count - $diff)}]
        set val [string range $val $offset end]
    }


    # Sqlite may have limited collation sequences available in default builds.
    # with custom builds - there may be others such as 'natsort' - see https://sqlite.org/forum/forumpost/e4dc6f3331
    # This is of limited use with the few builtin collations available in 2023 ie binary,nocase & rtrim
    #  but may provide a quicker,flexible sort option, especially if/when more collation sequences are added to sqlite
    # There are also prebuilt packages such as sqlite3-icu  which allows things like "SELECT icu_load_collation('en_AU', 'australian');"
    proc sort_sqlite {stringlist args} {
        package require sqlite3


        set args [check_flags -caller natsort_sqlite -defaults [list -db :memory: -collate nocase  -winlike 0 -topchars "\uFFFF" -debug 0 -splitchars [list / . - _] -extras {all}] -values $args]
        set db          [string trim [dict get $args -db]]
        set collate     [string trim [dict get $args -collate]]
        set debug       [string trim [dict get $args -debug]]
        set topchars    [string trim [dict get $args -topchars]]

        set topdot [expr {"." in $topchars}]
        set topunderscore [expr {"_" in $topchars}]


        sqlite3 db_sort_basic $db
        set orderedlist [list]
        db_sort_basic eval [string map [list %collate% $collate] {create table sqlitesort(index0 text COLLATE %collate%, name text COLLATE %collate%)}]
        foreach nm $stringlist {
            set segments [split_numeric_segments $nm]
            set index ""
            set s 0
            foreach seg $segments {
                if {($s == 0) && ![string length [string trim $seg]]} {
                    #don't index leading space
                } elseif {($s == 0) && ($topunderscore) && [string match _* [string trim $seg]]} {
                    append index "[padleft "0" 5]-d -100 topunderscore "
                    append index [string trim $seg]
                } elseif {($s == 0) && ($topdot) && [string match .* [string trim $seg]]} {
                    append index "[padleft "0" 5]-d -50 topdot "
                    append index [string trim $seg]
                } else {
                    if {[string is digit [string trim $seg]]} {
                        set basenum [trimzero [string trim $seg]]
                        set lengthindex "[padleft [string length $basenum] 5]-d"
                        append index "$lengthindex "
                        #append index [padleft $basenum 40]
                        append index $basenum
                    } else {
                        append index [string trim $seg]
                    }
                }
                incr s
            }
            puts stdout ">>$index"
            db_sort_basic eval {insert into sqlitesort values($index,$nm)}
        }
        db_sort_basic eval [string map [list %collate% $collate] {select name from sqlitesort order by index0 COLLATE %collate% ASC, name COLLATE %collate% ASC }] {
            lappend orderedlist $name
        }
        db_sort_basic close
        return $orderedlist
    }

    proc get_leading_char_count {str char} {
        #todo - something more elegant? regex?
        set count 0
        foreach c [split $str "" ] {
            if {$c eq $char} {
                incr count
            } else {
                break
            }
        }
        return $count
    }
    proc stacktrace {} {
        set stack "Stack trace:\n"
        for {set i 1} {$i < [info level]} {incr i} {
            set lvl [info level -$i]
            set pname [lindex $lvl 0]
            append stack [string repeat " " $i]$pname

            if {![catch {info args $pname} pargs]} {
                foreach value [lrange $lvl 1 end] arg $pargs {

                    if {$value eq ""} {
                        if {$arg != 0} {
                            info default $pname $arg value
                        }
                    }
                    append stack " $arg='$value'"
                }
            } else {
                append stack " !unknown vars for $pname"
            }

            append stack \n
        }
        return $stack
    }

    proc get_char_count {str char} {
        #faster than lsearch on split for str of a few K
        expr {[tcl::string::length $str]-[tcl::string::length [tcl::string::map "$char {}" $str]]}
    }

    proc build_key {chunk  splitchars topdict tagconfig debug} {
        variable stacktrace_on
        if {$stacktrace_on} {
            puts stderr "+++>[stacktrace]"
        }

        set index_map [list - "" _ ""]
        #e.g - need to maintain the order
        #a b.txt
        #a book.txt
        #ab.txt
        #abacus.txt


        set original_splitchars [dict get $tagconfig original_splitchars]

        # tag_dashes test moved from loop - review
        set tag_dashes 0
        if {![string length [dict get $tagconfig last_part_text_tag]]} {
            #winlike
            set tag_dashes 1
        }
        if {("-" ni $original_splitchars)} {
            set tag_dashes 1
        }
        if {$debug >= 3} {
            puts stdout "START build_key chunk        : $chunk"
            puts stdout "START build_key splitchars   : $splitchars $topdict $tagconfig  NO tag dashes"
        }


        ## index_map will have no effect if we've already split on the char anyway(?)
        #foreach m [dict keys $index_map] {
        #    if {$m in $original_splitchars} {
        #        dict unset index_map $m
        #    }
        #}

        #if {![string length $chunk]} return

        set result ""
        if {![llength $splitchars]} {
            #no more structural splits - but we need to examine numeric/non-numeric segments at the lowest level.
            # we are at a leaf in the recursive split hierarchy

            set s "" ;#we never actually split on "" (unless that was put in splitchars.. but it probably shouldn't be)
            set parts [list $chunk] ;#important to treat as list or leading/trailing whitespace lost


        } else {
            set s [lindex $splitchars 0]
            if {"spudbucket$s" in "[split $chunk {}]"} {
                error "dead-branch spudbucket"
                set partindex [build_key $chunk  [lrange $splitchars 1 end] $topdict $tagconfig $debug]
                if {[dict get $tagconfig showsplits]} {
                    set pfx "(1${s}=)"  ;# = sorts before _
                    set partindex ${pfx}$partindex
                }

                return $partindex
            } else {
                set parts_below_index ""

                if {$s ni [split $chunk ""]} {
                    #$s can be an empty string
                    set parts [list $chunk]
                } else {
                    set parts [split $chunk $s] ;#whitespace preserved - even if splitting on s that is not in string.
                }
                #assert - we have a splitchar $s that is in the chunk - so at least one part
                if {(![string length $s] || [llength $parts] == 0)} {
                    error "buld_key assertion false empty split char and/or no parts"
                }

                set pnum 1 ;# 1 based for clarity of reading index in debug output
                set subpart_count [llength $parts]

                set sub_splits [lrange $splitchars 1 end] ;#pass same splitchars to each subpart
                foreach p $parts {
                    set partindex [build_key $p  $sub_splits $topdict $tagconfig $debug]
                    set lastpart [expr {$pnum == $subpart_count}]


                    #######################
                    set showsplits [dict get $tagconfig showsplits]
                    #split prefixing experiment - maybe not suitable for general use - as it affects sort order
                    #note that pfx must be consistent until last one, no matter how many partnumbers there are in total.
                    # we don't want to influence sort order before reaching end.
                    #e.g for:
                    #(1.=)...
                    #(1._)...(2._)...(3.=)
                    #(1._)...(2.=)
                    #Note that this is probably more suitable for highly structure dependant sorts where the results are maybe less.. natural.
                    if {$showsplits} {
                        if {$lastpart} {
                            set pfx "(${pnum}${s}_"
                            #set pfx "(${pnum}${s}=)"  ;# = sorts before _
                        } else {
                            set pfx "(${pnum}${s}_"
                        }
                        append parts_below_index $pfx
                    }
                    #######################

                    if {$lastpart} {
                        if {[string length $p] && [string is digit $p]} {
                            set last_part_tag "<22${s}>"
                        } else {
                            set last_part_tag "<33${s}>"
                        }

                        set last_part_text_tag [dict get $tagconfig last_part_text_tag]
                        #for -winlike 1 there is no tag configured. Windows explorer likes to put things in the  order:
                        #  module-0.1.1.tm
                        #  module-0.1.1.2.tm
                        #  module-0.1.tm
                        # arguably -winlike 0 is more natural/human
                        #  module-0.1.tm
                        #  module-0.1.1.tm
                        #  module-0.1.1.2.tm

                        if {[string length $last_part_text_tag]} {
                            #replace only the first text-tag (<30>) from the subpart_index
                            if {[string match "<30?>*" $partindex]} {
                                #give textual string index  a specific tag   for last part in split only. e.g <130> for lower than integers
                                set partindex "<130>[string range $partindex 5 end]"
                            }
                            #append parts_below_index $last_part_tag
                        }
                        #set partindex $last_part_tag$partindex


                    }
                    append parts_below_index $partindex


                    if {$showsplits} {
                        if {$lastpart} {
                            set suffix "${pnum}${s}=)"  ;# = sorts before _
                        } else {
                            set suffix "${pnum}${s}_)"
                        }
                        append parts_below_index $suffix
                    }


                    incr pnum
                }
                append parts_below_index "" ;# don't add anything at the tail that may perturb sort order

                if {$debug >= 3} {
                    set pad [string repeat " " 20]
                    puts stdout "END build_key chunk          : $chunk "
                    puts stdout "END build_key splitchars     : $splitchars $topdict $tagconfig  NO tag dashes"
                    puts stdout "END build_key ret below_index: $parts_below_index"
                }
                return $parts_below_index


            }
        }


        #puts stdout ">>>chunk:'$chunk'<<< split-on:$s parts: '$parts'   splitchars: $splitchars -topdict:$topdict"


        #if {$chunk eq ""} {
        #   puts "___________________________________________!!!____"
        #}
        #puts stdout "-->chunk:$chunk $s parts:$parts"

        #puts stdout "---chunk:'$chunk' part:'$part' parts:'$parts' s:'$s'"


            set segments [split_numeric_segments $chunk] ;#!
            set stringindex ""
            set segnum 0
            foreach seg $segments {
                    #puts stdout "=================---->seg:$seg segments:$segments"
                #-strict ?
                if {[string length $seg] && [string is digit $seg]} {
                    set basenum [trimzero [string trim $seg]]
                    set lengthindex "[padleft [string length $basenum] 4]d"
                    #append stringindex "<20>$lengthindex $basenum $seg"
                } else {
                    set c1 [string range $seg 0 0]
                    #puts stdout "==============> c1'$c1' topdict: $topdict stringindex:$stringindex"

                    if {$c1 in [dict keys $topdict]} {
                        set tag [dict get $topdict $c1]
                        #append stringindex "${tag}$c1"
                        #set seg [string range $seg 1 end]
                    }
                    #textindex
                    set leader "<30>"
                    set idx $seg
                    set idx [string trim $idx]
                    set idx [string tolower $idx]
                    set idx [string map $index_map $idx]


                    #set the X-c count to match the length of the index - not the raw data
                    set lengthindex "[padleft [string length $idx] 4]c"

                    #append stringindex "${leader}$idx $lengthindex $texttail"
                }
            }

            if {[llength $parts] != 1} {
                error "build_key assertion fail llength parts != 1 parts:$parts"
            }

            set segtail_clearance_buffer " " ;#space to clear other split indicators if using showsplits
            set segtail $segtail_clearance_buffer
            append segtail "\["
            set grouping ""
            set pnum 0
            foreach p $parts {
                set sublen_list [list]
                set subsegments [split_numeric_segments $p]
                set i 0

                set partsorter ""
                foreach sub $subsegments {
                    ##don't trim spaces here - it would be inconsistent. Some subs are pure whitespace - others have internal whitespace. e.g "a4 400b a b2" becomes "a 4 { } 400 {b a b} 2"
                    #mapping away all whitespace would be consistent, but not necessarily desirable. If it's in the index_map it'll happen anyway - so we don't do it manually here except for evaluating conditions.
                    set test_trim [string trim $sub]
                    set str $sub
                    set str [string tolower $str]
                    set str [string map $index_map $str]
                    if {[string length $test_trim] && [string is digit $test_trim]} {
                        append partsorter [trimzero $str]
                    } else {
                        append partsorter "$str"
                    }
                    append partsorter
                }


                foreach sub $subsegments {

                    if {[string length $sub] && [string is digit $sub]} {
                        set basenum [trimzero [string trim $sub]]
                        set subequivs $basenum
                        set lengthindex "[padleft [string length $subequivs] 4]d "
                        set idx "$lengthindex [padleft $basenum 10]" ;#todo - cycle through data and determine longest
                        set tail [overtype::left [string repeat " " 10] $sub]
                        #set tail ""
                    } else {
                        set idx ""


                        set lookahead [lindex $subsegments $i+1]
                        if {![string length $lookahead]} {
                            set zeronum "[padleft 0 4]d0"
                        } else {
                            set zeronum ""
                        }
                        set subequivs $sub
                        #set subequivs [string trim $subequivs]
                        set subequivs [string tolower $subequivs]
                        set subequivs [string map $index_map $subequivs]

                        append idx $subequivs
                        append idx $zeronum

                        set idx $subequivs


                        #<removed tag_dashes test put above - review>

                        set ch "-"
                        if {$tag_dashes} {
                            #puts stdout "____TAG DASHES"
                            #winlike
                            set numleading [get_leading_char_count $seg $ch]
                            if {$numleading > 0} {
                                set texttail "<31-leading[padleft $numleading 4]$ch>"
                            } else {
                                set texttail "<30>"
                            }
                            set numothers [expr {[get_char_count $seg $ch] - $numleading}]
                            if {$debug >= 2} {
                              puts stdout "____dashcount: [get_char_count $seg $ch] numothers: $numothers"
                            }
                            if {$numothers > 0} {
                                append texttail "<31-others[padleft $numothers 4]$ch>"
                            }  else {
                                append textail "<30>"
                            }
                        } else {
                            set texttail "<30>"
                        }


                        #set idx $partsorter
                        set tail ""
                        #set tail [string tolower $sub] ;#raw
                        #set tail $partsorter
                        #append tail ":[string tolower $p]" ;#we need the whole part - even though it makes the index much larger. !todo - tagconfig switch to toggle case sensitive sorting
                    }

                    append grouping "$idx $tail<end>|$s"
                    incr i
                }


                if {$p eq ""} {
                    # no subsegments..
                    set zeronum "[padleft 0 4]d0"
                    #append grouping "\u000$zerotail"
                    append grouping ".$zeronum"
                }

                #append grouping |
                #append grouping $s
                #foreach len $sublen_list {
                #    append segtail "<[padleft $len 3]>"
                #}
                incr pnum
            }
            set grouping [string trimright $grouping $s]
            append grouping "[padleft [llength $parts] 4]"
            append segtail $grouping


            #append segtail " <[padleft [llength $parts] 4]>"

            append segtail "\]"


            #if {[string length $seg] && [string is digit $seg]} {
            #    append segtail "<20>"
            #} else {
            #    append segtail "<30>"
            #}
            append stringindex $segtail

            incr segnum


        lappend indices  $stringindex

        if {[llength $indices] > 1} {
            puts stderr "INDICES [llength $indices]: $stringindex"
            error "build_key assertion error deadconcept indices"
        }

        #topchar handling on splitter characters
        #set c1 [string range $chunk 0 0]
        if {$s  in [dict keys $topdict]} {
            set tag [dict get $topdict $s]
            set joiner [string map [list ">" "$s>"] ${tag}]
            #we have split on this character $s so if the first part is empty string then $s was a leading character
            # we need to bring a tag out front for this, or it will be dominated by the leading sections-remaing tag <S..>
            # (since the empty string produces no tag of it's own - ?)
            if {[string length [lindex $parts 0]] == 0} {
                set prefix ${joiner}
            } else {
                set prefix ""
            }
        } else {
            #use standard character-data positioning tag if no override from topdict
            set joiner "<30J>$s"
            set prefix ""
        }


        set  contentindex   $prefix[join $indices $joiner]
        if {[string length $s]} {
            set split_indicator "<split$s $chunk>"
        } else {
            set split_indicator "<split$s $chunk>"

        }
        if {![string length $s]} {
            set s ~
        }

        #return  "[overtype::left [string repeat { } 80] $contentindex][overtype::left [string repeat { } 10] [list $s $chunk]]"
        #return $contentindex$split_indicator
        #return [overtype::left [string repeat - 40] $contentindex]

        if {$debug >= 3} {
            puts stdout "END build_key chunk          : $chunk"
            puts stdout "END build_key splitchars     : $splitchars $topdict $tagconfig  NO tag dashes"
            puts stdout "END build_key ret contentidx : $contentindex"
        }
        return $contentindex
    }

    #----------------------------------------
    #line-processors - data always last argument - opts can be empty string
    #all processor should accept empty opts and ignore opts if they don't use them
    proc _lineinput_as_tcl1 {opts line} {
        set out ""
        foreach i $line {
            append out "$i "
        }
        set out [string range $out 0 end-1]
        return $out
    }
    #should be equivalent to above
    proc _lineinput_as_tcl {opts line} {
        return [concat {*}$line]
    }
    #will put extra tcl quoting if it was already tcl-shaped  e.g text  "a" -> {"a"}
    proc _lineoutput_as_tcl {opts line} {
        return [regexp -inline -all {\S+} $line]
    }

    proc _lineinput_as_raw {opts line} {
        return $line
    }
    proc _lineoutput_as_raw {opts line} {
        return $line
    }

    #words is opposite of tcl
    proc _lineinput_as_words {opts line} {
        #wordlike_parts
        return [regexp -inline -all {\S+} $line]
    }
    proc _lineoutput_as_words {opts line} {
        return [concat {*}$line]
    }

    #opts same as tcllib csv::split - except without the 'line' element
    #?-alternate? ?sepChar? ?delChar?
    proc _lineinput_as_csv {opts line} {
        package require csv
        if {[lindex $opts 0] eq "-alternate"} {
            return [csv::split -alternate $line {*}[lrange $opts 1 end]]
        } else {
            return [csv::split $line {*}$opts]
        }
    }
    #opts same as tcllib csv::join
    #?sepChar? ?delChar? ?delMode?
    proc _lineoutput_as_csv {opts line} {
        package require csv
        return [csv::join $line {*}$opts]
    }
    #----------------------------------------
    variable sort_flagspecs
    set sort_flagspecs [dict create\
        -caller natsort::sort \
        -return supplied|defaults \
        -defaults [list -collate nocase \
                -winlike 0 \
                -splits "\uFFFF" \
                -topchars {. _} \
                -showsplits 1 \
                -sortmethod ascii \
                -collate "\uFFFF" \
                -inputformat raw \
                -inputformatapply {index data} \
                -inputformatoptions "" \
                -outputformat raw \
                -outputformatoptions "" \
                -cols "\uFFFF" \
                -debug 0 -db "" -stacktrace 0 -splits "\uFFFF" -showsplits 0] \
        -required {all} \
        -extras {none} \
        -commandprocessors {}\
    ]

    proc sort {stringlist args} {
        #puts stdout "natsort::sort args: $args"
        variable debug
        variable sort_flagspecs
        if {![llength $stringlist]} return
        if {[llength $stringlist] == 1} {
            if {"-inputformat" ni $args && "-outputformat" ni $args} {
                return $stringlist
            }
        }

        #allow pass through of the check_flags flag -debugargs so it can be set by the caller
        set debugargs 0
        if {[set posn [lsearch $args -debugargs]] >=0} {
            if {$posn == [llength $args]-1} {
                #-debugargs at tail of list
                set debugargs 1
            } else {
                set debugargs [lindex $args $posn+1]
            }
        }

        #-return flagged|defaults doesn't work Review.
        #flagfilter global processor/allocator not working 2023-08

        set opts [check_flags {*}$sort_flagspecs -debugargs $debugargs -values $args]

        #we can only shortcircuit input list of single element at this point if there aren't non-default -inputformat or -outputformat transformations
        if {[llength $stringlist] == 1} {
            set is_basic 1
            foreach fname [list -inputformat -outputformat] {
                if {[dict get $sort_flagspecs -defaults $fname] ne [dict get $opts $fname]} {
                    set is_basic 0
                    break
                }
            }
            if {$is_basic} {
                return $stringlist
            }
        }


        set winlike                     [dict get $opts -winlike]
        set topchars                    [dict get $opts -topchars]
        set cols                        [dict get $opts -cols]
        set debug                       [dict get $opts -debug]
        set stacktrace                  [dict get $opts -stacktrace]
        set showsplits                  [dict get $opts -showsplits]
        set splits                      [dict get $opts -splits]
        set sortmethod                  [dict get $opts -sortmethod]
        set opt_collate                 [dict get $opts -collate]
        set opt_inputformat             [dict get $opts -inputformat]
        set opt_inputformatapply        [dict get $opts -inputformatapply]
        set opt_inputformatoptions      [dict get $opts -inputformatoptions]
        set opt_outputformat            [dict get $opts -outputformat]
        set opt_outputformatoptions     [dict get $opts -outputformatoptions]

        if {$debug} {
            #dict unset opts -showsplits
            #dict unset opts -splits
            puts stdout "natsort::sort processed_args: $opts"
            if {$debug == 1} {
                puts stdout "natsort::sort - try also -debug 2, -debug 3"
            }
        }

        #set sortmethod "-dictionary" ;# sorts a2b before a001b   - possibly other strangenesses that are hard to reason about
        switch -- $sortmethod {
            dictionary - ascii {
                set sortmethod "-$sortmethod"
                # -ascii is default for tcl lsort.
            }
            default {
                set sortmethod "-ascii"
            }
        }

        set allowed_collations [list nocase]
        if {$opt_collate ne "\uFFFF"} {
            if {$opt_collate ni $allowed_collations} {
                error "natsort::sort unknown value for -collate option. Only acceptable value(s): $allowed_collations"
            }
            set nocaseopt "-$opt_collate"
        } else {
            set nocaseopt ""
        }
        set allowed_inputformats [list tcl raw csv words]
        switch -- $opt_inputformat {
            tcl - raw - csv - words {}
            default {
                error "natsort::sort unknown value for -inputformat option. Only acceptable value(s): $allowed_inputformats"
            }
        }
        set allowed_outputformats [list tcl raw csv words]
        switch -- $opt_outputformat {
            tcl - raw - csv - words {}
            default {
                error "natsort::sort unknown value for -outputformat option. Only acceptable value(s): $allowed_outputformats"
            }
        }

        #
        set winsplits [list / . _]
        set commonsplits [list / . _ -]
        #set commonsplits [list]

        set tagconfig [dict create]
        dict set tagconfig last_part_text_tag "<19>"
        if {$winlike} {
            set splitchars $winsplits
            #windows explorer sorts leading spaces at the top - which doesn't seem very helpful for keeping things together - but the explorer doesn't seem able to create leading spaces anyway.
            set wintop [list "(" ")" { }  {.} {_}] ;#windows specific order
            foreach t $topchars {
                if {$t ni $wintop} {
                    lappend wintop $t
                }
            }
            set topchars $wintop
            dict set tagconfig last_part_text_tag ""
        } else {
            set splitchars $commonsplits
        }
        if {$splits ne "\uFFFF"} {
            set splitchars $splits
        }
        dict set tagconfig original_splitchars $splitchars
        dict set tagconfig showsplits $showsplits

        #create topdict
        set i 0
        set topdict [dict create]
        foreach c $topchars {
            incr i ;#start at 01 so that 00 reserved for final-split tag (allows x-0.1.txt to sort above x-0.1.1.txt  by default. Use tagconfig to change, or choose -winlike 1 for explorer-like sorting)
            dict set topdict $c "<0$i>"
        }
        set keylist [list]

        switch -- $opt_inputformat {
            tcl {
                set lineinput_transform [list  _lineinput_as_tcl $opt_inputformatoptions]
            }
            csv {
                set lineinput_transform [list  _lineinput_as_csv $opt_inputformatoptions]
            }
            raw {
                set lineinput_transform [list _lineinput_as_raw $opt_inputformatoptions]
            }
            words {
                set lineinput_transform [list _lineinput_as_words $opt_inputformatoptions]
            }
        }
        switch -- $opt_outputformat {
            tcl {
                set lineoutput_transform [list  _lineoutput_as_tcl $opt_outputformatoptions]
            }
            csv {
                set lineoutput_transform [list  _lineoutput_as_csv $opt_outputformatoptions]
            }
            raw {
                set lineoutput_transform [list _lineoutput_as_raw $opt_outputformatoptions]
            }
            words {
                set lineoutput_transform [list _lineoutput_as_words $opt_outputformatoptions]
            }
        }

        if {("data" in $opt_inputformatapply) || ("index" in $opt_inputformatapply)} {
            if {$opt_inputformat eq "raw"} {
                set tf_stringlist $stringlist
            } else {
                set tf_stringlist [list]
                foreach v $stringlist {
                    lappend tf_stringlist [{*}$lineinput_transform $v]
                }
            }
            if {"data" in $opt_inputformatapply} {
                set tf_data_stringlist $tf_stringlist
            } else {
                set tf_data_stringlist $stringlist
            }
            if {"index" in $opt_inputformatapply} {
                set tf_index_stringlist $tf_stringlist
            } else {
                set tf_index_stringlist $stringlist
            }
        } else {
            set tf_data_stringlist $stringlist
            set tf_index_stringlist $stringlist
        }


        if {$stacktrace} {
            puts stdout [natsort::stacktrace]
            set natsort::stacktrace_on 1
        }
        if {$cols eq "\uFFFF"} {
            set colkeys [lmap v $stringlist {}]
        } else {
            set colkeys [list]
            foreach v $tf_index_stringlist {
                set lineparts $v
                set k [list]
                foreach c $cols {
                    lappend k [lindex $lineparts $c]
                }
                lappend colkeys [join $k "_"] ;#use a common-split char - Review
            }
        }
        #puts stdout "colkeys: $colkeys"

        if {$opt_inputformat eq "raw"} {
            #no inputformat was applied - can just use stringlist
            foreach value $stringlist ck $colkeys {
                set contentindex   [build_key $value $splitchars $topdict $tagconfig $debug]
                set colindex       [build_key $ck $splitchars $topdict $tagconfig $debug]
                lappend keylist ${colindex}-${contentindex}-$value ;#note: entire raw value  used for final sort disambiguation (can be whitespace that was ignored in indexing)
            }
        } else {
            foreach keyinput $tf_index_stringlist datavalue  $tf_data_stringlist ck $colkeys {
                #data may or may not have been transformed
                #column index may or may not have been built with transformed data

                set contentindex   [build_key $keyinput $splitchars $topdict $tagconfig $debug]
                set colindex       [build_key $ck $splitchars $topdict $tagconfig $debug]
                lappend keylist ${colindex}-${contentindex}-$datavalue ;#note: entire value  used for final sort disambiguation (can be whitespace that was ignored in indexing)
            }
        }
        #puts stderr "keylist: $keylist"

        ###################################################################################################
        #  Use the generated keylist to do the actual sorting
        #  select either the transformed or raw data as the corresponding output
        ###################################################################################################
        if {[string length $nocaseopt]} {
            set sortcommand [list lsort $sortmethod $nocaseopt -indices $keylist]
        } else {
            set sortcommand [list lsort $sortmethod -indices $keylist]
        }
        if {$opt_outputformat eq "raw"} {
            #raw output means no further transformations - it doesn't mean there wasn't a transform applied on the input side
            #use the tf_data_stringlist in the output - which will be the same as the input stringlist if no input transform applied for data.
            #(Also - it may or may not have been *sorted* on transformed data depending on whether 'index' was in $opt_inputformatapply)
            foreach idx [{*}$sortcommand] {
                lappend result [lindex $tf_data_stringlist $idx]
            }
        } else {
            #we need to apply an output format
            #The data may or may not have been transformed at input
            foreach idx [{*}$sortcommand] {
                lappend result [{*}$lineoutput_transform [lindex $tf_data_stringlist $idx]]
            }
        }
        ###################################################################################################


        if {$debug >= 2} {
            set screen_width 250
            set max_val 0
            set max_idx 0
            #####  calculate colum widths
            foreach i [{*}$sortcommand] {
                set len_val [string length [lindex $stringlist $i]]
                if {$len_val > $max_val} {
                    set max_val $len_val
                }
                set len_idx [string length [lindex $keylist $i]]
                if {$len_idx > $max_idx} {
                    set max_idx $len_idx
                }
            }
            ####
            set l_width [expr {$max_val + 1}]
            set leftcol [string repeat " " $l_width]
            set r_width [expr {$screen_width - $l_width - 1}]
            set rightcol [string repeat " " $r_width]
            set str [overtype::left $leftcol RAW]
            puts stdout "  $str Index with possibly transformed data at tail"
            foreach i [{*}$sortcommand] {
                #puts stdout "|d> [overtype::left  $leftcol [lindex $stringlist $i] ]  [lindex $keylist $i]"
                set index [lindex $keylist $i]
                set len_idx [string length $index]
                set rowcount [expr {$len_idx / $r_width}]
                if {($len_idx % $r_width) > 0} {
                    incr rowcount
                }
                set rows [list]
                for {set r 0} {$r < $rowcount} {incr r} {
                    lappend rows [string range $index 0 $r_width-$r]
                    set index [string range $index $r_width end]
                }

                set r 0
                foreach idxpart $rows {
                    if {$r == 0} {
                        #use the untransformed stringlist
                        set str [overtype::left  $leftcol [lindex $stringlist $i]]
                    } else {
                        set str [overtype::left  $leftcol ...]]
                    }
                    puts stdout "  $str $idxpart"
                    incr r
                }
                #puts stdout "|> '[lindex $stringlist $i]'"
                #puts stdout "|>    [lindex $keylist $i]"
            }

            puts stdout "|debug> topdict:    $topdict"
            puts stdout "|debug> splitchars: $splitchars"
        }
        return $result
    }


    #Note that although leading whitespace isn't a commonly used feature of filesystem names - it's possible at least on  FreeBSD,windows and linux so we should try to handle it sensibly.
    proc sort_experiment {stringlist args} {
        package require sqlite3

        variable debug
        set args [check_flags -caller natsort::sort \
          -defaults [dict create -db :memory:  -collate nocase -nullvalue "->NULL<"  -winlike 0  -topchars [list] -debug 0] \
          -extras {all} \
          -values $args]
        set db              [string trim [dict get $args -db]]
        set collate         [string trim [dict get $args -collate]]
        set winlike     [string trim [dict get $args -winlike]]
        set debug           [string trim [dict get $args -debug]]
        set nullvalue       [string trim [dict get $args -nullvalue]]


        set topchars        [string trim [dict get $args -topchars]]

        set topdot [expr {"." in $topchars}]
        set topunderscore [expr {"_" in $topchars}]


        sqlite3 db_natsort2 $db
        #--
        #our table must handle the name with the greatest number of numeric/non-numeric splits.
        #This means a single list member with pathological naming e.g a1a1a1a1a1a1a1a1a1a1a1.txt could greatly extend the number of columns and indices and affect performance.
        #review: could be optimised to aggregate the tail into a single index, as the the extra columns won't assist in ordering, but make the table and query bigger.
        # we should probably determine the longest common sequence of splits in the input list and add only one more index for the segment after that.
        set maxsegments 0
        #--
        set prefix "idx"

        #note - there will be more columns in the sorting table than segments.
        # (a segment equals one of the numeric & non-numeric string portions returned from 'split_numeric_sgements')
        #---------------------------
        # consider
        #   a123b.v1.2.txt
        #   a123b.v1.3beta1.txt
        # these have the following segments:
        #   a 123 b.v 1 . 2 .txt
        #   a 123 b.v 1 . 3 beta 1 .txt
        #---------------------------
        # The first string has 7 segments  (numbered 0 to 6)
        # the second string has 9 segments
        #
        # for example when the data has any elements in a segment position that are numeric (e.g 0001 123)
        #  - then an index column with numeric equivalents will be created (e.g 0001 becomes 1), and any non-numeric values in that column will get mapped to a negative value (for special cases) or a high value such as NULL  (with  NULLS LAST sql support)
        #
        # when a segment

        #cycle through all strings - we cannot build tabledef as we go because the column type depends on whether all segments for a particular column are text vs int-equivalent.
        array set segmentinfo {}
        foreach nm $stringlist {
            set segments [split_numeric_segments $nm]
            if {![string length [string trim [lindex $segments 0]]]} {
                if {[string is digit [string trim [lindex $segments 1]]]} {
                    #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though)
                    set segments [lrange $segments 1 end]
                }
            }


            set c 0 ;#start of index columns
            if {[llength $segments] > $maxsegments} {
                set maxsegments [llength $segments]
            }
            foreach seg $segments {
                set seg [string trim $seg]
                set column_exists [info exists segmentinfo($c,type)]
                if {[string is digit $seg]} {
                    if {$column_exists} {
                        #override it  (may currently be text or int)
                        set segmentinfo($c,type) "int"
                    } else {
                        #new column
                        set segmentinfo($c,name) ${prefix}$c
                        set segmentinfo($c,type) "int"
                    }
                } else {
                    #text never overrides int
                    if {!$column_exists} {
                        set segmentinfo($c,name) ${prefix}$c
                        set segmentinfo($c,type) "text"
                    }
                }
                incr c
            }
        }
        if {$debug} {
            puts stdout "Largest number of num/non-num segments in data: $maxsegments"
            #parray segmentinfo
        }

        #
        set tabledef ""
        set ordered_column_names [list]
        set ordered_segmentinfo_tags [lsort -dictionary [array names segmentinfo *]]
        foreach k $ordered_segmentinfo_tags {
            lassign [split $k ,] c tag
            if {$tag eq "type"} {
                set type [set segmentinfo($k)]
                if {$type eq "int"} {
                    append tabledef "$segmentinfo($c,name) int,"
                } else {
                    append tabledef "$segmentinfo($c,name) text COLLATE $collate,"
                }
                append tabledef "raw$c text COLLATE $collate,"
                lappend ordered_column_names $segmentinfo($c,name)
                lappend ordered_column_names raw$c  ;#additional index column not in segmentinfo
            }
            if {$tag eq "name"} {
                #lappend ordered_column_names $segmentinfo($k)
            }
        }
        append tabledef "name text"

        #puts stdout "tabledef:$tabledef"


        db_natsort2 eval [string map [list %tabledef% $tabledef] {create table natsort(%tabledef%)}]


        foreach nm $stringlist {
            array unset intdata
            array set intdata {}
            array set rawdata {}
            #init array and build sql values string
            set sql_insert "insert into natsort values("
            for {set i 0} {$i < $maxsegments} {incr i} {
                set intdata($i) ""
                set rawdata($i) ""
                append sql_insert "\$intdata($i),\$rawdata($i),"
            }
            append sql_insert "\$nm"  ;#don't manipulate name value in any way - e.g must leave all whitespace as the sort must return exactly the same elements as in the original list.
            append sql_insert ")"

            set segments [split_numeric_segments $nm]
            if {![string length [string trim [lindex $segments 0]]]} {
                if {[string is digit [string trim [lindex $segments 1]]]} {
                    #name is whitespace followed by a digit - special case - ignore the whitespace for numbers only. (whitespace still goes through to name column though)
                    set segments [lrange $segments 1 end]
                }
            }
            set values ""
            set c 0
            foreach seg $segments {
                if {[set segmentinfo($c,type)] eq "int"} {
                    if {[string is digit [string trim $seg]]} {
                        set intdata($c) [trimzero [string trim $seg]]
                    } else {
                        catch {unset intdata($c)} ;#set NULL - sorts last
                        if {($c == 0) && ($topunderscore) && [string match _* [string trim $seg]]} {
                            set intdata($c) -100
                        }
                        if {($c == 0) && ($topdot) && [string match .* [string trim $seg]]} {
                            set intdata($c) -50
                        }
                    }
                    set rawdata($c)  [string trim $seg]
                } else {
                    #pure text column
                    #set intdata($c) [string trim $seg] ;#ignore leading/trailing whitespace - we sort first on trimmed version, then refine with the sort on rawdata index
                    #catch {unset indata($c)}
                    set indata($c) [string trim $seg]
                    set rawdata($c)  $seg
                }
                #set rawdata($c) [string trim $seg]#
                #set rawdata($c)  $seg
                incr c
            }
            db_natsort2 eval $sql_insert
        }

        set orderedlist [list]

        if {$debug} {
            db_natsort2 eval {select * from pragma_table_info('natsort')} rowdata {
                parray rowdata
            }
        }
        set orderby "order by "

        foreach cname $ordered_column_names {
            if {[string match "idx*" $cname]} {
                append orderby "$cname ASC  NULLS LAST,"
            } else {
                append orderby "$cname ASC,"
            }
        }
        append orderby " name ASC"
        #append orderby " NULLS LAST" ;#??

        #e.g "order by idx0 ASC, raw0 ASC, idx1 ASC ....   name ASC"
        if {$debug} {
            puts stdout "orderby clause: $orderby"
        }
        db_natsort2 eval [string map [list %orderby% $orderby] {select * from natsort %orderby%}] rowdata {
            set line "- "
            #parray rowdata
            set columnnames $rowdata(*)
            #puts stdout "columnnames: $columnnames"
            #[lsort -dictionary [array names rowdata]
            append line "$rowdata(name) \n"
            foreach nm $columnnames {
                if {$nm ne "name"} {
                    append line "$nm: $rowdata($nm) "
                }
            }
            #puts stdout $line
            #puts stdout "$rowdata(name)"
            lappend orderedlist $rowdata(name)
        }

        db_natsort2 close
        return $orderedlist
    }
}


#application section  e.g this file might be linked from /usr/local/bin/natsort
namespace eval natsort {
    namespace import ::flagfilter::check_flags

    proc called_directly_namematch {} {
        global argv0
        if {[info script] eq ""} {
            return 0
        }
        #see https://wiki.tcl-lang.org/page/main+script
        #trailing ... let's us resolve symlinks in last component of the path (could be something else like ___ but ... seems unlikely to collide with anything in the filesystem)
        if {[info exists argv0]
        &&
            [file dirname [file normalize [file join [info script] ...]]]
            eq
            [file dirname [file normalize [file join $argv0 ...]]]
        } {
            return 1
        } else {
            #puts stdout "norm info script: [file dirname [file normalize [file join [info script] ...]]]"
            #puts stdout "norm argv0      : [file dirname [file normalize [file join $argv0 ...]]]"
            return 0
        }
    }
    #Review issues around comparing names vs using inodes (esp with respect to samba shares)
    proc called_directly_inodematch {} {
        global argv0

        if {[info exists argv0]
         &&  [file exists [info script]] && [file exists $argv0]} {
            file stat $argv0        argv0Info
            file stat [info script] scriptInfo
            if {$argv0Info(ino) == 0 || $scriptInfo(ino) == 0 || $argv0Info(dev) == 0 || $scriptInfo(dev) == 0} {
                #vfs?
                #e.g //zipfs:/
                return 0
            }
            return [expr {$argv0Info(dev) == $scriptInfo(dev)
               && $argv0Info(ino) == $scriptInfo(ino)}]
        } else {
            return 0
        }
    }

    if {![interp issafe]} {
        set is_namematch [called_directly_namematch]
        set is_inodematch [called_directly_inodematch]
        ####
        #review - reliability of mechanisms to determine direct calls
        # we don't want application being called when being used as a library, but we need it to run if called directly or from symlinks etc
        #-- choose a policy and leave the others commented.
        #set is_called_directly $is_namematch
        #set is_called_directly $is_inodematch

        #puts "NATSORT: called_directly_namematch - $is_namematch"
        #puts "NATSORT: called_directly_inodematch - $is_inodematch"
        #flush stdout

        set is_called_directly [expr {$is_namematch || $is_inodematch}]
        #set is_called_directly [expr {$is_namematch && $is_inodematch}]
        ###


        #puts stdout "called_directly_name: [called_directly_namematch]  called_directly_inode: [called_directly_inodematch]"
    } else {
        #safe interp
        set is_called_directly 0
    }


    proc test_pass_fail_message {pass {additional ""}} {
        variable test_fail_msg
        variable test_pass_msg
        if {$pass} {
            puts stderr $test_pass_msg
        } else {
            puts stderr $test_fail_msg
        }
        puts stderr $additional
    }

    variable test_fail_msg "XXXXXXXXXXXX   FAIL   XXXXXXXXXXXXX"
    variable test_pass_msg "------------   PASS   -------------"
    proc test_sort_1 {args} {
        package require struct::list
        puts stderr "---$args"
        set args [check_flags -caller natsort:test_sort_1 -defaults [list  -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] -values $args]

        puts stderr "test_sort_1 got args: $args"

            set unsorted_input {
                2.2.2
                2.2.2.2
                1a.1.1
                1a.2.1.1
                1.12.1
                1.2.1.1
                1.02.1.1
                1.002b.1.1
                1.1.1.2
                1.1.1.1
            }
            set input {
1.1.1
1.1.1.2
1.002b.1.1
1.02.1.1
1.2.1.1
1.12.1
1a.1.1
1a.2.1.1
2.2.2
2.2.2.2
            }

            set sorted [natsort::sort $input {*}$args]
            set is_match [struct::list equal $input $sorted]

            set msg "windows-explorer order"

            test_pass_fail_message $is_match $msg
            puts stdout [string repeat - 40]
            puts stdout INPUT
            puts stdout [string repeat - 40]
            foreach item $input {
                puts stdout $item
            }
            puts stdout [string repeat - 40]
            puts stdout OUTPUT
            puts stdout [string repeat - 40]
            foreach item $sorted {
                puts stdout $item
            }
            test_pass_fail_message $is_match  $msg
            return [expr {!$is_match}]
    }
    proc test_sort_showsplits {args} {
        package require struct::list

        set args [check_flags -caller natsort:test_sort_1 \
          -defaults [list  -collate nocase -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 1 ] \
          -extras {all} \
          -values $args]

        set input1 {
            a-b.txt
            a.b.c.txt
            b.c-txt
        }


        set input2 {
            a.b.c.txt
            a-b.txt
            b.c-text
        }

        foreach {msg testlist } [list "custom-order" $input1  "windows-explorer (should work with -winlike 1)" $input2] {
            set sorted [natsort::sort $testlist {*}$args]
            set is_match [struct::list equal $testlist $sorted]

            test_pass_fail_message $is_match  $msg
            puts stderr "INPUT"
            puts stderr "[string repeat - 40]"
            foreach item $testlist {
                puts stdout $item
            }
            puts stderr "[string repeat - 40]"
            puts stderr "OUTPUT"
            puts stderr "[string repeat - 40]"
            foreach item $sorted {
                puts stdout $item
            }

            test_pass_fail_message $is_match  $msg
        }

        #return [expr {!$is_match}]

    }

    #tcl proc dispatch order - non flag items up front
    #trailing flags are paired even if supplied as solo flags e.g -l becomes -l 1
    proc commandline_ls {args} {
        set operands [list]
        set posn 0
        foreach a $args {
          if {![string match -* $a]} {
            lappend operands $a
          } else {
            set flag1_posn $posn
            break
          }
          incr posn
        }
        set args [lrange $args $flag1_posn end]


        set debug 0
        set posn [lsearch $args -debug]
        if {$posn > 0} {
          if {[lindex $args $posn+1]} {
              set debug [lindex $args $posn+1]
          }
        }
        if {$debug} {
           puts stderr "|debug>commandline_ls got $args"
        }

        #if first operand not supplied - replace it with current working dir
        if {[lindex $operands 0] eq "\uFFFF"} {
            lset operands 0 [pwd]
        }

        set targets [list]
        foreach op $operands {
          if {$op ne "\uFFFF"} {
              set opchars [split [file tail $op] ""]
              if {"?" in $opchars || "*" in $opchars} {
                lappend targets $op
              } else {
                #actual file or dir
                set targetitem $op
                set targetitem [file normalize $op]
                if {![file exists $targetitem]} {
                    if {$debug} {
                      puts stderr "|debug>commandline_ls Unable to access path '$targetitem'"
                    }
                }
                lappend targets $targetitem
                if {$debug} {
                  puts stderr "|debug>commandline_ls  listing for $targetitem"
                }
             }
          }
        }
        set args [check_flags -caller commandline_ls \
          -return flagged|defaults \
          -debugargs 0 \
          -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 -algorithm sort] \
          -required {all} \
          -extras {all} \
          -soloflags {-v -l} \
          -commandprocessors {} \
          -values $args ]
        if {$debug} {
            puts stderr "|debug>args: $args"
        }


        set algorithm [dict get $args -algorithm]
        dict unset args -algorithm

        set allfolders [list]
        set allfiles [list]
        foreach item $targets {
            if {[file exists $item]} {
                if {[file type $item] eq "directory"} {
                  set dotfolders [glob -nocomplain -directory $item -type {d} -tail .*]
                  set folders [glob -nocomplain -directory $item -type {d} -tail *]
                  set allfolders [concat $allfolders $dotfolders $folders]

                  set dotfiles [glob -nocomplain -directory $item -type {f} -tail .*]
                  set files [glob -nocomplain -directory $item -type {f} -tail *]
                  set allfiles [concat $allfiles $dotfiles $files]
                } else {
                  #file (or link?)
                  set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]]
                  set allfiles [concat $allfiles $files]
                }
            } else {
                 set folders [glob -nocomplain -directory $item -type {d} -tail [file tail $item]]
                 set allfolders [concat $allfolders $folders]
                 set files [glob -nocomplain -directory [file dirname $item] -tail [file tail $item]]
                 set allfiles [concat $allfiles $files]
            }
        }


        set sorted_folders [natsort::sort $allfolders {*}$args]
        set sorted_files [natsort::sort $allfiles {*}$args]

        foreach fold $sorted_folders {
            puts stdout $fold
        }
        foreach file $sorted_files {
            puts stdout $file
        }

        return "-- ok printed to stdout [llength $sorted_folders] folders and [llength $sorted_files] files --"
    }

    package require argp
    argp::registerArgs commandline_test {
      { -showsplits     boolean   0}
      { -stacktrace     boolean   0}
      { -debug          boolean   0}
      { -winlike        boolean   0}
      { -db             string   ":memory:"}
      { -collate        string   "nocase"}
      { -algorithm      string   "sort"}
      { -topchars       string   "\uFFFF"}
      { -testlist       string   {10 1 30 3}}
    }
    argp::setArgsNeeded commandline_test {-stacktrace}
    proc commandline_test {test args} {
      variable testlist
      puts stdout "commandline_test got $args"
      argp::parseArgs opts
      puts stdout "commandline_test got [array get opts]"
      set args [check_flags -caller natsort_commandline \
        -return flagged|defaults \
        -defaults [list -db :memory: -collate nocase -testlist $testlist -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
        -values $args]

      if {[string tolower $test] in [list "1" "true"]} {
          set test "sort"
      } else {
        if {![llength [info commands $test]]} {
          error "test $test not found"
        }
      }
      dict unset args -test
      set stacktrace [dict get $args -stacktrace]
      # dict unset args -stacktrace

      set argtestlist [dict get $args -testlist]
      dict unset args -testlist


      set debug [dict get $args -debug]

      set collate       [dict get $args -collate]
      set db            [dict get $args -db]
      set winlike       [dict get $args -winlike]
      set topchars      [dict get $args -topchars]


      puts stderr "|test>-----start natsort::$test--- input list size : [llength $argtestlist]"
      #set resultlist [$test $argtestlist -db $db -collate $collate -topchars $topchars  -winlike $winlike]
      set resultlist [$test  $argtestlist {*}$args]
      foreach nm $resultlist {
          puts stdout $nm
      }
      puts stdout "|test>-----end   natsort::$test--- sorted list size: [llength $resultlist]"
      return "test end"
    }
    proc commandline_runtests {runtests args} {
        set argvals [check_flags -caller commandline_runtests \
          -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits "\uFFFF" -runtests 1] \
          -values $args]

        puts stderr "runtests args: $argvals"

        #set runtests [dict get $argvals -runtests]
        dict unset argvals -runtests
        dict unset argvals -algorithm

        puts stderr "runtests args: $argvals"
        #exit 0

        set test_prefix "::natsort::test_sort_"

        if {$runtests eq "1"} {
            set runtests "*"
        }


        set testcommands [info commands ${test_prefix}${runtests}]
        if {![llength $testcommands]} {
            puts stderr "No test commands matched -runtests argument '$runtests'"
            puts stderr "Use 1 to run all tests"
            set alltests [info commands ${test_prefix}*]
            puts stderr "Valid tests are:"

            set prefixlen [string length $test_prefix]
            foreach t $alltests {
                set shortname [string range $t $prefixlen end]
                puts stderr "$t   =  -runtests  $shortname"
            }

        } else {
            foreach cmd $testcommands {
                puts stderr [string repeat - 40]
                puts stderr "calling $cmd with args: '$argvals'"
                puts stderr [string repeat - 40]
                $cmd {*}$argvals
            }
        }
        exit 0
    }
    proc help {args} {
        puts stdout "natsort::help got '$args'"
        return "Help not implemented"
    }
    proc natsort_pipe {args} {
          #PIPELINE to take input list on stdin and write sorted list to stdout
          #strip - from arglist
          #set args [check_flags -caller natsort_pipeline \
          #  -return all \
          #  -defaults [list -db :memory: -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
          #  -values $args]


          set debug [dict get $args -debug]
          if {$debug} {
            puts stderr "|debug> natsort_pipe got args:'$args'"
          }
          set algorithm [dict get $args -algorithm]
          dict unset args -algorithm

          set proclist [info commands ::natsort::sort*]
          set algos [list]
          foreach p $proclist {
              lappend algos [namespace tail $p]
          }
          if {$algorithm ni [list {*}$proclist {*}$algos]} {
              do_error "valid sort mechanisms: $algos" 2
          }


          set input_list [list]
          while {![eof stdin]} {
              if {[gets stdin line] > 0} {
                  lappend input_list $line
              } else {
                  if {[eof stdin]} {

                  } else {
                      after 10
                  }
              }
          }

          if {$debug} {
              puts stderr "|debug> received [llength $input_list] list elements"
          }

          set resultlist [$algorithm $input_list {*}$args]
          if {$debug} {
              puts stderr "|debug> returning [llength $resultlist] list elements"
          }
          foreach r $resultlist {
              puts stdout $r
          }
          #exit 0

    }
    if {($is_called_directly)} {
            set cmdprocessors {
                {helpfinal {match "^help$" dispatch natsort::help}}
                {helpfinal {sub -topic default "NONE"}}
            }
            #set args [check_flags \
            #  -caller test1 \
            #  -debugargs 2 \
            #  -return arglist \
            #  -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] \
            #  -required {none} \
            #  -extras {all} \
            #  -commandprocessors $cmdprocessors \
            #  -values $::argv ]
            interp alias {} do_filter {} ::flagfilter::check_flags

            #mashopts are generally single-letter opts that can be run together e.g -l -d  as -ld
            set cmdprocessors {
                {helpcmd {match "^help$" dispatch natsort::help singleopts {-v}}}
                {helpcmd {sub -operand default \uFFFF singleopts {-l}}}
                {lscmd  {match "^ls$" dispatch natsort::commandline_ls dispatchtype tcl dispatchglobal 1 mashopts {-l -a} singleopts {-l -a} pairopts {} longopts {--color=always}}}
                {lscmd {sub dir default "\uFFFF"}}
                {lscmd {sub dir2 default "\uFFFF"}}
                {lscmd {sub dir3 default "\uFFFF"}}
                {lscmd {sub dir4 default "\uFFFF"}}
                {lscmd {sub dir5 default "\uFFFF"}}
                {lscmd {sub dir6 default "\uFFFF"}}
                {runtests {match "^-tests$" dispatch natsort::commandline_runtests singleopts {-l}}}
                {runtests {sub testname  default "1" singleopts {-l}}}
                {pipecmd {match "^-$"    dispatch natsort::natsort_pipe dispatchtype tcl}}
            }
            set arglist [do_filter \
              -debugargs 0 \
              -debugargsonerror 2 \
              -caller cline_dispatch1 \
              -return all \
              -soloflags {-v -x} \
              -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \
              -required {all} \
              -extras {all} \
              -commandprocessors $cmdprocessors \
              -values $::argv ]


            #mashopts are generally single-letter opts that can be run together e.g -l -d  as -ld
            set cmdprocessors {
                {testcmd {match "^test$" dispatch natsort::commandline_test singleopts {-l}}}
                {testcmd {sub testname  default "1" singleopts {-l}}}
            }
            set arglist [check_flags \
              -debugargs 0 \
              -caller cline_dispatch2 \
              -return all \
              -soloflags {-v -l} \
              -defaults [list  -collate nocase -algorithm sort -testlist "1 2 3 10" -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0 ] \
              -required {all} \
              -extras {all} \
              -commandprocessors $cmdprocessors \
              -values $::argv ]


            #set cmdprocessors [list]
            #set args [check_flags -caller test1 -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors $cmdprocessors -values $::argv ]

            #set args [check_flags -caller test1 -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {-cmd {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ]
            #set args [check_flags -caller test1 -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {{-cmd -default help} {-cmd -cmdarg1 -default "."} {-cmd -cmdarg2 -default j}} -values $::argv ]
            #set args [check_flags -caller test1 -defaults [list  -collate nocase -algorithm sort -topchars "\uFFFF" -winlike 0 -debug 0 -stacktrace 0 -showsplits 0] -required {all} -extras {none} -commandprocessors {ls {ls lsdir -default "\uFFFF"}} -values $::argv ]

            puts stderr "natsort directcall exit"
            flush stderr
            exit 0

        if {$::argc} {

        }
    }
}


package provide natsort [namespace eval natsort {
    variable version
    set version 0.1.1.6
}]