punkshell/src/bootsupport/modules/punk/du-0.1.0.tm

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2023
#
# @@ Meta Begin
# Application punk::du 0.1.0
# Meta platform     tcl
# Meta license      <unspecified>
# @@ Meta End


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Requirements
##e.g package require frobz

namespace eval punk::du {
    variable has_twapi 0
}
if {"windows" eq $::tcl_platform(platform)} {
    if {[catch {package require twapi}]} {
        puts stderr "Warning: punk::du - unable to load twapi. Disk operations may be much slower on windows without the twapi package"
    } else {
        set punk::du::has_twapi 1
    }
    package require punk::winpath
}


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::du {


    proc dirlisting {{folderpath {}}} {
        if {[lib::pathcharacterlen $folderpath] == 0} {
           set folderpath [pwd]
        } elseif {[file pathtype $folderpath] ne "absolute"} {
            #file normalize relativelly slow - avoid in inner loops
            #set folderpath [file normalize $folderpath]

        }
        #run whichever of du_dirlisting_twapi, du_dirlisting_generic, du_dirlisting_unix has been activated
        set dirinfo [active::du_dirlisting $folderpath]
    }


    #Note that unix du seems to do depth-first - which makese sense when piping.. as output can be emitted as we go rather than requiring sort at end.
    #breadth-first with sort can be quite fast .. but memory usage can easily get out of control
    proc du { args } {
        variable has_twapi
        package require struct::set


        if 0 {
        switch -exact [llength $args] {
            0 {
                set dir .
                set switch -k
            }
            1 {
                set dir $args
                set switch -k
            }
            2 {
                set switch [lindex $args 0]
                set dir [lindex $args 1]
            }
            default {
                set msg "only one switch and one dir "
                append msg "currently supported"
                return -code error $msg
            }
        }

        set switch [string tolower $switch]

        set -b 1
        set -k 1024
        set -m [expr 1024*1024]
        }


        set opts $args
        # flags in args are solos (or longopts --something=somethingelse) or sometimes pairopts
        # we don't currently support mashopts  (ie -xy  vs separate -x -y)


        #-------------------------------------------------------
        # process any pairopts first and remove the pair
        # (may also process some solo-opts)

        set opt_depth -1
        if {[set posn [lsearch $opts -d]] >= 0} {
            set opt_depth [lindex $opts $posn+1]
            set opts [lreplace $opts $posn $posn+1]
        }
        foreach o $opts {
            if {[string match --max-depth=* $o]} {
                set opt_depth [lindex [split $o =] 1]
                if {![string is integer -strict $opt_depth]} {
                    error "--max-depth=n  n must be an integer"
                }
            }
        }
        #-------------------------------------------------------
        #only solos and longopts remain in the opts now


        set lastarg [lindex $opts end]
        if {[string length $lastarg] && (![string match -* $lastarg])} {
            set dir $lastarg
            set opts [lrange $opts 0 end-1]
        } else {
            set dir .
            set opts $opts
        }
        foreach a $opts {
            if {![string match -* $a]} {
                error "unrecognized option '$a'"
            }
        }

        set -b 1
        set -k 1024
        set -m [expr 1024*1024]
        set switch -k ;#default (same as unix)
        set lc_opts [string tolower $opts]


        if {"-b" in $lc_opts} {
            set switch -b
        } elseif {"-k" in $lc_opts} {
            set switch -k
        } elseif {"-m" in $lc_opts} {
            set switch -m
        }
        set opt_progress 0
        if {"--prog" in $lc_opts || "--progress" in $lc_opts} {
            set opt_progress 1
        }
        set opt_extra 0
        if {"--extra" in $lc_opts} {
            set opt_extra 1
        }
        set opt_vfs 0
        if {"--vfs" in $lc_opts} {
            set opt_vfs 1
        }


        set result [list]

        set dir_depths_remaining [list]

        set is_windows [expr {$::tcl_platform(platform) eq "windows"}]
        set zero [expr {0}]

        # ## ### ### ### ###
        # containerid and itemid
        set folders [list] ;#we lookup string by index
        lappend folders [file dirname $dir]
        lappend folders $dir ;#itemindex 1
        # ## ### ### ### ###
        if {![file isdirectory $dir]} {
            lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] [file size $dir]]
            #set ary($dir,bytes) [file size $dir]
            set leveldircount 0
        } else {
            lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] $zero]
            set leveldircount 1
        }
        set level [expr {0}]
        set nextlevel [expr {1}]
        #dir_depths list structure
        # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
        #0          1               2           3       4       5
        #i_depth    i_containerid   i_itemid    i_item  i_size  i_index
        # -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
        set i_depth         [expr {0}]
        set i_containerid   [expr {1}]
        set i_itemid        [expr {2}]
        set i_item          [expr {3}]
        set i_size          [expr {4}]
        set i_index         [expr {5}]


        set listlength [llength $dir_depths_remaining]
        set diridx 0
        #this is a breadth-first algorithm
        while {$leveldircount > 0} {
            set leveldirs 0
            set levelfiles 0
            for {set i $diridx} {$i < $listlength} {incr i} {
                #lassign [lindex $dir_depths_remaining $i] _d containeridx folderidx itm bytecount
                set folderidx [lindex $dir_depths_remaining $i $i_itemid]
                set folderpath [lindex $folders $folderidx]
                #puts stderr ->$folderpath
                #if {$i >= 20} {
                #return
                #}

                #twapi supports gathering file sizes during directory contents traversal
                #for dirlisting methods that return an empty list in filesizes whilst files has entries - we will need to populate it below
                #e.g tcl glob based dirlisting doesn't support gathering file sizes at the same time

                set in_vfs 0
                if {$opt_vfs} {
                    foreach vfsmount [vfs::filesystem info] {
                        if {[punk::repo::path_a_atorbelow_b $folderpath $vfsmount]} {
                            set in_vfs 1
                            break
                        }
                    }
                }

                if {$in_vfs} {
                    set du_info [lib::du_dirlisting_tclvfs $folderpath]
                } else {
                    #run the activated function (proc imported to active namespace and renamed)
                    set du_info [active::du_dirlisting $folderpath]
                }


                set dirs [dict get $du_info dirs]
                set files [dict get $du_info files]
                set filesizes [dict get $du_info filesizes]


                incr leveldirs  [llength $dirs]
                incr levelfiles [llength $files]

                #lappend dir_depths_remaining {*}[lmap d $dirs {::list $nextdepth [lib::du_lit $cont/$itm] $d $zero}]
                #folderidx is parent index for new dirs
                lappend dir_depths_remaining {*}[lib::du_new_eachdir $dirs $nextlevel $folderidx]

                #we don't need to sort files (unless we add an option such as -a to du (?))
                set bytecount [expr {0}]

                if {[llength $files] && ![llength $filesizes]} {
                    #listing mechanism didn't supply corresponding sizes
                    foreach filename $files {
                        #incr bytecount [file size [file join $folderpath $filename]
                        incr bytecount [file size $filename]
                    }
                } else {
                    set filesizes [lsearch -all -inline -not $filesizes[unset filesizes] na] ;#only legal non-number is na
                    set bytecount [tcl::mathop::+ {*}$filesizes]
                }


                #we can safely assume initial count was zero
                lset dir_depths_remaining $i $i_size $bytecount
                #incr diridx
            }
            #puts stdout "level: $level dirs: $leveldirs"
            if {$opt_extra} {
                puts stdout "level: $level dircount: $leveldirs filecount: $levelfiles"
            }
            incr level ;#zero based
            set nextlevel [expr {$level + 1}]
            set leveldircount [expr {[llength $dir_depths_remaining] - $listlength }]; #current - previous - while loop terminates when zero
            #puts "diridx: $diridx i: $i rem: [llength $dir_depths_remaining] listlenth:$listlength levldircount: $leveldircount"
            set diridx $i
            set listlength  [llength $dir_depths_remaining]
        }
        #puts stdout ">>> loop done"
        #flush stdout
        #puts stdout $dir_depths_remaining
        set dirs_as_encountered $dir_depths_remaining ;#index is in sync with 'folders' list
        set dir_depths_longfirst $dirs_as_encountered

        #store the index before sorting
        for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} {
            lset dir_depths_longfirst $i $i_index $i
        }
        set dir_depths_longfirst [lsort -integer -index 0 -decreasing $dir_depths_longfirst[set dir_depths_longfirst {}]]

        #store main index in the reducing list
        set dir_depths_remaining $dir_depths_longfirst
        for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} {
            #stored index at position 3
            lset dir_depths_remaining $i $i_index $i
        }

        #index 3
        #dir_depths_remaining -> dir_depths_longfirst -> dirs_as_encountered

        #puts stdout "initial dir_depths_remaining: $dir_depths_remaining"


        #summing performance is not terrible but significant on large tree - the real time is for large trees in the main loop above
        #update - on really  large trees the reverse is true especiallyl now that twapi fixed the original speed issues.. todo - rework/simplify below - review natsort
        #
        #TODO - reconsider sorting by depth.. lreverse dirs_as_encountered should work..
        if {[llength $dir_depths_longfirst] > 1} {
            set i 0
            foreach dd $dir_depths_longfirst {
                lassign $dd d parentidx folderidx item bytecount
                #set nm $cont/$item
                set nm [lindex $folders $folderidx]
                set dnext [expr {$d +1}]
                set nextdepthposns [lsearch -all -integer -index 0 $dir_depths_remaining $dnext]
                set nextdepthposns [lsort -integer -decreasing $nextdepthposns[set nextdepthposns {}]];#remove later elements first
                foreach posn $nextdepthposns {
                    set id [lindex $dir_depths_remaining $posn $i_itemid]
                    set ndirname [lindex $folders $id]
                    #set ndirname $cont/$item
                    #set item [lindex $dir_depths_remaining $posn $i_item]
                    #set ndirname [lindex $ndir 1]
                    if {[string match $nm/* $ndirname]} {
                        #puts stdout "dir $nm adding subdir size $ndirname"
                        #puts stdout "incr $nm from $ary($nm,bytes) plus $ary($ndirname,bytes)"
                        incr bytecount [lindex $dir_depths_remaining $posn $i_size]
                        set dir_depths_remaining [lreplace $dir_depths_remaining[set dir_depths_remaining {}] $posn $posn]
                    }
                }
                lset dir_depths_longfirst $i $i_size $bytecount
                set p [lsearch -index $i_index -integer $dir_depths_remaining $i]
                lset dir_depths_remaining $p $i_size $bytecount
                #set ary($nm,bytes) $bytecount
                incr i
            }
        }
        #set dir_depths_longfirst [lsort -index 1 -decreasing $dir_depths_longfirst]
        #

        set retval [list]
        #copy across the bytecounts
        for {set i 0} {$i < [llength $dir_depths_longfirst]} {incr i} {
            set posn [lindex $dir_depths_longfirst $i $i_index]
            set bytes [lindex $dir_depths_longfirst $i $i_size]
            lset dirs_as_encountered $posn $i_size $bytes
        }
        foreach dirinfo [lreverse $dirs_as_encountered] {
            set id [lindex $dirinfo $i_itemid]
            set depth [lindex $dirinfo $i_depth]
            if {($opt_depth >= 0) && $depth > $opt_depth} {
                continue
            }
            set path [lindex $folders $id]
            #set path $cont/$item
            set item [lindex $dirinfo $i_item]
            set bytes [lindex $dirinfo $i_size]
            set size [expr {$bytes / [set $switch]}]
            lappend retval [list $size $path]
        }
        # copyright 2002 by The LIGO Laboratory
        return $retval
    }
    namespace eval active {
        variable functions [list du_dirlisting ""]
        variable functions_known [dict create]

        #known functions from lib namespace
        dict set functions_known du_dirlisting [list du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix]

        proc show_functions {} {
            variable functions
            variable functions_known
            set msg ""
            dict for {callname implementations} $functions_known {
                append msg "callname: $callname" \n
                foreach imp $implementations {
                    if {[dict get $functions $callname] eq $imp} {
                        append msg "  $imp (active)" \n
                    } else {
                        append msg "  $imp" \n
                    }
                }
            }
            return $msg
        }
        proc set_active_function {callname implementation} {
            variable functions
            variable functions_known
            if {$callname ni [dict keys $functions_known]} {
                error "unknown function callname $callname"
            }
            if {$implementation ni [dict get $functions_known $callname]} {
                error "unknown implementation $implementation for callname $callname"
            }
            dict set functions $callname $implementation

            catch {rename ::punk::du::active::$callname ""}
            namespace eval ::punk::du::active [string map [list %imp% $implementation %call% $callname] {
                namespace import ::punk::du::lib::%imp%
                rename %imp% %call%
            }]

            return $implementation
        }
        proc get_active_function {callname} {
            variable functions
            variable functions_known
            if {$callname ni [dict keys $functions_known]} {
                error "unknown function callname $callname known functions: [dict keys $functions_known]"
            }
            return [dict get $functions $callname]
        }


        #where we import & the appropriate du_listing.. function for the platform
    }
    namespace eval lib {
        variable du_literal
        variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
        #caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
        #we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this

        proc decode_win_attributes {bitmask} {
            variable winfile_attributes
            if {[dict exists $winfile_attributes $bitmask]} {
                return [dict get $winfile_attributes $bitmask]
            } else {
                #list/dict shimmering?
                return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
            }
        }
        proc attributes_twapi {path {detail basic}} {
            try {
                set iterator [twapi::find_file_open $path -detail $detail] ;# -detail full only adds data to the altname field
                if {[twapi::find_file_next $iterator iteminfo]} {
                    set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
                    set result [dict create -archive 0 -hidden 0 -longname $path -readonly 0 -shortname {} -system 0]
                    if {"hidden" in $attrinfo} {
                        dict set result -hidden 1
                    }
                    if {"system" in $attrinfo} {
                        dict set result -system 1
                    }
                    if {"readonly" in $attrinfo} {
                        dict set result -readonly 1
                    }
                    dict set result -shortname [dict get $iteminfo altname]
                    dict set result -rawflags $attrinfo
                    set extras [list]
                    foreach prop {ctime atime mtime size} {
                        lappend extras $prop [dict get $iteminfo $prop]
                    }
                    dict set result -extras $extras
                    return $result
                } else {
                    error "could not read attributes for $path"
                }
            } finally {
                catch {twapi::find_file_close $iterator}
            }
        }
        namespace export du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix
        # get listing without using unix-tools (may not be installed on the windows system)
        # this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
        proc du_dirlisting_twapi {folderpath} {
            set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
            # return it so it can be stored and tried as an alternative for problem paths
            try {
                set iterator [twapi::find_file_open [file join $folderpath *] -detail basic] ;# -detail full only adds data to the altname field
            } on error args {
                try {
                    if {[string match "*denied*" $args]} {
                        #output similar format as unixy du
                        puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
                        return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
                    }
                    if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
                        puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
                        puts stderr "  (errorcode: $::errorCode)\n"
                        return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
                    }


                    if {[set plen [pathcharacterlen $folderpath]] >= 250} {
                        set errmsg "error reading folder: $folderpath (len:$plen)\n"
                        append errmsg "error: $args" \n
                        append errmsg "errorcode: $::errorCode" \n
                        # re-fetch this folder with altnames
                        #file normalize - aside from being slow - will have problems with long paths - so this won't work.
                        #this function should only accept absolute paths
                        #
                        #
                        #Note: using -detail full  only helps if the last segment of path has an altname..
                        #To properly shorten we need to have kept track of altname all the way from the root!
                        #We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
                        #### SLOW
                        set fixedpath [dict get [file attributes $folderpath] -shortname]
                        #### SLOW


                        append errmsg "retrying with with windows altname '$fixedpath'"
                        puts stderr $errmsg
                    } else {
                        set errmsg "error reading folder: $folderpath (len:$plen)\n"
                        append errmsg "error: $args" \n
                        append errmsg "errorcode: $::errorCode" \n
                        #possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
                        #we can use //?/path  dos device path - but not with tcl functions
                        #unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
                        #this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here

                        set parent [file dirname $folderpath]
                        set badtail [file tail $folderpath]
                        set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
                        set fixedtail ""
                        while {[twapi::find_file_next $iterator iteminfo]} {
                            set nm [dict get $iteminfo name]
                            if {$nm eq $badtail} {
                                set fixedtail [dict get $iteminfo altname]
                                break
                            }
                        }

                        if {![string length $fixedtail]} {
                            puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
                            return [list dirs {} vfsmounts {} links {} files {} filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
                        }
                        #twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
                        #Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
                        #we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
                        #so the illegalname_fix doesn't really work here
                        #set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]

                        #this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
                        set fixedpath [file join $parent $fixedtail]
                        append errmsg "retrying with with windows dos device path $fixedpath\n"
                        puts stderr $errmsg

                    }


                    set iterator [twapi::find_file_open $fixedpath/* -detail basic]


                } on error args {
                        set errmsg "error reading folder: $folderpath\n"
                        append errmsg "error: $args"
                        append errmsg "aborting.."
                        error $errmsg

                }
            }
            set dirs [list]
            set files [list]
            set filesizes [list]
            set links [list]
            set flaggedhidden [list]
            set flaggedsystem [list]
            set flaggedreadonly [list]

            while {[twapi::find_file_next $iterator iteminfo]} {
                set nm [dict get $iteminfo name]
                set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
                set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
                #puts stderr "$iteminfo"
                #puts stderr "$nm -> [dict get $iteminfo attrs] -> $attrinfo"

                #attributes applicable to any classification
                set fullname [file_join_one $folderpath $nm]
                if {"hidden" in $attrinfo} {
                    lappend flaggedhidden $fullname
                }
                if {"system" in $attrinfo} {
                    lappend flaggedsystem  $fullname
                }
                if {"readonly" in $attrinfo} {
                    lappend flaggedreadonly  $fullname
                }

                #main classification
                if {"reparse_point" in $attrinfo} {
                    #we will treat as  zero sized for du purposes.. review - option -L  for symlinks like BSD du?
                    #Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
                    #The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
                    #if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
                    #e.g (stripped of headers/footers and other lines)
                    #2022-10-02  04:07 AM    <SYMLINKD>     priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
                    #Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
                    #du includes the size of the symlink
                    #but we can't get it with tcl's file size
                    #twapi doesn't seem to have anything to help read it either (?)
                    #the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
                    #
                    #Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
                    #
                    #links are techically files too, whether they point to a file/dir or nothing.
                    lappend links $fullname
                } elseif {"directory" in $attrinfo} {
                    if {$nm in {. ..}} {
                        continue
                    }
                    lappend dirs $fullname
                } else {

                    #review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
                    lappend files $fullname
                    lappend filesizes [dict get $iteminfo size]
                }
            }
            twapi::find_file_close $iterator
            set vfsmounts [get_vfsmounts_in_folder $folderpath]
            #also determine whether vfs.  file system x  is *much* faster than file attributes
            #whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
            return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes $filesizes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname]
        }
        proc get_vfsmounts_in_folder {folderpath} {
            set vfsmounts [list]
            set known_vfs_mounts [vfs::filesystem info]
            foreach mount $known_vfs_mounts {
                if {[punk::repo::path_a_above_b $folderpath $mount]} {
                    if {([llength [file split $mount]] - [llength [file split $folderpath]]) == 1} {
                        #the mount is in this folder
                        lappend vfsmounts $mount
                    }
                }
            }
            return $vfsmounts
        }
        #work around the horrible tilde-expansion thing (not needed for tcl 9+)
        proc file_join_one {base newtail} {
            if {[string index $newtail 0] ne {~}} {
                return [file join $base $newtail]
            }
            return [file join $base ./$newtail]
        }


        #this is the cross-platform pure-tcl version - which calls glob multiple times to make sure it gets everythign it needs and can ignore everything it needs to.
        #These repeated calls to glob will be a killer for performance  - especially on a network share or when walking a large directory structure
        proc du_dirlisting_generic {folderpath} {
            #note platform differences between what is considered hidden make this tricky.
            # on windows 'glob .*' will not return some hidden dot items but will return . ..   and glob -types hidden .* will not return some dotted items
            # glob -types hidden *  on windows will not necessarily return all dot files/folders
            # unix-like platforms seem to consider all dot files as hidden so processing is more straightforward
            #  we need to process * and .* in the same glob calls and remove duplicates
            #  if we do * and .* in separate iterations of this loop we lose the ability to filter duplicates easily

            #Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink'
            #set parent [lindex $folders $folderidx]
            set hdirs  [glob -nocomplain -dir $folderpath -types {hidden d}  * .*]
            #set hdirs {}
            set dirs   [glob -nocomplain -dir $folderpath -types d           * .*]

            set hlinks [glob -nocomplain -dir $folderpath -types {hidden l}  * .*]
            #set hlinks {}
            set links  [glob -nocomplain -dir $folderpath -types l           * .*] ;#links may have dupes - we don't care. struct::set difference will remove
            #set links  [lsort -unique [concat $hlinks $links[unset links]]]

            set hfiles [glob -nocomplain -dir $folderpath -types {hidden f}  * .*]
            #set hfiles {}
            set files  [glob -nocomplain -dir $folderpath -types f           * .*]
            #set files  {}

            #note struct::set difference produces unordered result
            #struct::set difference removes duplicates
            #remove links and . .. from directories, remove links from files
            set dirs  [struct::set difference [concat $hdirs $dirs[unset dirs]]    [concat $links [list [file join $folderpath .]  [file join $folderpath ..] ]]]
            set files [struct::set difference [concat $hfiles $files[unset files]] $links]
            set links [lsort -unique [concat $links $hlinks]]
            if {"windows" eq $::tcl_platform(platform)} {
                set flaggedhidden [concat $hdirs $hfiles $hlinks]
            } else {
                #unix dotted files/folders are not 'flagged' as such - it's a convention - so the client can use the same convention to decide if something is hidden
                #this allows us to return less data - but more importantly - reserve flaggedhidden for systems where such a flag exists and avoid conflating the different concepts of what is hidden
                set flaggedhidden {}
            }

            set vfsmounts [get_vfsmounts_in_folder $folderpath]

            set filesizes [list]; #not available in listing-call - as opposed to twapi which can do it as it goes
            return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
        }

        #we can halve the number of round trips on unix-like systems, where 'hidden' always corresponds to dotted files
        proc du_dirlisting_unix {folderpath} {
            set dirs   [glob -nocomplain -dir $folderpath -types d           * .*] ;# also returns links to dirs
            set links  [glob -nocomplain -dir $folderpath -types l           * .*]  ;# links may have dupes - we don't care. struct::set difference will remove
            set files  [glob -nocomplain -dir $folderpath -types f           * .*] ;# also returns links to files
            #remove any links from our dirs and files collections
            set dirs  [struct::set difference $dirs[unset dirs]    [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
            set files [struct::set difference $files[unset files] $links]
            set vfsmounts [get_vfsmounts_in_folder $folderpath]
            return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
        }
        proc du_dirlisting_tclvfs {folderpath} {
            set dirs   [glob -nocomplain -dir $folderpath -types d           * .*] ;# also returns links to dirs
            #review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs?
            set links  [glob -nocomplain -dir $folderpath -types l           * .*]  ;# links may have dupes - we don't care. struct::set difference will remove
            set files  [glob -nocomplain -dir $folderpath -types f           * .*] ;# also returns links to files
            #remove any links from our dirs and files collections
            set dirs  [struct::set difference $dirs[unset dirs]    [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
            set files [struct::set difference $files[unset files] $links]
            #nested vfs mount.. REVIEW - does anything need special handling?
            set vfsmounts [get_vfsmounts_in_folder $folderpath]
            return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {}]
        }


        proc du_lit value {
            variable du_literal
            if {![info exists du_literal($value)]} {
                set du_literal($value) $value
            }
            return $du_literal($value)
        }

        #v1
        proc du_new_eachdirtail {dirtails depth parentfolderidx} {
            set newlist {}
            upvar folders folders
            set parentpath [lindex $folders $parentfolderidx]
            set newindex [llength $folders]
            foreach dt $dirtails {
                lappend folders [file join $parentpath [du_lit $dt]]; #store as a 'path' rather than a string (see tcl::unsupported::representation)
                lappend newlist [::list $depth $parentfolderidx $newindex [du_lit $dt] [expr {0}]]
                incr newindex
            }
            return $newlist
        }
        proc du_new_eachdir {dirpaths depth parentfolderidx} {
            set newlist {}
            upvar folders folders
            set newindex [llength $folders]
            foreach dp $dirpaths {
                lappend folders $dp
                #puts stdout "--->$dp"
                lappend newlist [::list $depth $parentfolderidx $newindex [du_lit [file tail $dp]] [expr {0}]]
                incr newindex
            }
            return $newlist
        }

        #just an experiment
        #get length of path which has internal rep of path - maintaining path/list rep without shimmering to string representation.
        proc pathcharacterlen {pathrep} {
            set l 0
            set parts [file split $pathrep]
            if {[llength $parts] < 2} {
                return [string length [lindex $parts 0]]
            }
            foreach seg $parts {
                incr l [string length $seg]
            }
            return [expr {$l + [llength $parts] -2}]
        }
        #slower - doesn't work for short paths like c:/
        proc pathcharacterlen2 {pathrep} {
            return [tcl::mathop::+ {*}[lmap v [set plist [file split $pathrep]] {[string length $v]}] [llength $plist] -2]
        }

        #Strip using lengths without examining path components
        #without normalization is much faster
        proc path_strip_alreadynormalized_prefixdepth {path prefix} {
            set tail [lrange  [file split $path] [llength [file split $prefix]] end]
            if {[llength $tail]} {
                return [file join {*}$tail]
            }  else {
                return ""
            }
        }


    }
    package require natsort
    #interp alias {} du {} .=args>* punk::du |> .=>1 natsort::sort -cols 1 |> list_as_lines <args|
    #use natsort -debug 2 to see index output
    #this works better for display of directory/file names with spaces (doesn't show curly braces)
    interp alias {} du2 {}   .=args>* punk::du::du |> .=>1 natsort::sort -cols 1  -outputformat words |> list_as_lines <args|

    #experiment with csv as easy way to get column like format..
    #The /r is somewhat cheating however.. as it messes up redirected output .. e.g if redirected to text file
    interp alias {} du {}  .=args>* punk::du::du |> .=>1 natsort::sort -cols 1  -outputformat csv -outputformatoptions {\r\t\t\t} |> list_as_lines <args|


}


 namespace eval ::punk::du::active {
    variable functions
    variable functions_kown
    upvar ::punk::du::has_twapi has_twapi

    if {"windows" eq $::tcl_platform(platform)} {
        if {$has_twapi} {
            set_active_function du_dirlisting du_dirlisting_twapi
        } else {
            set_active_function du_dirlisting du_dirlisting_generic
        }
    } else {
        set_active_function du_dirlisting du_dirlisting_unix
    }
}


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Ready
package provide punk::du [namespace eval punk::du {
    variable version
    set version 0.1.0
}]
return