You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

1438 lines
70 KiB

# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2023
#
# @@ Meta Begin
# Application punk::du 0.1.0
# Meta platform tcl
# Meta license BSD
# @@ Meta End
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Requirements
##e.g package require frobz
package require punk::mix::base
package require struct::set
namespace eval punk::du {
variable has_twapi 0
}
if {"windows" eq $::tcl_platform(platform)} {
if {![interp issafe]} {
#package require zzzload ;#dependency on Thread package
#zzzload::pkg_require twapi
}
if {[catch {package require twapi}]} {
puts stderr "Warning: punk::du - unable to load twapi. Disk operations may be much slower on windows without the twapi package"
} else {
set punk::du::has_twapi 1
}
#package require punk::winpath
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::du {
proc dirlisting {folderpath args} {
set defaults [dict create\
-glob *\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
if {[lib::pathcharacterlen $folderpath] == 0} {
set folderpath [pwd]
} elseif {[file pathtype $folderpath] ne "absolute"} {
#file normalize relativelly slow - avoid in inner loops
#set folderpath [file normalize $folderpath]
}
#run whichever of du_dirlisting_twapi, du_dirlisting_generic, du_dirlisting_unix has been activated
set dirinfo [active::du_dirlisting $folderpath {*}$opts]
}
#Note that unix du seems to do depth-first - which makese sense when piping.. as output can be emitted as we go rather than requiring sort at end.
#breadth-first with sort can be quite fast .. but memory usage can easily get out of control
proc du { args } {
variable has_twapi
if 0 {
switch -exact [llength $args] {
0 {
set dir .
set switch -k
}
1 {
set dir $args
set switch -k
}
2 {
set switch [lindex $args 0]
set dir [lindex $args 1]
}
default {
set msg "only one switch and one dir "
append msg "currently supported"
return -code error $msg
}
}
set switch [string tolower $switch]
set -b 1
set -k 1024
set -m [expr 1024*1024]
}
set opts $args
# flags in args are solos (or longopts --something=somethingelse) or sometimes pairopts
# we don't currently support mashopts (ie -xy vs separate -x -y)
#-------------------------------------------------------
# process any pairopts first and remove the pair
# (may also process some solo-opts)
set opt_depth -1
if {[set posn [lsearch $opts -d]] >= 0} {
set opt_depth [lindex $opts $posn+1]
set opts [lreplace $opts $posn $posn+1]
}
foreach o $opts {
if {[string match --max-depth=* $o]} {
set opt_depth [lindex [split $o =] 1]
if {![string is integer -strict $opt_depth]} {
error "--max-depth=n n must be an integer"
}
}
}
#-------------------------------------------------------
#only solos and longopts remain in the opts now
set lastarg [lindex $opts end]
if {[string length $lastarg] && (![string match -* $lastarg])} {
set dir $lastarg
set opts [lrange $opts 0 end-1]
} else {
set dir .
set opts $opts
}
foreach a $opts {
if {![string match -* $a]} {
error "unrecognized option '$a'"
}
}
set -b 1
set -k 1024
set -m [expr 1024*1024]
set switch -k ;#default (same as unix)
set lc_opts [string tolower $opts]
if {"-b" in $lc_opts} {
set switch -b
} elseif {"-k" in $lc_opts} {
set switch -k
} elseif {"-m" in $lc_opts} {
set switch -m
}
set opt_progress 0
if {"--prog" in $lc_opts || "--progress" in $lc_opts} {
set opt_progress 1
}
set opt_extra 0
if {"--extra" in $lc_opts} {
set opt_extra 1
}
set opt_vfs 0
#This configures whether to enter a vfsmount point
#It will have no effect if cwd already with a vfs mount point - as then opt_vfs will be set to 1 automatically anyway.
if {"--vfs" in $lc_opts} {
set opt_vfs 1
}
set result [list]
set dir_depths_remaining [list]
set is_windows [expr {$::tcl_platform(platform) eq "windows"}]
set zero [expr {0}]
# ## ### ### ### ###
# containerid and itemid
set folders [list] ;#we lookup string by index
lappend folders [file dirname $dir]
lappend folders $dir ;#itemindex 1
# ## ### ### ### ###
if {![file isdirectory $dir]} {
lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] [file size $dir]]
#set ary($dir,bytes) [file size $dir]
set leveldircount 0
} else {
lappend dir_depths_remaining [list $zero $zero [expr {1}] [lib::du_lit [file tail $dir]] $zero]
set leveldircount 1
}
set level [expr {0}]
set nextlevel [expr {1}]
#dir_depths list structure
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
#0 1 2 3 4 5
#i_depth i_containerid i_itemid i_item i_size i_index
# -- --- --- --- --- --- --- --- --- --- --- --- --- --- --- ---
set i_depth [expr {0}]
set i_containerid [expr {1}]
set i_itemid [expr {2}]
set i_item [expr {3}]
set i_size [expr {4}]
set i_index [expr {5}]
set listlength [llength $dir_depths_remaining]
set diridx 0
#this is a breadth-first algorithm
while {$leveldircount > 0} {
set leveldirs 0
set levelfiles 0
for {set i $diridx} {$i < $listlength} {incr i} {
#lassign [lindex $dir_depths_remaining $i] _d containeridx folderidx itm bytecount
set folderidx [lindex $dir_depths_remaining $i $i_itemid]
set folderpath [lindex $folders $folderidx]
#puts stderr ->$folderpath
#if {$i >= 20} {
#return
#}
#twapi supports gathering file sizes during directory contents traversal
#for dirlisting methods that return an empty list in filesizes whilst files has entries - we will need to populate it below
#e.g tcl glob based dirlisting doesn't support gathering file sizes at the same time
set in_vfs 0
if {[package provide vfs] ne ""} {
if {[file pathtype $folderpath] ne "absolute"} {
set testpath [file normalize $folderpath]
} else {
set testpath $folderpath
}
foreach vfsmount [vfs::filesystem info] {
if {[punk::mix::base::lib::path_a_atorbelow_b $testpath $vfsmount]} {
set in_vfs 1
#if already descended to or below a vfs mount point - set opt_vfs true
set opt_vfs 1
break
}
}
}
if {$in_vfs} {
set du_info [lib::du_dirlisting_tclvfs $folderpath]
} else {
set in_zipfs 0
if {[info commands ::tcl::zipfs::mount] ne ""} {
if {[file pathtype $folderpath] ne "absolute"} {
set testpath [file normalize $folderpath]
} else {
set testpath $folderpath
}
dict for {zmount zpath} [::tcl::zipfs::mount] {
if {[punk::mix::base::lib::path_a_atorbelow_b $testpath $zmount]} {
set in_zipfs 1
break
}
}
}
if {$in_zipfs} {
#relative vs absolute? review - cwd valid for //zipfs:/ ??
set du_info [lib::du_dirlisting_zipfs $folderpath]
} else {
#run the activated function (proc imported to active namespace and renamed)
set du_info [active::du_dirlisting $folderpath]
}
}
set dirs [dict get $du_info dirs]
set files [dict get $du_info files]
set filesizes [dict get $du_info filesizes]
set vfsmounts [dict get $du_info vfsmounts]
#puts "---> vfsmounts $vfsmounts "
if {$opt_vfs} {
foreach vm $vfsmounts {
#puts stderr "vm: $vm"
#check if vfs is mounted over a file or a dir
if {$vm in $files} {
puts stderr "vfs mounted over file $vm"
set mposn [lsearch $files $vm]
set files [lreplace $files $mposn $mposn]
if {[llength $filesizes]} {
set filesizes [lreplace $filesizes $mposn $mposn]
}
}
if {$vm ni $dirs} {
puts stderr "treating $vm as dir"
lappend dirs $vm
}
}
}
incr leveldirs [llength $dirs]
incr levelfiles [llength $files]
#lappend dir_depths_remaining {*}[lmap d $dirs {::list $nextdepth [lib::du_lit $cont/$itm] $d $zero}]
#folderidx is parent index for new dirs
lappend dir_depths_remaining {*}[lib::du_new_eachdir $dirs $nextlevel $folderidx]
#we don't need to sort files (unless we add an option such as -a to du (?))
set bytecount [expr {0}]
if {[llength $files] && ![llength $filesizes]} {
#listing mechanism didn't supply corresponding sizes
foreach filename $files {
#incr bytecount [file size [file join $folderpath $filename]
incr bytecount [file size $filename]
}
} else {
set filesizes [lsearch -all -inline -not $filesizes[unset filesizes] na] ;#only legal non-number is na
set bytecount [tcl::mathop::+ {*}$filesizes]
}
#we can safely assume initial count was zero
lset dir_depths_remaining $i $i_size $bytecount
#incr diridx
}
#puts stdout "level: $level dirs: $leveldirs"
if {$opt_extra} {
puts stdout "level: $level dircount: $leveldirs filecount: $levelfiles"
}
incr level ;#zero based
set nextlevel [expr {$level + 1}]
set leveldircount [expr {[llength $dir_depths_remaining] - $listlength }]; #current - previous - while loop terminates when zero
#puts "diridx: $diridx i: $i rem: [llength $dir_depths_remaining] listlenth:$listlength levldircount: $leveldircount"
set diridx $i
set listlength [llength $dir_depths_remaining]
}
#puts stdout ">>> loop done"
#flush stdout
#puts stdout $dir_depths_remaining
set dirs_as_encountered $dir_depths_remaining ;#index is in sync with 'folders' list
set dir_depths_longfirst $dirs_as_encountered
#store the index before sorting
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} {
lset dir_depths_longfirst $i $i_index $i
}
set dir_depths_longfirst [lsort -integer -index 0 -decreasing $dir_depths_longfirst[set dir_depths_longfirst {}]]
#store main index in the reducing list
set dir_depths_remaining $dir_depths_longfirst
for {set i 0} {$i < [llength $dir_depths_remaining]} {incr i} {
#stored index at position 3
lset dir_depths_remaining $i $i_index $i
}
#index 3
#dir_depths_remaining -> dir_depths_longfirst -> dirs_as_encountered
#puts stdout "initial dir_depths_remaining: $dir_depths_remaining"
#summing performance is not terrible but significant on large tree - the real time is for large trees in the main loop above
#update - on really large trees the reverse is true especiallyl now that twapi fixed the original speed issues.. todo - rework/simplify below - review natsort
#
#TODO - reconsider sorting by depth.. lreverse dirs_as_encountered should work..
if {[llength $dir_depths_longfirst] > 1} {
set i 0
foreach dd $dir_depths_longfirst {
lassign $dd d parentidx folderidx item bytecount
#set nm $cont/$item
set nm [lindex $folders $folderidx]
set dnext [expr {$d +1}]
set nextdepthposns [lsearch -all -integer -index 0 $dir_depths_remaining $dnext]
set nextdepthposns [lsort -integer -decreasing $nextdepthposns[set nextdepthposns {}]];#remove later elements first
foreach posn $nextdepthposns {
set id [lindex $dir_depths_remaining $posn $i_itemid]
set ndirname [lindex $folders $id]
#set ndirname $cont/$item
#set item [lindex $dir_depths_remaining $posn $i_item]
#set ndirname [lindex $ndir 1]
if {[string match $nm/* $ndirname]} {
#puts stdout "dir $nm adding subdir size $ndirname"
#puts stdout "incr $nm from $ary($nm,bytes) plus $ary($ndirname,bytes)"
incr bytecount [lindex $dir_depths_remaining $posn $i_size]
set dir_depths_remaining [lreplace $dir_depths_remaining[set dir_depths_remaining {}] $posn $posn]
}
}
lset dir_depths_longfirst $i $i_size $bytecount
set p [lsearch -index $i_index -integer $dir_depths_remaining $i]
lset dir_depths_remaining $p $i_size $bytecount
#set ary($nm,bytes) $bytecount
incr i
}
}
#set dir_depths_longfirst [lsort -index 1 -decreasing $dir_depths_longfirst]
#
set retval [list]
#copy across the bytecounts
for {set i 0} {$i < [llength $dir_depths_longfirst]} {incr i} {
set posn [lindex $dir_depths_longfirst $i $i_index]
set bytes [lindex $dir_depths_longfirst $i $i_size]
lset dirs_as_encountered $posn $i_size $bytes
}
foreach dirinfo [lreverse $dirs_as_encountered] {
set id [lindex $dirinfo $i_itemid]
set depth [lindex $dirinfo $i_depth]
if {($opt_depth >= 0) && $depth > $opt_depth} {
continue
}
set path [lindex $folders $id]
#set path $cont/$item
set item [lindex $dirinfo $i_item]
set bytes [lindex $dirinfo $i_size]
set size [expr {$bytes / [set $switch]}]
lappend retval [list $size $path]
}
# copyright 2002 by The LIGO Laboratory
return $retval
}
namespace eval active {
variable functions [list du_dirlisting ""]
variable functions_known [dict create]
#known functions from lib namespace
dict set functions_known du_dirlisting [list du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided]
proc show_functions {} {
variable functions
variable functions_known
set msg ""
dict for {callname implementations} $functions_known {
append msg "callname: $callname" \n
foreach imp $implementations {
if {[dict get $functions $callname] eq $imp} {
append msg " $imp (active)" \n
} else {
append msg " $imp" \n
}
}
}
return $msg
}
proc set_active_function {callname implementation} {
variable functions
variable functions_known
if {$callname ni [dict keys $functions_known]} {
error "unknown function callname $callname"
}
if {$implementation ni [dict get $functions_known $callname]} {
error "unknown implementation $implementation for callname $callname"
}
dict set functions $callname $implementation
catch {rename ::punk::du::active::$callname ""}
namespace eval ::punk::du::active [string map [list %imp% $implementation %call% $callname] {
namespace import ::punk::du::lib::%imp%
rename %imp% %call%
}]
return $implementation
}
proc get_active_function {callname} {
variable functions
variable functions_known
if {$callname ni [dict keys $functions_known]} {
error "unknown function callname $callname known functions: [dict keys $functions_known]"
}
return [dict get $functions $callname]
}
#where we import & the appropriate du_listing.. function for the platform
}
namespace eval lib {
variable du_literal
variable winfile_attributes [list 16 directory 32 archive 1024 reparse_point 18 [list directory hidden] 34 [list archive hidden] ]
#caching this is faster than calling twapi api each time.. unknown if twapi is calculating from bitmask - or calling windows api
#we could work out all flags and calculate from bitmask.. but it's not necessarily going to be faster than some simple caching mechanism like this
proc decode_win_attributes {bitmask} {
variable winfile_attributes
if {[dict exists $winfile_attributes $bitmask]} {
return [dict get $winfile_attributes $bitmask]
} else {
#list/dict shimmering?
return [lindex [lappend winfile_attributes $bitmask [twapi::decode_file_attributes $bitmask]] end]
}
}
proc attributes_twapi {path {detail basic}} {
try {
set iterator [twapi::find_file_open $path -detail $detail] ;# -detail full only adds data to the altname field
if {[twapi::find_file_next $iterator iteminfo]} {
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
set result [dict create -archive 0 -hidden 0 -longname $path -readonly 0 -shortname {} -system 0]
if {"hidden" in $attrinfo} {
dict set result -hidden 1
}
if {"system" in $attrinfo} {
dict set result -system 1
}
if {"readonly" in $attrinfo} {
dict set result -readonly 1
}
dict set result -shortname [dict get $iteminfo altname]
dict set result -rawflags $attrinfo
set extras [list]
#foreach prop {ctime atime mtime size} {
# lappend extras $prop [dict get $iteminfo $prop]
#}
#dict set result -extras $extras
dict set result -raw $iteminfo
return $result
} else {
error "could not read attributes for $path"
}
} finally {
catch {twapi::find_file_close $iterator}
}
}
#todo - review 'errors' key. We have errors relating to containing folder and args vs per child-item errors - additional key needed?
namespace export du_dirlisting_twapi du_dirlisting_generic du_dirlisting_unix du_dirlisting_undecided
# get listing without using unix-tools (may not be installed on the windows system)
# this dirlisting is customised for du - so only retrieves dirs,files,filesizes (minimum work needed to perform du function)
# This also preserves path rep for elements in the dirs/folders keys etc - which can make a big difference in performance
proc du_dirlisting_twapi {folderpath args} {
set defaults [dict create\
-glob *\
-with_sizes 1\
-with_times 1\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean - (f false vs f file!)
#only accept 0|1
if {$opt_with_sizes} {
set sized_types $ftypes
} else {
set sized_types [list]
}
} else {
set sized_types $opt_with_sizes
}
if {[llength $sized_types]} {
foreach st $sized_types {
if {$st ni $ftypes} {
error "du_dirlisting_twapi unrecognized element in -with_sizes '$st'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_times [dict get $opts -with_times]
if {"$opt_with_times" in {0 1}} {
if {$opt_with_times} {
set timed_types $ftypes
} else {
set timed_types [list]
}
} else {
set timed_types $opt_with_times
}
if {[llength $timed_types]} {
foreach item $timed_types {
if {$item ni $ftypes} {
error "du_dirlisting_twapi unrecognised element in -with-times '$item'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set errors [dict create]
set altname "" ;#possible we have to use a different name e.g short windows name or dos-device path //?/
# return it so it can be stored and tried as an alternative for problem paths
#puts stderr ">>> glob: $opt_glob"
#REVIEW! windows api pattern matchttps://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/hing is .. weird. partly due to 8.3 filenames
#https://www.red-gate.com/simple-talk/blogs/the-unexpected-behaviour-of-directoryinfo-getfiles-with-three-letter-extensions/
#we will certainly need to check the resulting listing with our supplied glob.. but maybe we will have to change the glob passed to find_file_open too.
# using * all the time may be inefficient - so we might be able to avoid that in some cases.
try {
#glob of * will return dotfiles too on windows
set iterator [twapi::find_file_open [file join $folderpath $opt_glob] -detail basic] ;# -detail full only adds data to the altname field
} on error args {
try {
if {[string match "*denied*" $args]} {
#output similar format as unixy du
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
if {[string match "*TWAPI_WIN32 59*" $::errorCode]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (possibly blocked by permissions or share config e.g follow symlinks = no on samba)"
puts stderr " (errorcode: $::errorCode)\n"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#errorcode TWAPI_WIN32 2 {The system cannot find the file specified.}
#This can be a perfectly normal failure to match the glob.. which means we shouldn't really warn or error
#The find-all glob * won't get here because it returns . & ..
#so we should return immediately only if the glob has globchars ? or * but isn't equal to just "*" ? (review)
#Note that windows glob ? seems to return more than just single char results - it includes .. - which differs to tcl glob
#also ???? seems to returns items 4 or less - not just items exactly 4 long (review - where is this documented?)
if {$opt_glob ne "*" && [regexp {[?*]} $opt_glob]} {
if {[string match "*TWAPI_WIN32 2 *" $::errorCode]} {
#looks like an ordinary no results for chosen glob
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
if {[set plen [pathcharacterlen $folderpath]] >= 250} {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
# re-fetch this folder with altnames
#file normalize - aside from being slow - will have problems with long paths - so this won't work.
#this function should only accept absolute paths
#
#
#Note: using -detail full only helps if the last segment of path has an altname..
#To properly shorten we need to have kept track of altname all the way from the root!
#We can .. for now call Tcl's file attributes to get shortname of the whole path - it is *expensive* e.g 5ms for a long path on local ssd
#### SLOW
set fixedpath [dict get [file attributes $folderpath] -shortname]
#### SLOW
append errmsg "retrying with with windows altname '$fixedpath'"
puts stderr $errmsg
} else {
set errmsg "error reading folder: $folderpath (len:$plen)\n"
append errmsg "error: $args" \n
append errmsg "errorcode: $::errorCode" \n
set tmp_errors [list $::errorCode]
#possibly an illegal windows filename - easily happens on a machine with WSL or with drive mapped to unix share
#we can use //?/path dos device path - but not with tcl functions
#unfortunately we can't call find_file_open directly on the problem name - we have to call the parent folder and iterate through again..
#this gets problematic as we go deeper unless we rewrite the .. but we can get at least one level further here
set fixedtail ""
set parent [file dirname $folderpath]
set badtail [file tail $folderpath]
set iterator [twapi::find_file_open [file join $parent *] -detail full] ;#retrieve with altnames
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
if {$nm eq $badtail} {
set fixedtail [dict get $iteminfo altname]
break
}
}
if {![string length $fixedtail]} {
dict lappend errors $folderpath {*}$tmp_errors
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (Unable to retrieve altname to progress further with path - returning no contents for this folder)"
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#twapi as at 2023-08 doesn't seem to support //?/ dos device paths..
#Tcl can test only get as far as testing existence of illegal name by prefixing with //?/ - but can't glob inside it
#we can call file attributes on it - but we get no shortname (but we could get shortname for parent that way)
#so the illegalname_fix doesn't really work here
#set fixedpath [punk::winpath::illegalname_fix $parent $fixedtail]
#this has shortpath for the tail - but it's not the canonical-shortpath because we didn't call it on the $parent part REIEW.
set fixedpath [file join $parent $fixedtail]
append errmsg "retrying with with windows dos device path $fixedpath\n"
puts stderr $errmsg
}
if {[catch {
set iterator [twapi::find_file_open $fixedpath/* -detail basic]
} errMsg]} {
puts stderr "[file dirname $folderpath] '[file tail $folderpath]':$args (failed to read even with fixedpath:'$fixedpath')"
puts stderr " (errorcode: $::errorCode)\n"
puts stderr "$errMsg"
dict lappend errors $folderpath $::errorCode
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
} on error args {
set errmsg "error reading folder: $folderpath\n"
append errmsg "error: $args" \n
append errmsg "errorInfo: $::errorInfo" \n
puts stderr "$errmsg"
puts stderr "FAILED to collect info for folder '$folderpath'"
#append errmsg "aborting.."
#error $errmsg
return [list dirs {} vfsmounts {} links {} files {} filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
}
set dirs [list]
set files [list]
set filesizes [list]
set allsizes [dict create]
set alltimes [dict create]
set links [list]
set flaggedhidden [list]
set flaggedsystem [list]
set flaggedreadonly [list]
while {[twapi::find_file_next $iterator iteminfo]} {
set nm [dict get $iteminfo name]
#recheck glob
#review!
if {![string match $opt_glob $nm]} {
continue
}
set tail_altname [dict get $iteminfo altname] ;#altname of tail - not whole path
set attrinfo [decode_win_attributes [dict get $iteminfo attrs]]
#puts stderr "$iteminfo"
#puts stderr "$nm -> [dict get $iteminfo attrs] -> $attrinfo"
set ftype ""
#attributes applicable to any classification
set fullname [file_join_one $folderpath $nm]
if {"hidden" in $attrinfo} {
lappend flaggedhidden $fullname
}
if {"system" in $attrinfo} {
lappend flaggedsystem $fullname
}
if {"readonly" in $attrinfo} {
lappend flaggedreadonly $fullname
}
#main classification
if {"reparse_point" in $attrinfo} {
#this concept doesn't correspond 1-to-1 with unix links
#https://learn.microsoft.com/en-us/windows/win32/fileio/reparse-points
#review - and see which if any actually belong in the links key of our return
#One thing it could be, is a 'mounted folder' https://learn.microsoft.com/en-us/windows/win32/fileio/determining-whether-a-directory-is-a-volume-mount-point
#
#we will treat as zero sized for du purposes.. review - option -L for symlinks like BSD du?
#Note 'file readlink' can fail on windows - reporting 'invalid argument' - according to tcl docs, 'On systems that don't support symbolic links this option is undefined'
#The link may be viewable ok in windows explorer, and cmd.exe /c dir and unix tools such as ls
#if we need it without resorting to unix-tools that may not be installed: exec {*}[auto_execok dir] /A:L {c:\some\path}
#e.g (stripped of headers/footers and other lines)
#2022-10-02 04:07 AM <SYMLINKD> priv [\\?\c:\repo\elixir\gameportal\apps\test\priv]
#Note we will have to parse beyond header fluff as /B strips the symlink info along with headers.
#du includes the size of the symlink
#but we can't get it with tcl's file size
#twapi doesn't seem to have anything to help read it either (?)
#the above was verified with a symlink that points to a non-existant folder.. mileage may vary for an actually valid link
#
#Note also - a shortcut created in explorer with drag and drop to an existant folder is a different animal to a symlink (file with .lnk extension) even though it looks the same in explorer window.
#
#links are techically files too, whether they point to a file/dir or nothing.
lappend links $fullname
set ftype "l"
} elseif {"directory" in $attrinfo} {
if {$nm in {. ..}} {
continue
}
lappend dirs $fullname
set ftype "d"
} else {
#review - is anything that isn't a reparse_point or a directory, some sort of 'file' in this context? What about the 'device' attribute? Can that occur in a directory listing of some sort?
lappend files $fullname
if {"f" in $sized_types} {
lappend filesizes [dict get $iteminfo size]
}
set ftype "f"
}
if {$ftype in $sized_types} {
dict set allsizes $fullname [dict create bytes [dict get $iteminfo size]]
}
if {$ftype in $timed_types} {
#convert time from windows (100ns units since jan 1, 1601) to Tcl time (seconds since Jan 1, 1970)
#We lose some precision by not passing the boolean to the large_system_time_to_secs_since_1970 function which returns fractional seconds
#but we need to maintain compatibility with other platforms and other tcl functions so if we want to return more precise times we will need another flag and/or result dict
dict set alltimes $fullname [dict create\
c [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo ctime]]\
a [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo atime]]\
m [twapi::large_system_time_to_secs_since_1970 [dict get $iteminfo mtime]]\
]
}
}
twapi::find_file_close $iterator
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
#also determine whether vfs. file system x is *much* faster than file attributes
#whether or not there is a corresponding file/dir add any applicable mountpoints for the containing folder
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes $filesizes sizes $allsizes times $alltimes flaggedhidden $flaggedhidden flaggedsystem $flaggedsystem flaggedreadonly $flaggedreadonly altname $altname opts $effective_opts errors $errors]
}
proc get_vfsmounts_in_folder {folderpath} {
set vfsmounts [list]
if {![llength [package provide vfs]]} {
return [list]
}
set fpath [punk::objclone $folderpath]
set is_rel 0
if {[file pathtype $fpath] ne "absolute"} {
set fpath [file normalize $fpath]
set is_rel 1
}
set known_vfs_mounts [vfs::filesystem info]
foreach mount $known_vfs_mounts {
if {[punk::mix::base::lib::path_a_above_b $fpath $mount]} {
if {([llength [file split $mount]] - [llength [file split $fpath]]) == 1} {
#the mount is in this folder
if {$is_rel} {
lappend vfsmounts [file join $folderpath [file tail $mount]]
} else {
lappend vfsmounts $mount
}
}
}
}
return $vfsmounts
}
#work around the horrible tilde-expansion thing (not needed for tcl 9+)
proc file_join_one {base newtail} {
if {[string index $newtail 0] ne {~}} {
return [file join $base $newtail]
}
return [file join $base ./$newtail]
}
#this is the cross-platform pure-tcl version - which calls glob multiple times to make sure it gets everythign it needs and can ignore everything it needs to.
#These repeated calls to glob will be a killer for performance - especially on a network share or when walking a large directory structure
proc du_dirlisting_generic {folderpath args} {
set opts [dict create\
-glob *\
-with_sizes 0\
-with_times 0\
]
set errors [dict create]
foreach {k v} $args {
switch -- $k {
-glob - -with_sizes - -with_times {
dict set opts $k $v
}
default {
error "du_dirlisting_generic unknown-option '$k'. Known-options: [dict keys $opts]"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean (false vs f problem where f indicates file)
if {$opt_with_sizes} {
set sized_types $ftypes
} else {
set sized_types [list]
}
} else {
set sized_types $opt_with_sizes
}
if {[llength $sized_types]} {
foreach st $sized_types {
if {$st ni $ftypes} {
error "du_dirlisting_generic unrecognized element in -with_sizes '$st'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_times [dict get $opts -with_times]
if {"$opt_with_times" in {0 1}} {
if {$opt_with_times} {
set timed_types $ftypes
} else {
set timed_types [list]
}
} else {
set timed_types $opt_with_times
}
if {[llength $timed_types]} {
foreach item $timed_types {
if {$item ni $ftypes} {
error "du_dirlisting_generic unrecognised element in -with-times '$item'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
# The repeated globs are a source of slowness for this function.
#TODO - we could minimize the number of globs if we know we need to do a file stat and/or file attributes on each entry anyway
#For the case where we don't need times,sizes or other metadata - it is faster to do multiple globs
#This all makes this function complicated to gather the required data efficiently.
#note platform differences between what is considered hidden make this tricky.
# on windows 'glob .*' will not return some hidden dot items but will return . .. and glob -types hidden .* will not return some dotted items
# glob -types hidden * on windows will not necessarily return all dot files/folders
# unix-like platforms seem to consider all dot files as hidden so processing is more straightforward
# we need to process * and .* in the same glob calls and remove duplicates
# if we do * and .* in separate iterations of this loop we lose the ability to filter duplicates easily
#note - with this design, we can't glob for all except dotfiles - this is for cross-platform consistency and efficiency (Review).
#dotfiles aren't considered hidden on all platforms
#some sort of antiglob is a possible enhancement and more consistent with the view that leading dot should be treated as any other filename character in this context.
if {$opt_glob eq "*"} {
#Note - glob operations seem to be faster than looped tests like 'file isdirectory' & 'file readlink'
#set parent [lindex $folders $folderidx]
set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} * .*]
#set hdirs {}
set dirs [glob -nocomplain -dir $folderpath -types d * .*]
set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} * .*]
#set hlinks {}
set links [glob -nocomplain -dir $folderpath -types l * .*] ;#links may have dupes - we don't care. struct::set difference will remove (?)
#set links [lsort -unique [concat $hlinks $links[unset links]]]
set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} * .*]
#set hfiles {}
set files [glob -nocomplain -dir $folderpath -types f * .*]
#set files {}
} else {
set hdirs [glob -nocomplain -dir $folderpath -types {hidden d} $opt_glob]
set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob]
set hlinks [glob -nocomplain -dir $folderpath -types {hidden l} $opt_glob]
set links [glob -nocomplain -dir $folderpath -types l $opt_glob] ;#links may have dupes - we don't care. struct::set difference will remove (?)
set hfiles [glob -nocomplain -dir $folderpath -types {hidden f} $opt_glob]
set files [glob -nocomplain -dir $folderpath -types f $opt_glob]
}
#note struct::set difference produces unordered result
#struct::set difference removes duplicates (but not always.. e.g if using tcl impl and 2nd element empty!)
#relying on struct::set to remove dupes is somewhat risky. It is not well documented - and behaviour of dupes in inputs is underspecified as it appears to be intended for mathematical 'sets'
#remove links and . .. from directories, remove links from files
#ideally we would like to classify links by whether they point to files vs dirs - but there are enough cross-platform differences that we will have to leave it to the caller to sort out for now.
#struct::set will affect order: tcl vs critcl give different ordering!
set files [struct::set difference [concat $hfiles $files[unset files]] $links]
set dirs [struct::set difference [concat $hdirs $dirs[unset dirs]] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
#set links [lsort -unique [concat $links $hlinks]]
#----
set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links]
if {"windows" eq $::tcl_platform(platform)} {
set flaggedhidden [concat $hdirs $hfiles $hlinks]
} else {
#unix dotted files/folders are not 'flagged' as such - it's a convention - so the client can use the same convention to decide if something is hidden
#this allows us to return less data - but more importantly - reserve flaggedhidden for systems where such a flag exists and avoid conflating the different concepts of what is hidden
set flaggedhidden {}
}
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden $flaggedhidden flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors]
}
#zipfs attributes/behaviour fairly different to tclvfs - keep separate
#e.g we can populate compsizes for files (compressed size)
proc du_dirlisting_zipfs {folderpath args} {
puts stderr "zipfs: $folderpath"
set defaults [dict create\
-glob *\
-with_sizes 0\
-with_times 0\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean (false vs f problem where f indicates file)
if {$opt_with_sizes} {
set sized_types $ftypes
} else {
set sized_types [list]
}
} else {
set sized_types $opt_with_sizes
}
if {[llength $sized_types]} {
foreach st $sized_types {
if {$st ni $ftypes} {
error "du_dirlisting_zipfs unrecognized element in -with_sizes '$st'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_times [dict get $opts -with_times]
if {"$opt_with_times" in {0 1}} {
if {$opt_with_times} {
set timed_types $ftypes
} else {
set timed_types [list]
}
} else {
set timed_types $opt_with_times
}
if {[llength $timed_types]} {
foreach item $timed_types {
if {$item ni $ftypes} {
error "du_dirlisting_zipfs unrecognised element in -with-times '$item'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
set errors [dict create]
#determine if the 'folder' we are looking at is at or below a zipfs mountpoint.
#we could be somewhere between //zipfs:/ and //zipfs:/.../.../mountpoint
#The paths in between will not respond to file attr file stat etc (error after long delay)
#we can calculate these intermediate paths from the zipfs mount data and return these intermediates as 'folders' to make the structure navigable
if {$folderpath eq "//zipfs:/"} {
#review - currently dead branch as we can't glob or navigate here?
set dirs [list]
dict for {mount _zipfile} [::tcl::zipfs::mount] {
set parts [file split $mount]
set next [lindex $parts 1]
if {$next ni $dirs} {
lappend dirs $next
}
}
set files {} ;#no files directly here
set links {}
set vfsmounts {} ;#review
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors]
}
set vdirs [list]
dict for {zmount zfile} [::tcl::zipfs::mount] {
if {[punk::mix::base::lib::path_a_above_b $folderpath $zmount]} {
#location is above zmount = location is shorter path than zmount
set remainder [punk::repo::path_strip_prefixdepth $zmount $folderpath] ;#supply shorter path (one to be stripped) as second arg
set next [lindex [file split $remainder] 0]
if {$next ni $vdirs} {
lappend vdirs $next
}
}
}
if {[llength $vdirs]} {
#location is virtual (intermediary location above one or more zipfs mount points)
#'file isdirectory' etc & glob - will not work
set files {}
set links {}
set vfsmounts {} ;#review
return [list dirs $vdirs vfsmounts $vfsmounts links $links files $files filesizes {} sizes {} times {} flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors]
}
#if {[punk::mix::base::lib::path_a_above_b $folderpath "//zipfs:/"]} {}
#zipfs files also reported as links by glob - review - should we preserve this in response?
if {$opt_glob eq "*"} {
set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs
#set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove
set links [list]
set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files
} else {
set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob]
#set links [glob -nocomplain -dir $folderpath -types l $opt_glob]
set links [list]
set files [glob -nocomplain -dir $folderpath -types f $opt_glob]
}
#remove any links from our dirs and files collections
set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
set files [struct::set difference $files[unset files] $links]
#nested vfs mount.. REVIEW - does anything need special handling?
#zipfs probably can't nest in itself - but unknown if vfs can be mounted within //zipfs:/ structure - todo TEST
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links]
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors]
}
proc du_dirlisting_tclvfs {folderpath args} {
set defaults [dict create\
-glob *\
-with_sizes 0\
-with_times 0\
]
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean (false vs f problem where f indicates file)
if {$opt_with_sizes} {
set sized_types $ftypes
} else {
set sized_types [list]
}
} else {
set sized_types $opt_with_sizes
}
if {[llength $sized_types]} {
foreach st $sized_types {
if {$st ni $ftypes} {
error "du_dirlisting_tclvfs unrecognized element in -with_sizes '$st'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_times [dict get $opts -with_times]
if {"$opt_with_times" in {0 1}} {
if {$opt_with_times} {
set timed_types $ftypes
} else {
set timed_types [list]
}
} else {
set timed_types $opt_with_times
}
if {[llength $timed_types]} {
foreach item $timed_types {
if {$item ni $ftypes} {
error "du_dirlisting_tclvfs unrecognised element in -with-times '$item'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set errors [dict create]
if {$opt_glob eq "*"} {
set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs
#review - how are links handled in vfs? presumably if the vfs is a full implementation they should work at least within the vfs?
set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove
set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files
} else {
set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob]
set links [glob -nocomplain -dir $folderpath -types l $opt_glob]
set files [glob -nocomplain -dir $folderpath -types f $opt_glob]
}
#remove any links from our dirs and files collections
set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
set files [struct::set difference $files[unset files] $links]
#nested vfs mount.. REVIEW - does anything need special handling?
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $effective_opts errors $errors]
}
#we can halve the number of round trips on unix-like systems, where 'hidden' always corresponds to dotted files
proc du_dirlisting_unix {folderpath args} {
set defaults [dict create\
-glob *\
-with_sizes 0\
-with_times 0\
]
set errors [dict create]
dict lappend errors $folderpath "metadata support incomplete - prefer du_dirlisting_generic"
set opts [dict merge $defaults $args]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_glob [dict get $opts -glob]
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_sizes [dict get $opts -with_sizes]
set ftypes [list f d l]
if {"$opt_with_sizes" in {0 1}} {
#don't use string is boolean (false vs f problem where f indicates file)
if {$opt_with_sizes} {
set sized_types $ftypes
} else {
set sized_types [list]
}
} else {
set sized_types $opt_with_sizes
}
if {[llength $sized_types]} {
foreach st $sized_types {
if {$st ni $ftypes} {
error "du_dirlisting_generic unrecognized element in -with_sizes '$st'"
}
}
}
# -- --- --- --- --- --- --- --- --- --- --- --- --- ---
set opt_with_times [dict get $opts -with_times]
if {"$opt_with_times" in {0 1}} {
if {$opt_with_times} {
set timed_types $ftypes
} else {
set timed_types [list]
}
} else {
set timed_types $opt_with_times
}
if {[llength $timed_types]} {
foreach item $timed_types {
if {$item ni $ftypes} {
error "du_dirlisting_generic unrecognised element in -with-times '$item'"
}
}
}
#this is not consistent with standard way * works for unix folders - but it is consistent with behaviour of this facility on windows
if {$opt_glob eq "*"} {
set dirs [glob -nocomplain -dir $folderpath -types d * .*] ;# also returns links to dirs
set links [glob -nocomplain -dir $folderpath -types l * .*] ;# links may have dupes - we don't care. struct::set difference will remove
set files [glob -nocomplain -dir $folderpath -types f * .*] ;# also returns links to files
} else {
set dirs [glob -nocomplain -dir $folderpath -types d $opt_glob]
set links [glob -nocomplain -dir $folderpath -types l $opt_glob]
set files [glob -nocomplain -dir $folderpath -types f $opt_glob]
}
#remove any links from our dirs and files collections
set dirs [struct::set difference $dirs[unset dirs] [concat $links [list [file join $folderpath .] [file join $folderpath ..] ]]]
set files [struct::set difference $files[unset files] $links]
set vfsmounts [get_vfsmounts_in_folder $folderpath]
set mdata_lists [du_get_metadata_lists $sized_types $timed_types $files $dirs $links]
set effective_opts $opts
dict set effective_opts -with_times $timed_types
dict set effective_opts -with_sizes $sized_types
return [list dirs $dirs vfsmounts $vfsmounts links $links files $files filesizes [dict get $mdata_lists fsizes] sizes [dict get $mdata_lists allsizes] times [dict get $mdata_lists alltimes] flaggedhidden {} flaggedsystem {} flaggedreadonly {} altname {} opts $opts errors $errors]
}
#return fsizes,allsizes,alltimes metadata in same order as files,dirs,links lists - if specified in sized_types
proc du_get_metadata_lists {sized_types timed_types files dirs links} {
set meta_dict [dict create]
set meta_types [concat $sized_types $timed_types]
#known tcl stat keys 2023 - review
set empty_stat_dict [dict create atime {} ctime {} dev {} gid {} ino {} mode {} mtime {} nlink {} size {} type {} uid {}]
#make sure we call file stat only once per item
set statkeys [list]
if {[llength $meta_types]} {
foreach ft {f d l} lvar {files dirs links} {
if {"$ft" in $meta_types} {
foreach path [set $lvar] {
#caller may have read perm on the containing folder - but not on child item - so file stat could raise an error
if {![catch {file stat $path arrstat} errM]} {
dict set meta_dict $path [dict create shorttype $ft {*}[array get arrstat]]
} else {
dict lappend errors $path "file stat error: $errM"
dict set meta_dict $path [dict create shorttype $ft {*}$empty_stat_dict]
}
}
}
}
}
set fsizes [list]
set allsizes [dict create]
set alltimes [dict create]
#review birthtime field of stat? cross-platform differences ctime etc?
dict for {path pathinfo} $meta_dict {
set ft [dict get $pathinfo shorttype]
if {$ft in $sized_types} {
dict set allsizes $path [dict create bytes [dict get $pathinfo size]]
if {$ft eq "f"} {
#subst with na if empty?
lappend fsizes [dict get $pathinfo size]
}
}
if {$ft in $timed_types} {
dict set alltimes $path [dict create c [dict get $pathinfo ctime] a [dict get $pathinfo atime] m [dict get $pathinfo mtime]]
}
}
#todo - fix . The list lengths will presumably match but have empty values if failed to stat
if {"f" in $sized_types} {
if {[llength $fsizes] ne [llength $files]} {
dict lappend errors $folderpath "failed to retrieve all file sizes"
}
}
return [dict create fsizes $fsizes allsizes $allsizes alltimes $alltimes]
}
proc du_lit value {
variable du_literal
if {![info exists du_literal($value)]} {
set du_literal($value) $value
}
return $du_literal($value)
}
#v1
proc du_new_eachdirtail {dirtails depth parentfolderidx} {
set newlist {}
upvar folders folders
set parentpath [lindex $folders $parentfolderidx]
set newindex [llength $folders]
foreach dt $dirtails {
lappend folders [file join $parentpath [du_lit $dt]]; #store as a 'path' rather than a string (see tcl::unsupported::representation)
lappend newlist [::list $depth $parentfolderidx $newindex [du_lit $dt] [expr {0}]]
incr newindex
}
return $newlist
}
proc du_new_eachdir {dirpaths depth parentfolderidx} {
set newlist {}
upvar folders folders
set newindex [llength $folders]
foreach dp $dirpaths {
lappend folders $dp
#puts stdout "--->$dp"
lappend newlist [::list $depth $parentfolderidx $newindex [du_lit [file tail $dp]] [expr {0}]]
incr newindex
}
return $newlist
}
#same implementation as punk::strlen
#get length of path which has internal rep of path - maintaining path/list rep without shimmering to string representation.
proc pathcharacterlen {pathrep} {
append str2 $pathrep {}
string length $str2
}
#just an experiment
proc pathcharacterlen1 {pathrep} {
#This works - but is unnecessarily complex
set l 0
set parts [file split $pathrep]
if {[llength $parts] < 2} {
return [string length [lindex $parts 0]]
}
foreach seg $parts {
incr l [string length $seg]
}
return [expr {$l + [llength $parts] -2}]
}
#slower - doesn't work for short paths like c:/
proc pathcharacterlen2 {pathrep} {
return [tcl::mathop::+ {*}[lmap v [set plist [file split $pathrep]] {[string length $v]}] [llength $plist] -2]
}
#Strip using lengths without examining path components
#without normalization is much faster
proc path_strip_alreadynormalized_prefixdepth {path prefix} {
set tail [lrange [file split $path] [llength [file split $prefix]] end]
if {[llength $tail]} {
return [file join {*}$tail]
} else {
return ""
}
}
proc du_dirlisting_undecided {folderpath args} {
if {"windows" eq $::tcl_platform(platform)} {
#jmn disable twapi
#tailcall du_dirlisting_generic $folderpath {*}$args
package require zzzload
set loadstate [zzzload::pkg_require twapi]
if {$loadstate ni [list loading failed]} {
#either already loaded by zzload or ordinary package require
package require twapi ;#should be fast once twapi dll loaded in zzzload thread
set ::punk::du::has_twapi 1
punk::du::active::set_active_function du_dirlisting du_dirlisting_twapi
tailcall du_dirlisting_twapi $folderpath {*}$args
} else {
if {$loadstate eq "failed"} {
puts stderr "punk::du defaulting to du_dirlisting_generic because twapi load failed"
punk::du::active::set_active_function du_dirlisting du_dirlisting_generic
}
tailcall du_dirlisting_generic $folderpath {*}$args
}
} else {
punk::du::active::set_active_function du_dirlisting du_dirlisting_unix
tailcall du_dirlisting_unix $folderpath {*}$args
}
}
}
package require natsort
#interp alias {} du {} .=args>* punk::du |> .=>1 natsort::sort -cols 1 |> list_as_lines <args|
#use natsort -debug 2 to see index output
#this works better for display of directory/file names with spaces (doesn't show curly braces)
interp alias {} du2 {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat words |> list_as_lines <args|
#experiment with csv as easy way to get column like format..
#The /r is somewhat cheating however.. as it messes up redirected output .. e.g if redirected to text file
interp alias {} du {} .=args>* punk::du::du |> .=>1 natsort::sort -cols 1 -outputformat csv -outputformatoptions {\r\t\t\t} |> list_as_lines -- <args|
}
namespace eval ::punk::du::active {
variable functions
variable functions_kown
upvar ::punk::du::has_twapi has_twapi
set_active_function du_dirlisting du_dirlisting_undecided
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
## Ready
package provide punk::du [namespace eval punk::du {
variable version
set version 0.1.0
}]
return