# traverse.tcl -- # # Directory traversal. # # Copyright (c) 2006-2015 by Andreas Kupries # # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. package require Tcl 8.5 9 # OO core if {[package vsatisfies [package present Tcl] 8.5 9]} { # Use new Tcl 8.5a6+ features to specify the allowed packages. # We can use anything above 1.3. This means v2 as well. package require snit 1.3- } else { # For Tcl 8.{3,4} only snit1 of a suitable patchlevel is possible. package require snit 1.3 } package require control ; # Helpers for control structures package require fileutil ; # -> fullnormalize snit::type ::fileutil::traverse { # Incremental directory traversal. # API # create %AUTO% basedirectory options... -> object # next filevar -> boolean # foreach filevar script # files -> list (path ...) # Options # -prefilter command-prefix # -filter command-prefix # -errorcmd command-prefix # Use cases # # (a) Basic incremental # - Create and configure a traversal object. # - Execute 'next' to retrieve one path at a time, # until the command returns False, signaling that # the iterator has exhausted the supply of paths. # (The path is stored in the named variable). # # The execution of 'next' can be done in a loop, or via event # processing. # (b) Basic loop # - Create and configure a traversal object. # - Run a script for each path, using 'foreach'. # This is a convenient standard wrapper around 'next'. # # The loop properly handles all possible Tcl result codes. # (c) Non-incremental, non-looping. # - Create and configure a traversal object. # - Retrieve a list of all paths via 'files'. # The -prefilter callback is executed for directories. Its result # determines if the traverser recurses into the directory or not. # The default is to always recurse into all directories. The call- # back is invoked with a single argument, the path of the # directory. # # The -filter callback is executed for all paths. Its result # determines if the current path is a valid result, and returned # by 'next'. The default is to accept all paths as valid. The # callback is invoked with a single argument, the path to check. # The -errorcmd callback is executed for all paths the traverser # has trouble with. Like being unable to cd into them, get their # status, etc. The default is to ignore any such problems. The # callback is invoked with a two arguments, the path for which the # error occured, and the error message. Errors thrown by the # filter callbacks are handled through this callback too. Errors # thrown by the error callback itself are not caught and ignored, # but allowed to pass to the caller, usually of 'next'. # Note: Low-level functionality, version and platform dependent is # implemented in procedures, and conditioally defined for optimal # use of features, etc. ... # Note: Traversal is done in depth-first pre-order. # Note: The options are handled only during # construction. Afterward they are read-only and attempts to # modify them will cause the system to throw errors. # ### ### ### ######### ######### ######### ## Implementation option -filter -default {} -readonly 1 option -prefilter -default {} -readonly 1 option -errorcmd -default {} -readonly 1 constructor {basedir args} { set _base $basedir $self configurelist $args return } method files {} { set files {} $self foreach f {lappend files $f} return $files } method foreach {fvar body} { upvar 1 $fvar currentfile # (Re-)initialize the traversal state on every call. $self Init while {[$self next currentfile]} { set code [catch {uplevel 1 $body} result] # decide what to do upon the return code: # # 0 - the body executed successfully # 1 - the body raised an error # 2 - the body invoked [return] # 3 - the body invoked [break] # 4 - the body invoked [continue] # everything else - return and pass on the results # switch -exact -- $code { 0 {} 1 { return -errorinfo [::control::ErrorInfoAsCaller uplevel foreach] \ -errorcode $::errorCode -code error $result } 3 { # FRINK: nocheck return } 4 {} default { return -code $code $result } } } return } method next {fvar} { upvar 1 $fvar currentfile # Initialize on first call. if {!$_init} { $self Init } # We (still) have valid paths in the result stack, return the # next one. if {[llength $_results]} { set top [lindex $_results end] set _results [lreplace $_results end end] set currentfile $top return 1 } # Take the next directory waiting in the processing stack and # fill the result stack with all valid files and sub- # directories contained in it. Extend the processing queue # with all sub-directories not yet seen already (!circular # symlinks) and accepted by the prefilter. We stop iterating # when we either have no directories to process anymore, or # the result stack contains at least one path we can return. while {[llength $_pending]} { set top [lindex $_pending end] set _pending [lreplace $_pending end end] # Directory accessible? Skip if not. if {![ACCESS $top]} { Error $top "Inacessible directory" continue } # Expand the result stack with all files in the directory, # modulo filtering. foreach f [GLOBF $top] { if {![Valid $f]} continue lappend _results $f } # Expand the result stack with all sub-directories in the # directory, modulo filtering. Further expand the # processing stack with the same directories, if not seen # yet and modulo pre-filtering. foreach f [GLOBD $top] { if { [string equal [file tail $f] "."] || [string equal [file tail $f] ".."] } continue if {[Valid $f]} { lappend _results $f } Enter $top $f if {[Cycle $f]} continue if {[Recurse $f]} { lappend _pending $f } } # Stop expanding if we have paths to return. if {[llength $_results]} { set top [lindex $_results end] set _results [lreplace $_results end end] set currentfile $top return 1 } } # Allow re-initialization with next call. set _init 0 return 0 } # ### ### ### ######### ######### ######### ## Traversal state # * Initialization flag. Checked in 'next', reset by next when no # more files are available. Set in 'Init'. # * Base directory (or file) to start the traversal from. # * Stack of prefiltered unknown directories waiting for # processing, i.e. expansion (TOP at end). # * Stack of valid paths waiting to be returned as results. # * Set of directories already visited (normalized paths), for # detection of circular symbolic links. variable _init 0 ; # Initialization flag. variable _base {} ; # Base directory. variable _pending {} ; # Processing stack. variable _results {} ; # Result stack. # sym link handling (to break cycles, while allowing the following of non-cycle links). # Notes # - path parent tracking is lexical. # - path identity tracking is based on the normalized path, i.e. the path with all # symlinks resolved. # Maps # - path -> parent (easier to follow the list than doing dirname's) # - path -> normalized (cache to avoid redundant calls of fullnormalize) # cycle <=> A parent's normalized form (NF) is identical to the current path's NF variable _parent -array {} variable _norm -array {} # ### ### ### ######### ######### ######### ## Internal helpers. proc Enter {parent path} { #puts ___E|$path upvar 1 _parent _parent _norm _norm set _parent($path) $parent set _norm($path) [fileutil::fullnormalize $path] } proc Cycle {path} { upvar 1 _parent _parent _norm _norm set nform $_norm($path) set paren $_parent($path) while {$paren ne {}} { if {$_norm($paren) eq $nform} { return yes } set paren $_parent($paren) } return no } method Init {} { array unset _parent * array unset _norm * # Path ok as result? if {[Valid $_base]} { lappend _results $_base } # Expansion allowed by prefilter? if {[file isdirectory $_base] && [Recurse $_base]} { Enter {} $_base lappend _pending $_base } # System is set up now. set _init 1 return } proc Valid {path} { #puts ___V|$path upvar 1 options options if {![llength $options(-filter)]} {return 1} set path [file normalize $path] set code [catch {uplevel \#0 [linsert $options(-filter) end $path]} valid] if {!$code} {return $valid} Error $path $valid return 0 } proc Recurse {path} { #puts ___X|$path upvar 1 options options _norm _norm if {![llength $options(-prefilter)]} {return 1} set path [file normalize $path] set code [catch {uplevel \#0 [linsert $options(-prefilter) end $path]} valid] if {!$code} {return $valid} Error $path $valid return 0 } proc Error {path msg} { upvar 1 options options if {![llength $options(-errorcmd)]} return set path [file normalize $path] uplevel \#0 [linsert $options(-errorcmd) end $path $msg] return } ## # ### ### ### ######### ######### ######### } # ### ### ### ######### ######### ######### ## # Tcl 8.5+. # We have to check readability of "current" on our own, glob # changed to error out instead of returning nothing. proc ::fileutil::traverse::ACCESS {args} {return 1} proc ::fileutil::traverse::GLOBF {current} { if {![file readable $current] || [BadLink $current]} { return {} } set res [lsort -unique [concat \ [glob -nocomplain -directory $current -types f -- *] \ [glob -nocomplain -directory $current -types {hidden f} -- *]]] # Look for broken links (They are reported as neither file nor directory). foreach l [lsort -unique [concat \ [glob -nocomplain -directory $current -types l -- *] \ [glob -nocomplain -directory $current -types {hidden l} -- *]]] { if {[file isfile $l]} continue if {[file isdirectory $l]} continue lappend res $l } return [lsort -unique $res] } proc ::fileutil::traverse::GLOBD {current} { if {![file readable $current] || [BadLink $current]} { return {} } lsort -unique [concat \ [glob -nocomplain -directory $current -types d -- *] \ [glob -nocomplain -directory $current -types {hidden d} -- *]] } proc ::fileutil::traverse::BadLink {current} { if {[file type $current] ne "link"} { return no } set dst [file join [file dirname $current] [file readlink $current]] if {![file exists $dst] || ![file readable $dst]} { return yes } return no } # ### ### ### ######### ######### ######### ## Ready package provide fileutil::traverse 0.7