# -*- tcl -*-
# Maintenance Instruction: leave the 999999.xxx.x as is and use 'pmix make' or src/make.tcl to update from <pkg>-buildversion.txt
#
# Please consider using a BSD or MIT style license for greatest compatibility with the Tcl ecosystem.
# Code using preferred Tcl licenses can be eligible for inclusion in Tcllib, Tklib and the punk package repository.
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# (C) 2023
#
# @@ Meta Begin
# Application punk::path 999999.0a1.0
# Meta platform     tcl
# Meta license      <unspecified>
# @@ Meta End


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# doctools header
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[manpage_begin punkshell_module_punk::path 0 999999.0a1.0]
#[copyright "2023"]
#[titledesc {Filesystem path utilities}]  [comment {-- Name section and table of contents description --}]
#[moddesc {punk path filesystem utils}]             [comment {-- Description at end of page heading --}] 
#[require punk::path]
#[description]
#[keywords module path filesystem]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[section Overview]
#[para] overview of punk::path
#[para] Filesystem path utility functions
#[subsection Concepts]
#[para] -


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Requirements
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[subsection dependencies]
#[para] packages used by punk::path
#[list_begin itemized]

package require Tcl 8.6-
#*** !doctools
#[item] [package {Tcl 8.6-}]

# #package require frobz
# #*** !doctools
# #[item] [package {frobz}]

#*** !doctools
#[list_end]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

#*** !doctools
#[section API]

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# oo::class namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::path::class {
    #*** !doctools
    #[subsection {Namespace punk::path::class}]
    #[para] class definitions
    if {[info commands [namespace current]::interface_sample1] eq ""} {
        #*** !doctools
        #[list_begin enumerated]

        # oo::class create interface_sample1 {
        #     #*** !doctools
        #     #[enum] CLASS [class interface_sample1]
        #     #[list_begin definitions]

        #     method test {arg1} {
        #         #*** !doctools
        #         #[call class::interface_sample1    [method test] [arg arg1]]
        #         #[para] test method
        #         puts "test: $arg1"
        #     }

        #     #*** !doctools
        #     #[list_end] [comment {-- end definitions interface_sample1}]
        # }

        #*** !doctools
        #[list_end] [comment {--- end class enumeration ---}]
    }
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++

# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Base namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::path {
    namespace export *
    #variable xyz

    #*** !doctools
    #[subsection {Namespace punk::path}]
    #[para] Core API functions for punk::path 
    #[list_begin definitions]


    proc pathglob_as_re {pathglob} {
        #*** !doctools
        #[call [fun pathglob_as_re] [arg pathglob]]
        #[para] Returns a regular expression for matching a path to a glob pattern which can contain glob chars *|? in any segment of the path structure
        #[para] ** matches any number of subdirectories. 
        #[para] e.g /etc/**/*.txt will match any .txt files at any depth below /etc  (except directly within /etc itself)
        #[para] e.g /etc/**.txt will match any .txt files at any depth below /etc
        #[para] any segment that does not contain ** must match exactly one segment in the path
        #[para] e.g the glob /etc/*/*.doc  - will match any .doc files that are exactly one tree level below /etc 
        #[para] The pathglob doesn't have to contain glob characters, in which case the returned regex will match the pathglob exactly as specified.
        #[para] Regular expression syntax is deliberateley not supported within the pathglob string so that supplied regex characters will be treated as literals 


        #todo - consider whether a way to escape the glob chars ? * is practical - to allow literals ? *
        # - would require counting immediately-preceding backslashes
        set pats [list]
        foreach seg [file split $pathglob] {
            if {[string range $seg end end] eq "/"} {
                set seg [string range $seg 0 end-1] ;# e.g c:/ -> c:   / -> ""  so that join at end doesn't double up
            }
            switch -- $seg {
                *   {lappend pats {[^/]*}}
                **  {lappend pats {.*}} 
                default {
                    set seg [string map [list {^ {\^} $ {\$} [} {\[} ( {\(} \{ \\\{ \\ {\\}] $seg] ;#treat regex characters in the input as literals 
                    set seg [string map [list  . {[.]}] $seg]
                    if {[regexp {[*?]} $seg]} {
                        set pat [string map [list ** {.*} * {[^/]*} ? {[^/]}] $seg]
                        lappend pats "$pat"
                    } else {
                        lappend pats "$seg"
                    }
                }
            }
        }
        return  "^[join $pats /]\$"
    }
    proc globmatchpath {pathglob path args} {
        #*** !doctools
        #[call [fun globmatchpath] [arg pathglob] [arg path] [opt {option value...}]]
        #[para] Return true if the pathglob matches the path
        #[para] see [fun pathglob_as_re] for pathglob description
        #[para] Caller must ensure that file separator is forward slash. (e.g use file normalize on windows)
        #[para]
        #[para] Known options:
        #[para]     -nocase 0|1  (default 0 - case sensitive)
        #[para] If -nocase is not supplied - default to case sensitive *except for driveletter*
        #[para] ie - the driveletter alone in paths such as c:/etc will still be case insensitive. (ie c:/ETC/* will match C:/ETC/blah but not C:/etc/blah)
        #[para] Explicitly specifying -nocase 0 will require the entire case to match including the driveletter.

        set defaults [dict create\
            -nocase \uFFFF\
        ]
        set known_opts [dict keys $defaults]
        set opts [dict merge $defaults $args]
        dict for {k v} $args {
            if {$k ni $known_opts} {
                error "Unrecognised options $k - known options: $known_opts"
            }
        }
        # -- --- --- --- --- --- 
        set opt_nocase [dict get $opts -nocase]
        set explicit_nocase 1 ;#default to disprove 
        if {$opt_nocase eq "\uFFFF"} {
            set opt_nocase 0
            set explicit_nocase 0
        } 
        # -- --- --- --- --- --- 
        if {$opt_nocase} {
            return [regexp -nocase [pathglob_as_re $pathglob] $path]
        } else {
            set re [pathglob_as_re $pathglob]
            if {$explicit_nocase} {
                set ismatch [regexp $re $path] ;#explicit -nocase 0  - require exact match of path literals including driveletter
            } else {
                #caller is using default for -nocase - which indicates case sensitivity - but we have an exception for the driveletter.
                set re_segments [file split $re] ;#Note that file split c:/etc gives {c:/ etc}  but file split ^c:/etc gives {^c: etc}
                set first_seg [lindex $re_segments 0]
                if {[regexp {^\^(.{1}):$} $first_seg _match driveletter]} {
                    #first part of re is like "^c:" i.e a drive letter
                    set chars [string tolower $driveletter][string toupper $driveletter]
                    set re [join [concat "^\[$chars\]:" [lrange $re_segments 1 end]] /] ;#rebuild re with case insensitive driveletter only - use join - not file join. file join will misinterpret leading re segment.
                }
                #puts stderr "-->re: $re"
                set ismatch [regexp $re $path]
            }
        }
        return $ismatch
    }

    #todo - implement treefiles which acts like dirfiles but allows path globbing in the same way as punk::ns::ns/ 
    #then review if treefiles can replace dirfiles or if both should exist (dirfiles can have literal glob chars in path segments - but that is a rare usecase)
    proc treefilenames {args} {
        #*** !doctools
        #[call [fun treefilenames]  [opt {option value...}] [opt {globpattern...}]]
        #[para]basic (glob based) list of filenames matching each pattern in tailglobs - recursive
        #[para] options:
        #[para] [opt -dir] <path> 
        #[para]     defaults to [lb]pwd[rb] - base path for tree to search
        #[para] [opt -antiglob_paths] <list> 
        #[para]     list of path patterns to exclude - may include * and ** path segments e.g /usr/**
        #[para]no natsorting - so order is dependent on filesystem

        lassign [punk::get_leading_opts_and_values {
            -directory "\uFFFF"
            -call-depth-internal 0
            -antiglob_paths {}
        } $args] _o opts _v tailglobs 

        # -- --- --- --- --- --- ---
        set opt_antiglob_paths [dict get $opts -antiglob_paths]
        set CALLDEPTH [dict get $opts -call-depth-internal]
        # -- --- --- --- --- --- ---
        set opt_dir             [dict get $opts -directory]
        if {$opt_dir eq "\uFFFF"} {
            set opt_dir [pwd]
        }
        # -- --- --- --- --- --- ---

        set files [list]
        if {$CALLDEPTH == 0} {
            if {![file isdirectory $opt_dir]} {
                return [list] 
            }
            set opts [dict merge $opts [list -directory $opt_dir]]
            if {![llength $tailglobs]} {
                lappend tailglobs *
            }
        }

        set skip 0
        foreach anti $opt_antiglob_paths {
            if {[globmatchpath $anti $opt_dir]} {
                set skip 1
                break
            }
        }
        if {$skip} {
            return [list]
        }

        #todo - account for vfs where matched path could appear to be a directory but is mounted so could be a desired match?
        set dirfiles [lsort [glob -nocomplain -dir $opt_dir -type f {*}$tailglobs]]
        lappend files  {*}$dirfiles
        set dirdirs [glob -nocomplain -dir $opt_dir -type d *]
        foreach dir $dirdirs {
            set skip 0
            foreach anti $opt_antiglob_paths {
                if {[globmatchpath $anti $dir]} {
                    set skip 1
                    break 
                }
            }
            if {$skip} {
                continue
            }
            set nextopts [dict merge $opts [list -directory $dir -call-depth-internal [incr CALLDEPTH]]]
            lappend files {*}[treefilenames {*}$nextopts {*}$tailglobs]
        }
        return $files
    }

    #maint warning - also in punkcheck
    proc relative {reference location} {
        #*** !doctools
        #[call [fun relative] [arg reference] [arg location]]
        #[para]    Taking two directory paths, a reference and a location, computes the path
        #    of the location relative to the reference.
        #[list_begin itemized]
        #[item]
        #[para] Arguments:
        #       [list_begin arguments]
        #       [arg_def string reference]       The path from which the relative path to location is determined. 
        #       [arg_def string location]        The location path which may be above or below the reference path 
        #       [list_end]
        #[item]
        #[para] Results:
        #[para]    The relative path of the location to the reference path.
        #[para]    Will return a single dot "." if the paths are the same
        #[item]
        #[para] Notes:
        #[para]    Both paths must be the same type - ie both absolute or both relative
        #[para]    Case sensitive.  ie  punk::path::relative /etc /etC 
        #      will return ../etC
        #[para]    On windows, the drive-letter component (only) is not case sensitive
        #[example_begin]
        #    P% punk::path::relative c:/etc  C:/etc
        #    -  .
        #[example_end]
        #[para] The part following the driveletter is case sensitive so in the following cases it recognises the driveletter matches but not the tail
        #[example_begin]
        #    P% punk::path::relative c:/etc C:/Etc
        #    -  ../Etc
        #[example_end]
        #[para]    On windows, if the paths are absolute and specifiy different volumes, only the location will be returned.
        #[example_begin]
        #    P% punk::path::relative c:/etc d:/etc/blah
        #    -  d:/etc/blah
        #[example_end]
        #[para] Unix-like examples:
        #[example_begin]
        #   P% punk::path::relative /usr/local/etc/ /usr/local/etc/somewhere/below
        #   - somewhere/below
        #   P% punk::path::relative /usr/local/etc/somewhere /usr/local/lib/here   
        #   - ../../lib/here
        #[example_end]
        #[list_end]

        #see also kettle
        # Modified copy of ::fileutil::relative (tcllib)
        # Adapted to 8.5 ({*}).

        #review - check volume info on windows.. UNC paths?
        if {[file pathtype $reference] ne [file pathtype $location]} {
            return -code error "Unable to compute relation for paths of different pathtypes: [file pathtype $reference] vs. [file pathtype $location], ($reference vs. $location)"
        }

        #avoid normalizing if possible (file normalize *very* expensive on windows)
        set do_normalize 0
        if {[file pathtype $reference] eq "relative"} {
            #if reference is relative so is location 
            if {[regexp {[.]{2}} [list $reference $location]]} {
                set do_normalize 1
            }
            if {[regexp {[.]/} [list $reference $location]]} {
                set do_normalize 1
            }
        } else {
            set do_normalize 1
        }
        if {$do_normalize} {
            set reference [file normalize $reference]
            set location  [file normalize $location]
        }

        set save $location
        set reference [file split $reference]
        set location  [file split $location]

        while {[lindex $location 0] eq [lindex $reference 0]} {
            set location  [lrange $location  1 end]
            set reference [lrange $reference 1 end]
            if {![llength $location]} {break}
        }

        set location_len  [llength $location]
        set reference_len [llength $reference]

        if {($location_len == 0) && ($reference_len == 0)} {
            # Cases:
            # (a) reference == location

            set location .
        } else {
            # Cases:
            # (b) ref is: ref/sub = sub
            #     loc is: ref     = {}

            # (c) ref is: ref     = {}
            #     loc is: ref/sub = sub

            while {$reference_len > 0} {
                set location [linsert $location 0 ..]
                incr reference_len -1
            }
            set location [file join {*}$location]
        }
        return $location
    }



    #*** !doctools
    #[list_end] [comment {--- end definitions namespace punk::path ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++


# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
# Secondary API namespace
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
namespace eval punk::path::lib {
        namespace export *
        namespace path [namespace parent]
        #*** !doctools
        #[subsection {Namespace punk::path::lib}]
        #[para] Secondary functions that are part of the API 
        #[list_begin definitions]





        #*** !doctools
        #[list_end] [comment {--- end definitions namespace punk::path::lib ---}]
}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++



# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
#*** !doctools
#[section Internal]
namespace eval punk::path::system {
        #*** !doctools
        #[subsection {Namespace punk::path::system}]
        #[para] Internal functions that are not part of the API 



}
# ++ +++ +++ +++ +++ +++ +++ +++ +++ +++ +++
##  Ready   
package provide punk::path [namespace eval punk::path {
    variable pkg punk::path
    variable version
    set version 999999.0a1.0 
}]
return

#*** !doctools
#[manpage_end]